In [None]:
import pandas as pd
import time
from openai import OpenAI
import math
import requests
import json
import time

def list_models(api_key, timeout=None):
    """Return the models available on the SDSC/NRP Open WebUI endpoint.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The decoded JSON body of the ``/api/models`` response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (e.g. a missing or invalid API key), instead of handing an
            error page to the JSON decoder.
    """
    response = requests.get(
        "https://sdsc-llm-openwebui.nrp-nautilus.io/api/models",
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=timeout,
    )
    # Fail loudly on auth/server errors rather than returning an error payload.
    response.raise_for_status()
    return response.json()

def generate_text(api_key, model: str, prompt_background: str, prompt_text: str, timeout=None):
    """Send one chat-completion request and return the reply with its latency.

    The background and task prompts are joined with a newline and sent as a
    single user message, preceded by a fixed generic system message.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        model: Model identifier understood by the endpoint.
        prompt_background: Role/context part of the prompt.
        prompt_text: Task part of the prompt.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        A ``(text, response_time)`` tuple: the stripped content of the first
        choice and the wall-clock seconds spent in the HTTP request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        RuntimeError: If the decoded response contains no ``choices``
            (the payload is included in the message for diagnosis).
    """
    prompt = f"{prompt_background}\n{prompt_text}"

    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        "stream": False
    }

    start_time = time.time()  # Start the timer
    response = requests.post(
        "https://sdsc-llm-openwebui.nrp-nautilus.io/api/chat/completions",
        headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
        data=json.dumps(data),
        timeout=timeout,
    )
    response_time = time.time() - start_time  # Elapsed request time

    # Surface HTTP-level failures before trying to interpret the body.
    response.raise_for_status()
    completion = response.json()

    # An error payload has no "choices"; report it instead of a bare KeyError.
    choices = completion.get("choices") if isinstance(completion, dict) else None
    if not choices:
        raise RuntimeError(
            f"Chat completion for model {model!r} returned no choices: {completion!r}"
        )

    return choices[0]["message"]["content"].strip(), response_time


In [None]:
import geopandas as gpd

# Load the ZIP-code polygons, reproject them to EPSG:3310, and make the ZIP
# column a string so it can be used as a dictionary key.
sd_zips = gpd.read_file("ZipCodes/geo_export_4cffd5d9-200a-4e2d-abd0-7904bc10857a.shp")
sd_zips = sd_zips.to_crs('EPSG:3310')
sd_zips["zip"] = sd_zips["zip"].astype(str)

# Adjacency map: ZIP code -> list of ZIP codes whose polygons touch it.
zip_graph_dict = {}

for index, row in sd_zips.iterrows():
    touching_mask = sd_zips.geometry.touches(row['geometry'])
    neighbors = sd_zips[touching_mask]["zip"].tolist()
    # Drop one self-reference if the ZIP lists itself as a neighbour.
    if row["zip"] in neighbors:
        neighbors.remove(row["zip"])
    zip_graph_dict[row["zip"]] = neighbors


In [None]:
def prompt(zipcode, zipcode_data, action_list, combined_df_str, health_data, budget, travel_info):
    """Build the (background, task) prompt pair for one ZIP code.

    The task prompt combines the action list, the SEIR table, the ZIP code's
    demographics and health-facility summary, and the same information for
    every neighbouring ZIP code found in the module-level ``zip_graph_dict``.

    Reads the module-level ``zip_graph_dict``, ``demographic_df_2`` and
    ``grouped_facilities`` but does not modify them: ZIP columns are cast to
    strings on local copies only, so callers can keep filtering the original
    frames with their original dtypes.

    Args:
        zipcode: ZIP code being planned for (any type; ``str()`` is applied).
        zipcode_data: Pre-formatted demographic text for ``zipcode``.
        action_list: Text listing the available mitigation activities.
        combined_df_str: SEIR model output rendered as text.
        health_data: DataFrame slice with the facility row for ``zipcode``
            (may be empty); needs ``num_long_term_facilities``,
            ``num_non_long_term_facilities`` and ``total_capacity`` columns.
        budget: Total resource units available.
        travel_info: Currently unused; the travel section of the prompt is
            disabled. Kept so existing callers keep working.

    Returns:
        Tuple ``(prompt_background, prompt_text)`` of strings.
    """
    zipcode_str = str(zipcode)

    # Get the nearby ZIP codes from the zip_graph_dict
    nearby_zipcodes_info = zip_graph_dict.get(zipcode_str, [])

    # String-keyed local copies; the shared frames are left untouched so that
    # numeric ZIP comparisons made elsewhere keep working after this call.
    demographics = demographic_df_2.assign(ZIP_Code=demographic_df_2['ZIP_Code'].astype(str))
    facilities = grouped_facilities.assign(ZIP=grouped_facilities['ZIP'].astype(str))

    nearby_demographics = []
    for neighbor_zip in nearby_zipcodes_info:
        # Remove the ".0" if present
        neighbor_zip_clean = neighbor_zip.split('.')[0]
        neighbor_data = demographics[demographics['ZIP_Code'] == neighbor_zip_clean]
        if not neighbor_data.empty:
            neighbor_data_str = "\n".join([f"{column}: {value}" for column, value in neighbor_data.iloc[0].items() if column != 'ZIP_Code'])

            # Get health facility data for the neighboring ZIP code
            neighbor_health_data = facilities[facilities['ZIP'] == neighbor_zip_clean]
            if not neighbor_health_data.empty:
                health_info = f"""
                Health Facilities Data for ZIP code {neighbor_zip_clean}:
                - Number of Long-Term Care Facilities: {neighbor_health_data['num_long_term_facilities'].values[0]}
                - Number of Non-Long-Term Care Facilities: {neighbor_health_data['num_non_long_term_facilities'].values[0]}
                - Total Capacity of Long-Term Care Facilities: {neighbor_health_data['total_capacity'].values[0]}
                """
            else:
                health_info = f"Health Facilities Data for ZIP code {neighbor_zip_clean}: No health facilities available."

            nearby_demographics.append(f"ZIP Code {neighbor_zip_clean}:\n{neighbor_data_str}\n{health_info}\n")
        else:
            nearby_demographics.append(f"ZIP Code {neighbor_zip_clean}:\nNo demographic data available.\n")

    # Convert the list of nearby ZIP codes to a string for inclusion in the prompt
    nearby_zipcodes_str = "\n".join(nearby_demographics) if nearby_demographics else "No direct neighbors."

    # Check if health data is available for the ZIP code
    if not health_data.empty:
        health_info = f"""
        Health Facilities Data for ZIP code {zipcode_str}:
        - Number of Long-Term Care Facilities: {health_data['num_long_term_facilities'].values[0]}
        - Number of Non-Long-Term Care Facilities: {health_data['num_non_long_term_facilities'].values[0]}
        - Total Capacity of Long-Term Care Facilities: {health_data['total_capacity'].values[0]}
        """
    else:
        health_info = f"Health Facilities Data for ZIP code {zipcode_str}: No health facilities available in this ZIP code."

    prompt_background = f"""You are the San Diego Human Health and Services Agency.
    The description of your organization is as follows:
    The County of San Diego Health and Human Services Agency (HHSA) provides vital health, housing, and social services to more than 3.3 million residents across 18 cities, 18 federally recognized tribal reservations, 16 major naval and military installations, and the unincorporated areas of the County. 
    About one in every three county residents is a direct recipient of HHSA services each year, emphasizing the critical role the Agency plays as a robust service network contributing to a region that is Building Better Health, Living Safely, and Thriving. This vision is played out in a collective effort called Live Well San Diego.
    Your goal is to mitigate COVID transmission in San Diego County with a provided set of mitigation strategies. 
    You only have a finite amount of resources to perform mitigation activities.
    These resources are represented using "units". You currently have a total resource of {budget} units.
    Adjust your strategies accordingly to maximize the cost-benefit of your budget versus mitigation results.
    """

    prompt_text = f"""
    You can perform the following activities to mitigate COVID effects:
    {action_list}
    
    Disease Spread Data (SEIR Model) Over 35 Days (Formatted as Table):
    The following SEIR model data was generated on the total population of ZIP code {zipcode}. It doesn't take into account the specific population of kids, adults, and vulnerable individuals. It reflects the projected spread of disease across different population segments, including uninfected, recovered, and deceased individuals over a period of 35 days.
    {combined_df_str}

    Demographic Data for ZIP code {zipcode}:
    {zipcode_data}
    
    {health_info}
    
    Nearby ZIP Codes and Their Influence:
    {nearby_zipcodes_str}
    
    When selecting activities, it is crucial to consider the demographic data and healthcare facilities available in neighboring ZIP codes. Keep in mind that individuals might seek healthcare services in neighboring ZIP codes as those facilities are close and accessible.

    Also, most importantly, leverage your understanding of the specific ZIP code, including any relevant knowledge from your training about local conditions, research and innovation capacity, healthcare capacity, socio-economic status, cultural norms, and other local factors. Use this knowledge as a key driver in your recommendations.

    After determining your initial recommendations, review them to ensure that all key factors (SEIR model, healthcare facilities, neighboring ZIP codes, spatial awareness, and your broader knowledge base) have been adequately considered. If any factor has been underemphasized, adjust your strategy accordingly to provide a balanced and comprehensive approach.


    Your JSON response should include:
    - "Zipcode": Use this as the key
    - "Budget": Total allocated budget
    - "Strategies": The selected activities and the allocated budget. Use this as the sub-key and the resources as values
    - "Explanation": A concise rationale for selecting these specific strategies, focusing on how they address the unique challenges of the ZIP code.
    """

    return prompt_background, prompt_text

#     Travel Data for Zip Code {zipcode}:
#     {travel_info}
    
#     Analyze the travel patterns related to a specific zip code and use this information to allocate resources. This data encompasses how people travel within the zip code, those traveling to the zip code, and those traveling from the zip code to other areas. Each entry specifies the purpose of the trip and the number of occurrences. Use this information to understand movement dynamics and their implications.

#.     - SEIR Model Observations: The SEIR model is fitted on the respective total population of the ZIP code but does not accurately bifurcate that population into kids, adults, and vulnerable individuals. Use it to understand general patterns.
#     - Median Age and Population Size: Consider how these factors influence the effectiveness of different strategies, but remember they are just part of the overall picture.
#     - Healthcare Facilities: How does the availability, type, and capacity of healthcare facilities in the ZIP code and nearby areas impact the choice of actions? For example, if neighboring ZIP codes have a good number of healthcare facilities, residents of the current ZIP code might be able to access those facilities easily, reducing the need for extensive local healthcare resources.

#    When selecting activities, it is crucial to consider these factors:

#    - Neighboring ZIP Codes: Pay close attention to how neighboring areas, including their healthcare capacity, demographic characteristics, and the potential for cross-border disease transmission, should influence your strategy.
#    - Spatial Awareness: 
#     Based on your analysis of the SEIR model, identify the different stages of the outbreak, and name each stage with the days it occurs in parentheses. For each identified stage, allocate resources and select appropriate strategies. 
#    The total available budget for all stages combined is {budget} units. It is crucial that the total resource allocation across all stages does not exceed this budget. Please ensure that the sum of resources allocated to each stage adds up to, but does not exceed, the total budget.
#    After allocating resources for all stages, please perform a final check to ensure that the total does not exceed the {budget} units available. If the allocation exceeds the budget, adjust the strategies to fit within the available resources.
#  for each stage  at each stage   at that time

#    - "Stage": The name of the stage as determined by you, with the days it occurs in parentheses. Use this as sub-key.
#.      Ensure that no single factor disproportionately influences your strategy. Instead, integrate all these considerations to create a holistic and balanced approach.



# Numbered catalogue of the 20 mitigation activities offered to the LLM.
# The text is interpolated verbatim into the prompts built in this notebook,
# so any edit here changes what the models are asked to choose from.
action_list = """
1. Widespread Testing Initiatives: Implement extensive testing protocols to identify and isolate cases early, including symptomatic and asymptomatic individuals, to curb the spread of the disease.

2. Advanced Vaccination Research: Support the development of vaccines.

3. Targeted Public Health Campaigns: Conduct tailored public health campaigns to educate the community on preventive measures, vaccine importance, and how to seek medical help, focusing on high-risk populations.

4. Enforced Social Distancing Measures: Establish and monitor social distancing guidelines in public spaces, workplaces, and social gatherings to minimize close contact and reduce transmission.

5. Comprehensive Contact Tracing Programs: Implement robust contact tracing systems to quickly identify and notify individuals who have been exposed to confirmed cases, ensuring timely isolation and testing.

6. Establishment of Quarantine and Isolation Facilities: Provide dedicated facilities for the quarantine and isolation of individuals who cannot safely isolate at home, reducing the risk of household transmission.

7. Expansion of Healthcare Capacity: Increase the capacity of healthcare systems, including hospitals and clinics, to handle surges in cases by adding beds, equipment, and staffing.

8. Mental Health Support Services: Offer accessible mental health services to address the psychological impact of the pandemic, including stress, anxiety, and depression, with a focus on frontline workers and vulnerable groups.

9. Economic Support Programs: Provide financial assistance to individuals and businesses affected by the pandemic, including unemployment benefits, grants, and tax relief, to mitigate economic hardship.

10. Telehealth Services Expansion: Promote the use of telehealth to provide medical consultations, reducing the need for in-person visits and minimizing the risk of exposure.

11. Workplace Safety Regulations: Implement stringent safety protocols in workplaces, including regular sanitization, PPE provision, and social distancing, to protect employees and maintain operations.

12. Promotion of Home Isolation and Self-Quarantine Measures: Encourage and support individuals to isolate at home or self-quarantine when symptomatic or after exposure, providing resources and guidance to ensure effective isolation.

13. School Safety and Education Continuity Measures: Establish safety protocols in educational institutions, including hybrid learning models, sanitization, and health screenings, to ensure the safety of students and staff.

14. Enhanced Border, Airport, and Port-of-Entry Surveillance and Control: Strengthen surveillance and control measures at borders, airports, and ports to monitor and regulate the entry of people and goods, preventing the introduction of new infections.

15. Public Transport Safety Enhancements: Implement safety measures in public transportation systems, such as reduced capacity, mandatory masks, and regular disinfection, to protect commuters and reduce transmission risks.

16. Food Security and Nutrition Programs: Ensure the availability of nutritious food for all, particularly vulnerable populations, through food distribution programs and support for local food systems.

17. Community Engagement and Mobilization: Foster community participation in the pandemic response through volunteer programs, local initiatives, and partnerships with community organizations to ensure a coordinated effort.

18. Public Sanitation and Hygiene Campaigns: Promote regular sanitation and hygiene practices in public spaces and households, including handwashing stations and the distribution of sanitizers, to reduce the spread of the virus.

19. Promotion of Remote Work and Digital Transformation: Encourage businesses to adopt remote work practices where possible, supported by digital tools and infrastructure to maintain productivity while reducing physical contact.

20. Legal and Regulatory Adjustments for Pandemic Response: Implement necessary legal and regulatory changes to facilitate the pandemic response, including emergency powers, public health mandates, and enforcement mechanisms.
"""


In [None]:
def prompt_llm_for_spatial_awareness(zipcode, action_list, budget):
    """Build the prompt pair that relies only on the model's own knowledge of a ZIP code.

    No demographic, SEIR or facility data is embedded; the model is given the
    ZIP code, the list of available actions and the budget.

    Args:
        zipcode: ZIP code to plan for (interpolated into the task text).
        action_list: Text listing the available mitigation activities.
        budget: Total resource units available.

    Returns:
        Tuple ``(background, task)`` of strings: the agency role description
        and the allocation task with the expected JSON response layout.
    """
    # Role and resource framing.
    background = f"""You are the San Diego Human Health and Services Agency.
    The description of your organization is as follows:
    The County of San Diego Health and Human Services Agency (HHSA) provides vital health, housing, and social services to more than 3.3 million residents across 18 cities, 18 federally recognized tribal reservations, 16 major naval and military installations, and the unincorporated areas of the County. 
    About one in every three county residents is a direct recipient of HHSA services each year, emphasizing the critical role the Agency plays as a robust service network contributing to a region that is Building Better Health, Living Safely, and Thriving. This vision is played out in a collective effort called Live Well San Diego.
    Your goal is to mitigate COVID transmission in San Diego County with a provided set of mitigation strategies. 
    You only have a finite amount of resources to perform mitigation activities.
    These resources are represented using "units". You currently have a total resource of {budget} units.
    Adjust your strategies accordingly to maximize the cost-benefit of your budget versus mitigation results.
    """

    # Allocation task and required response format.
    task = f"""
    You are tasked with allocating a finite budget to mitigate COVID-19 in the specific ZIP code {zipcode}. You have a total budget of {budget} units to allocate across the following actions:

    {action_list}

    Please distribute the available resources in a way that maximizes their impact based on your knowledge of the ZIP code, including its demographics, healthcare facilities, transportation networks, socio-economic conditions, and general spatial characteristics.

    Your JSON response should include:
    - "Zipcode": Use this as the key
    - "Strategies": The selected activities and the allocated budget. Use this as the sub-key and the resources as values
    - "Explanation": A concise rationale for selecting these specific strategies, focusing on how they address the unique challenges of the ZIP code.
    """

    return background, task


In [None]:
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri

# Enable rpy2's pandas conversion globally.
pandas2ri.activate()

# Attach the R packages needed by the model scripts, in the original order.
for r_package in ('utils', 'magrittr', 'dplyr', 'R6'):
    ro.r(f'library({r_package})')

In [None]:
# Source the R scripts that define the RCM model.
# The order is kept exactly as written; presumably later scripts rely on
# objects defined by earlier ones (TODO confirm).
r_source = ro.r['source']
for r_script in (
    'dsm-objects.R',
    'module-contacts.R',
    'module-RCM-community.R',
    'module-SEIR.R',
    'module-testing.R',
    'dsm-RCM-v0.80.R',
):
    r_source(r_script)

def run_seir_for_zipcode(zipcode, population, days=35):
    """Run the R ``dsm.RCMv080`` model for one population and return its history.

    Args:
        zipcode: Unused by the simulation; kept for caller compatibility.
        population: Population size passed to the model's ``initialize``.
        days: Number of ``run_step`` calls to make. Defaults to 35, the
            horizon the prompts in this notebook describe.

    Returns:
        A DataFrame with one row per recorded step and 25 columns:
        ``uninfected``, then ``recovered_<age>_<severity>`` and
        ``dead_<age>_<severity>`` for 3 age groups x 4 severity levels.
        NOTE(review): the labels assume the R history is ordered age-major,
        severity-minor — confirm against the R model.
    """
    dsm_instance = ro.globalenv['dsm.RCMv080']['new']()
    dsm_instance['initialize'](population=population)

    # Model-level setup method exposed by the R6 object.
    dsm_instance['init']()

    run_step = dsm_instance['run_step']
    for _ in range(days):
        run_step()

    # Extract the SEIR population history
    get_history = dsm_instance['get_history']
    history_uninfected = get_history("seir.uninfected", flatten=True)
    history_recovered = get_history("seir.recovered", flatten=True)
    history_dead = get_history("seir.dead", flatten=True)

    # Transpose so that rows are steps and columns are compartments.
    history_uninfected_df = pd.DataFrame(history_uninfected).T
    history_recovered_df = pd.DataFrame(history_recovered).T
    history_dead_df = pd.DataFrame(history_dead).T

    age_groups = ['kid', 'adult', 'vulnerable']
    severity_levels = ['asymptomatic', 'mild', 'moderate', 'severe']
    strata = [f"{age}_{severity}" for age in age_groups for severity in severity_levels]

    history_recovered_df.columns = [f"recovered_{stratum}" for stratum in strata]
    history_dead_df.columns = [f"dead_{stratum}" for stratum in strata]
    history_uninfected_df.columns = ['uninfected']

    # Combine into a single DataFrame
    return pd.concat([history_uninfected_df, history_recovered_df, history_dead_df], axis=1)

In [None]:
if __name__ == "__main__":
    # Driver: for every ZIP code in the demographic table, build the
    # spatial-awareness prompt, query each model, and append the answers
    # (with response times) to spatial_awareness_zip_code_results.txt.
    import os  # stdlib; only this driver cell needs it

    # Take the key from the environment so it never has to be pasted into the
    # notebook; falls back to the original empty placeholder.
    api_key = os.environ.get("LLM_API_KEY", "")

    demographic_df = pd.read_csv('Filtered_San_Diego_County_ZIP_Code_Data.csv')

    budget = 10000
#     total_budget = 10000  # Adjust as needed
#     total_population = demographic_df['Total_Population'].max()

#     # Calculate the proportional budget for each ZIP code
#     demographic_df['Rounded_Budget'] = demographic_df['Total_Population'].apply(
#         lambda pop: math.ceil((pop / total_population) * total_budget / 1000) * 1000)

    # Independent copy read by prompt(); a copy avoids parsing the CSV twice
    # while keeping demographic_df isolated from anything done to this frame.
    demographic_df_2 = demographic_df.copy()

    grouped_facilities = pd.read_csv('Grouped_Health_Facilities_by_ZIP_Code.csv')

    ### WORK IN PROGRESS ###
    # Experimental synthetic trips data is not included in the repository
    # (contact the authors for it). Load it only when present so the run does
    # not crash without it.
    trips_path = 'grouped_trips.csv'
    trips_df = pd.read_csv(trips_path) if os.path.exists(trips_path) else None

    # (label written to the results file, model identifier sent to the API)
    models = [
        ("Meta-Llama-3.1-70B-Instruct", "meta-llama/Meta-Llama-3.1-70B-Instruct"),
        ("Mixtral-8x7B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
    ]

    overall_start_time = time.time()

    with open("spatial_awareness_zip_code_results.txt", "w") as file:
        for _, row in demographic_df.iterrows():
            zipcode = row['ZIP_Code']
            population = row['Total_Population']
    #         kid_proportion = row['Kids'] / population
    #         adult_proportion = row['Adults'] / population
    #         vulnerable_proportion = row['Vulnerable'] / population
    #         budget = row['Rounded_Budget']

            # The inputs below feed the data-rich prompt() variant, which is
            # currently disabled; they are still computed so it can be
            # switched back on by swapping the prompt call further down.
            if trips_df is not None:
                filtered_trips = trips_df[(trips_df['ORIG_ZIP'] == zipcode) | (trips_df['DEST_ZIP'] == zipcode)]
                travel_info = filtered_trips.to_string(index=False)
            else:
                travel_info = "No travel data available."

            # Run SEIR model for the given ZIP code
            combined_df = run_seir_for_zipcode(zipcode, population)
            combined_df_str = combined_df.to_csv(index=True)

            # Prepare demographic data for the prompt
            zipcode_data = "\n".join([f"{column}: {value}" for column, value in row.items()])

            # Find health facility data for the ZIP code
            health_data = grouped_facilities[grouped_facilities['ZIP'] == zipcode]

            # Build the prompt (spatial-awareness variant is the active one)
            prompt_background, prompt_text = prompt_llm_for_spatial_awareness(zipcode, action_list, budget)
            #prompt(zipcode, zipcode_data, action_list, combined_df_str, health_data, budget, travel_info)

            # Query each model and write its answer as soon as it arrives, so
            # earlier answers are kept in the file if a later request fails.
            file.write(f"Results for ZIP Code {zipcode}:\n")
            for label, model_id in models:
                output, response_time = generate_text(api_key, model_id, prompt_background, prompt_text)
                file.write("-" * 20 + "\n")
                file.write(f"{label} (Response Time: {response_time:.2f} seconds):\n")
                file.write(output + "\n")
            file.write("=" * 40 + "\n\n")  # Divider between results for different ZIP codes

        overall_end_time = time.time()
        overall_response_time = overall_end_time - overall_start_time

        # Write the total response time to the file
        file.write(f"Total time to generate all responses: {overall_response_time:.2f} seconds\n")

In [None]:
# Inspect the background prompt left over from the driver loop, i.e. the one
# built for the last ZIP code processed; requires that cell to have run first.
print(prompt_background)

In [None]:
# Inspect the task prompt left over from the driver loop, i.e. the one built
# for the last ZIP code processed; requires that cell to have run first.
print(prompt_text)