In [None]:
import pandas as pd
import time
from openai import OpenAI

# Build the client object used for every LLM request
def auth():
    """Return an ``OpenAI`` client bound to the sdsc-llm-api.nrp-nautilus.io endpoint.

    The API key literal below is an empty string.
    """
    return OpenAI(
        api_key="",
        base_url="https://sdsc-llm-api.nrp-nautilus.io/",
    )

# Function to generate text using the LLM
def generate_text(client: OpenAI, model: str, prompt_background: str, prompt_text: str):
    """Send one chat request to ``model`` and measure how long it takes.

    ``prompt_background`` and ``prompt_text`` are joined with a newline and
    sent as a single message with role ``"user"``.

    Args:
        client: Object exposing ``chat.completions.create`` (see ``auth``).
        model: Model identifier forwarded unchanged to the API.
        prompt_background: Context placed first in the prompt.
        prompt_text: Task text placed after the background.

    Returns:
        A ``(content, response_time)`` tuple: the message content of the
        first choice and the elapsed time of the API call in seconds.
    """
    prompt = f"{prompt_background}\n{prompt_text}"

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted during the request,
    # which is possible with time.time().
    started = time.perf_counter()
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    response_time = time.perf_counter() - started

    # Only the first choice is used
    return chat_completion.choices[0].message.content, response_time


In [None]:
import geopandas as gpd

# Load the ZIP-code polygons and reproject them to EPSG:3310
sd_zips = gpd.read_file("ZipCodes/geo_export_4cffd5d9-200a-4e2d-abd0-7904bc10857a.shp").to_crs('EPSG:3310')

# NOTE: the zip values are kept exactly as str() renders them; any
# fractional suffix such as ".0" is NOT stripped here.
sd_zips['zip'] = sd_zips['zip'].astype(str)

# Adjacency map: zip string -> list of zip strings whose geometry touches it
zip_graph_dict = {}

for index, row in sd_zips.iterrows():
    neighbors = sd_zips.loc[sd_zips.geometry.touches(row['geometry']), 'zip'].tolist()
    # Drop one occurrence of the row's own zip if it shows up among the neighbors
    if row.zip in neighbors:
        neighbors.remove(row.zip)
    zip_graph_dict[row.zip] = neighbors


In [None]:
# Numbered catalogue of the 20 mitigation activities. The string is passed to
# the prompt builders, which embed it verbatim in the prompt text.
action_list = """
1. Widespread Testing Initiatives: Implement extensive testing protocols to identify and isolate cases early, including symptomatic and asymptomatic individuals, to curb the spread of the disease.

2. Advanced Vaccination Research: Support the development of vaccines.

3. Targeted Public Health Campaigns: Conduct tailored public health campaigns to educate the community on preventive measures, vaccine importance, and how to seek medical help, focusing on high-risk populations.

4. Enforced Social Distancing Measures: Establish and monitor social distancing guidelines in public spaces, workplaces, and social gatherings to minimize close contact and reduce transmission.

5. Comprehensive Contact Tracing Programs: Implement robust contact tracing systems to quickly identify and notify individuals who have been exposed to confirmed cases, ensuring timely isolation and testing.

6. Establishment of Quarantine and Isolation Facilities: Provide dedicated facilities for the quarantine and isolation of individuals who cannot safely isolate at home, reducing the risk of household transmission.

7. Expansion of Healthcare Capacity: Increase the capacity of healthcare systems, including hospitals and clinics, to handle surges in cases by adding beds, equipment, and staffing.

8. Mental Health Support Services: Offer accessible mental health services to address the psychological impact of the pandemic, including stress, anxiety, and depression, with a focus on frontline workers and vulnerable groups.

9. Economic Support Programs: Provide financial assistance to individuals and businesses affected by the pandemic, including unemployment benefits, grants, and tax relief, to mitigate economic hardship.

10. Telehealth Services Expansion: Promote the use of telehealth to provide medical consultations, reducing the need for in-person visits and minimizing the risk of exposure.

11. Workplace Safety Regulations: Implement stringent safety protocols in workplaces, including regular sanitization, PPE provision, and social distancing, to protect employees and maintain operations.

12. Promotion of Home Isolation and Self-Quarantine Measures: Encourage and support individuals to isolate at home or self-quarantine when symptomatic or after exposure, providing resources and guidance to ensure effective isolation.

13. School Safety and Education Continuity Measures: Establish safety protocols in educational institutions, including hybrid learning models, sanitization, and health screenings, to ensure the safety of students and staff.

14. Enhanced Border, Airport, and Port-of-Entry Surveillance and Control: Strengthen surveillance and control measures at borders, airports, and ports to monitor and regulate the entry of people and goods, preventing the introduction of new infections.

15. Public Transport Safety Enhancements: Implement safety measures in public transportation systems, such as reduced capacity, mandatory masks, and regular disinfection, to protect commuters and reduce transmission risks.

16. Food Security and Nutrition Programs: Ensure the availability of nutritious food for all, particularly vulnerable populations, through food distribution programs and support for local food systems.

17. Community Engagement and Mobilization: Foster community participation in the pandemic response through volunteer programs, local initiatives, and partnerships with community organizations to ensure a coordinated effort.

18. Public Sanitation and Hygiene Campaigns: Promote regular sanitation and hygiene practices in public spaces and households, including handwashing stations and the distribution of sanitizers, to reduce the spread of the virus.

19. Promotion of Remote Work and Digital Transformation: Encourage businesses to adopt remote work practices where possible, supported by digital tools and infrastructure to maintain productivity while reducing physical contact.

20. Legal and Regulatory Adjustments for Pandemic Response: Implement necessary legal and regulatory changes to facilitate the pandemic response, including emergency powers, public health mandates, and enforcement mechanisms.
"""


In [None]:
def prompt_with_non_contiguous(zipcode, zipcode_data, action_list, health_data, budget, combined_df):
    """Build the prompt pair that shows the model the whole ZIP-code table.

    No neighbor list is computed here: the full ``combined_df`` is rendered
    as text and the prompt asks the model to decide which ZIP codes it
    considers neighbors.

    Args:
        zipcode: ZIP code the allocation is requested for.
        zipcode_data: Demographic description interpolated into the prompt.
        action_list: Text listing the available mitigation activities.
        health_data: Health-facility description interpolated into the prompt.
        budget: Total number of resource "units" stated in the background.
        combined_df: DataFrame rendered with ``to_string(index=False)``.

    Returns:
        ``(prompt_background, prompt_text)`` as two strings.
    """
    # Render every row of the table, without the index column, for the prompt
    full_table = combined_df.to_string(index=False)

    background = f"""You are the San Diego Human Health and Services Agency.
    The description of your organization is as follows:
    The County of San Diego Health and Human Services Agency (HHSA) provides vital health, housing, and social services to more than 3.3 million residents across 18 cities, 18 federally recognized tribal reservations, 16 major naval and military installations, and the unincorporated areas of the County.
    About one in every three county residents is a direct recipient of HHSA services each year, emphasizing the critical role the Agency plays as a robust service network contributing to a region that is Building Better Health, Living Safely, and Thriving. This vision is played out in a collective effort called Live Well San Diego.
    Your goal is to mitigate COVID transmission in San Diego County with a provided set of mitigation strategies.
    You only have a finite amount of resources to perform mitigation activities. These resources are represented using "units". You currently have a total resource of {budget} units.
    Adjust your strategies accordingly to maximize the cost-benefit of your budget versus mitigation results.
    """

    task = f"""
    You can perform the following activities to mitigate COVID effects:
    {action_list}

    Demographic Data for ZIP code {zipcode}:
    {zipcode_data}

    Health Facilities Data for ZIP code {zipcode}:
    {health_data}

    Entire ZIP Code Data (including potential neighbors):
    {full_table}

    Decide which ZIP codes you consider to be neighbors based on your own spatial awareness, but allocate resources only to the current ZIP code. Use the neighboring ZIP code data to inform and guide your decision-making.
    After determining your recommendations, review them to ensure that all key factors (demographics, healthcare facilities, neighboring ZIP codes) have been adequately considered. Adjust your strategy accordingly to provide a balanced and comprehensive approach.

    Just return a JSON response which should include:
    - "Zipcode": Use this as the key
    - "Strategies": The selected activities and the allocated budget. Use this as the sub-key and the resources as values
    - "Explanation": A concise rationale for selecting these specific strategies, focusing on how they address the unique challenges of the ZIP code and its neighbors.
    
    Ensure that there is no other non-JSON part in your response.
    """

    return background, task



In [None]:
def _zip_base(value):
    """Return ``value`` as a string with everything from the first '.' removed (``92101.0`` -> ``"92101"``)."""
    return str(value).split('.')[0]


def prompt_with_contiguous_neighbors(zipcode, zipcode_data, action_list, health_data, budget, demographic_df, combined_df, grouped_facilities):
    """Build the prompt pair that lists only the ZIP codes adjacent to ``zipcode``.

    Neighbors are read from the module-level ``zip_graph_dict``; for each one
    the first matching row of ``combined_df`` is rendered as
    ``column: value`` lines (the ``ZIP_Code`` column itself is omitted).

    Args:
        zipcode: ZIP code the allocation is requested for.
        zipcode_data: Demographic description interpolated into the prompt.
        action_list: Text listing the available mitigation activities.
        health_data: Health-facility description interpolated into the prompt.
        budget: Total number of resource units stated in the prompt.
        demographic_df: Unused; kept so existing callers keep working.
        combined_df: DataFrame with a ``ZIP_Code`` column holding the
            per-ZIP data shown for each neighbor. It is not modified.
        grouped_facilities: Unused; kept so existing callers keep working.

    Returns:
        ``(prompt_background, prompt_text)`` as two strings.
    """
    zipcode_str = str(zipcode)

    # The graph keys are whatever str() produced for the shapefile's zip
    # column, so they may or may not carry a ".0" suffix. Try the suffixed
    # form first (the historical lookup), then the bare ZIP.
    suffixed_key = zipcode_str if '.' in zipcode_str else zipcode_str + '.0'
    contiguous_zipcodes_info = []
    for candidate in (suffixed_key, _zip_base(zipcode_str)):
        if candidate in zip_graph_dict:
            contiguous_zipcodes_info = zip_graph_dict[candidate]
            break

    # Work on a copy whose ZIP_Code column is a normalized string, so that
    # (a) the caller's DataFrame is left untouched and (b) integer, float
    # ("92101.0") and string ZIP columns all compare equal to a bare ZIP.
    zip_table = combined_df.assign(ZIP_Code=combined_df['ZIP_Code'].map(_zip_base))

    nearby_demographics = []
    for neighbor_zip in contiguous_zipcodes_info:
        # Remove any trailing ".0" from neighbor_zip if it exists
        neighbor_zip_clean = _zip_base(neighbor_zip)

        # Fetch neighboring data from combined_df (instead of demographic_df)
        neighbor_data = zip_table[zip_table['ZIP_Code'] == neighbor_zip_clean]

        if not neighbor_data.empty:
            # Only the first matching row is reported
            neighbor_data_str = "\n".join(
                f"{column}: {value}"
                for column, value in neighbor_data.iloc[0].items()
                if column != 'ZIP_Code'
            )
            nearby_demographics.append(f"ZIP Code {neighbor_zip_clean}:\n{neighbor_data_str}\n")
        else:
            nearby_demographics.append(f"ZIP Code {neighbor_zip_clean}:\nNo demographic data available.\n")

    # Convert the list of nearby ZIP codes to a string for inclusion in the prompt
    nearby_zipcodes_str = "\n".join(nearby_demographics) if nearby_demographics else "No direct neighbors."

    # Construct the prompt background and text
    prompt_background = f"""
    You are tasked with allocating resources to mitigate the spread of COVID-19 in ZIP code {zipcode}. 
    Consider the demographic and healthcare data for this ZIP code and its neighboring areas.

    When making decisions, allocate resources only to the current ZIP code. However, use the neighboring ZIP code data to inform and guide your decision-making, 
    taking into account factors such as population demographics, healthcare facilities, and potential cross-boundary influences.
    """

    prompt_text = f"""
    ZIP Code Data for {zipcode}:
    {zipcode_data}
    
    Healthcare Facility Data for {zipcode}:
    {health_data}
    
    Nearby ZIP Codes and Their Data:
    {nearby_zipcodes_str}
    
    The total budget is {budget} units. Choose how to allocate resources based on the action list provided below:
    {action_list}

    Ensure that the total allocated resources remain within the budget, and explain why you chose these strategies for ZIP code {zipcode}.
    
    Just return a JSON response which should include:
    - "Zipcode": Use this as the key
    - "Strategies": The selected activities and the allocated budget. Use this as the sub-key and the resources as values
    - "Explanation": A concise rationale for selecting these specific strategies, focusing on how they address the unique challenges of the ZIP code and its neighbors.
    
    Ensure that there is no other non-JSON part in your response.
    """

    return prompt_background, prompt_text


In [None]:
import pandas as pd

# Driver cell: for every ZIP code and every model, request one allocation with
# the full-table prompt and one with the contiguous-neighbors prompt, then
# print both replies with their response times.
# NOTE: the names assigned in this block (budget, client, the three DataFrames,
# prompt_text_all, prompt_text_contiguous, ...) are module-level and are read
# again by later cells.
if __name__ == "__main__":
    # Total resource "units" quoted in every prompt
    budget = 10000

    client = auth()

    demographic_df = pd.read_csv('Filtered_San_Diego_County_ZIP_Code_Data_copy.csv')
    grouped_facilities = pd.read_csv('Grouped_Health_Facilities_by_ZIP_Code.csv')
    combined_df = pd.read_csv('Combined_ZIP_Code_Data.csv')  # The combined data for non-contiguous neighbors

    # Each model below is queried with both prompt variants
    models = ["meta-llama/Meta-Llama-3.1-70B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1"]

    for zipcode in demographic_df['ZIP_Code'].unique():
        # All demographic rows for this ZIP; only the first one is used below
        row = demographic_df[demographic_df['ZIP_Code'] == zipcode]
        # Read here but not used anywhere else in this block
        population = row.iloc[0]['Total_Population']


        # Prepare demographic data for the prompt
        zipcode_data = "\n".join([f"{column}: {row.iloc[0][column]}" for column in row.columns])

        # Find health facility data for the ZIP code
        # (a DataFrame slice; it is interpolated into the prompts as-is)
        health_data = grouped_facilities[grouped_facilities['ZIP'] == zipcode]

        for model in models:
            # Test with all (non-contiguous) neighbors
            prompt_background_all, prompt_text_all = prompt_with_non_contiguous(
                zipcode, zipcode_data, action_list, health_data, budget, combined_df
            )
            response_all, response_time_all = generate_text(client, model, prompt_background_all, prompt_text_all)

            # Test with contiguous neighbors only
            prompt_background_contiguous, prompt_text_contiguous = prompt_with_contiguous_neighbors(
                zipcode, zipcode_data, action_list, health_data, budget, demographic_df, combined_df, grouped_facilities
            )
            response_contiguous, response_time_contiguous = generate_text(client, model, prompt_background_contiguous, prompt_text_contiguous)

            # Compare and print results
            print(f"Results for ZIP Code {zipcode} using {model}:")
            print("-" * 20)
            print(f"All Neighbors (Response Time: {response_time_all:.2f} seconds):")
            print(response_all)
            print("-" * 20)
            print(f"Contiguous Neighbors Only (Response Time: {response_time_contiguous:.2f} seconds):")
            print(response_contiguous)
            print("=" * 40)


In [None]:
import pandas as pd
import json

# Create an empty DataFrame to store results, with strategies and allocations included
# Results cell: query both models with both prompt variants for every ZIP code
# and store the parsed allocations in one CSV row per ZIP code.
# NOTE: this cell reads demographic_df, grouped_facilities, combined_df, budget
# and client, none of which are assigned here, and it calls
# extract_allocation_vector, which is defined in a LATER cell -- that cell has
# to be executed first.
columns = ['Llama With Contiguous', 'Llama Without Contiguous', 'Mixtral With Contiguous', 'Mixtral Without Contiguous']
results_df = pd.DataFrame(columns=columns)

# Loop through each ZIP code
for zipcode in demographic_df['ZIP_Code'].unique():
    # All demographic rows for this ZIP; only the first one is used below
    row = demographic_df[demographic_df['ZIP_Code'] == zipcode]
    # Read here but not used anywhere else in this cell
    population = row.iloc[0]['Total_Population']

    # Prepare demographic data for the prompt
    zipcode_data = "\n".join([f"{column}: {row.iloc[0][column]}" for column in row.columns])

    # Find health facility data for the ZIP code
    health_data = grouped_facilities[grouped_facilities['ZIP'] == zipcode]

    # Initialize storage for strategy names and allocations
    # (each of these is reassigned inside the loops below)
    llama_with_contiguous = {}
    llama_without_contiguous = {}
    mixtral_with_contiguous = {}
    mixtral_without_contiguous = {}

    # Llama model with and without contiguous neighbors
    # (the list has a single element, so this loop body runs once)
    for model in ["meta-llama/Meta-Llama-3.1-70B-Instruct"]:
        # With contiguous neighbors
        prompt_background_with, prompt_text_with = prompt_with_contiguous_neighbors(
            zipcode, zipcode_data, action_list, health_data, budget, demographic_df, combined_df, grouped_facilities)
        llama_with_contiguous_response, llama_with_contiguous_time = generate_text(
            client, model, prompt_background_with, prompt_text_with)

        # Parse the result and extract the allocation vector by strategy
        llama_with_contiguous = extract_allocation_vector(llama_with_contiguous_response)

        # Without contiguous neighbors
        prompt_background_without, prompt_text_without = prompt_with_non_contiguous(
            zipcode, zipcode_data, action_list, health_data, budget, combined_df)
        llama_without_contiguous_response, llama_without_contiguous_time = generate_text(
            client, model, prompt_background_without, prompt_text_without)

        llama_without_contiguous = extract_allocation_vector(llama_without_contiguous_response)

    # Mixtral model with and without contiguous neighbors
    # (again a single-element list: the body runs once)
    for model in ["mistralai/Mixtral-8x7B-Instruct-v0.1"]:
        # With contiguous neighbors
        prompt_background_with, prompt_text_with = prompt_with_contiguous_neighbors(
            zipcode, zipcode_data, action_list, health_data, budget, demographic_df, combined_df, grouped_facilities)
        mixtral_with_contiguous_response, mixtral_with_contiguous_time = generate_text(
            client, model, prompt_background_with, prompt_text_with)

        mixtral_with_contiguous = extract_allocation_vector(mixtral_with_contiguous_response)

        # Without contiguous neighbors
        prompt_background_without, prompt_text_without = prompt_with_non_contiguous(
            zipcode, zipcode_data, action_list, health_data, budget, combined_df)
        mixtral_without_contiguous_response, mixtral_without_contiguous_time = generate_text(
            client, model, prompt_background_without, prompt_text_without)

        mixtral_without_contiguous = extract_allocation_vector(mixtral_without_contiguous_response)

    # Insert the results into the DataFrame as dictionaries
    # (row label = ZIP code; value order matches `columns` above)
    results_df.loc[zipcode] = [
        llama_with_contiguous, 
        llama_without_contiguous, 
        mixtral_with_contiguous, 
        mixtral_without_contiguous
    ]

# The response times collected above are not written to the CSV
results_df.to_csv('ZIP_Code_Allocation_Results.csv', index=True)

print("Results saved to ZIP_Code_Allocation_Results.csv")

In [None]:
def extract_allocation_vector(llm_response):
    """Pull the per-ZIP strategy allocations out of an LLM reply.

    The reply may wrap its JSON in extra prose. Parsing starts at the first
    ``{`` and stops at the end of the first complete JSON object, so text
    after the object (even text containing braces) is ignored.

    Two reply layouts are understood:

    * ``{"Zipcode": ..., "Strategies": {...}}`` -- the result is keyed by the
      ``"Zipcode"`` value, or ``"Unknown ZIP Code"`` when that key is absent.
    * ``{"<zip>": {"Strategies": {...}}, ...}`` -- one result entry per
      top-level key whose value is a dict containing ``"Strategies"``.

    Args:
        llm_response: Raw text returned by the model.

    Returns:
        A dict mapping ZIP key to its ``"Strategies"`` value. An empty dict is
        returned (and a message printed) when nothing usable is found; this
        function never raises for malformed replies, so one bad reply does
        not abort a long batch of requests.
    """
    # Debug: Print the response to identify any issues
    print(f"LLM Response:\n{llm_response}")

    if not isinstance(llm_response, str):
        print(f"Error extracting allocation vector: expected a string, got {type(llm_response).__name__}")
        return {}

    start_index = llm_response.find("{")
    if start_index == -1:
        print("Error extracting allocation vector: No JSON object found in the response")
        return {}

    try:
        # raw_decode parses exactly one JSON value and tolerates trailing
        # text, unlike slicing up to the last "}" in the whole reply.
        response_json, _ = json.JSONDecoder().raw_decode(llm_response[start_index:])
    except json.JSONDecodeError as e:
        print(f"Error extracting allocation vector: Invalid JSON structure. {e}")
        return {}

    # The text starts with "{", so a successful parse always yields a dict
    allocation_vectors = {}
    try:
        if "Strategies" in response_json:
            # Single ZIP code case where strategies are directly included
            zip_key = response_json.get("Zipcode", "Unknown ZIP Code")
            allocation_vectors[zip_key] = response_json["Strategies"]
        else:
            # Case where the response contains multiple ZIP codes
            for zipcode_key, entry in response_json.items():
                if isinstance(entry, dict) and 'Strategies' in entry:
                    allocation_vectors[zipcode_key] = entry['Strategies']
                else:
                    print(f"No 'Strategies' found for ZIP code {zipcode_key}")
    except TypeError as e:
        # Raised when the "Zipcode" value is unhashable (a list or an object)
        print(f"Error extracting allocation vector: {e}")
        return {}

    return allocation_vectors


In [None]:
# Inspect the full-table prompt text most recently assigned by the driver loop
print(prompt_text_all)

In [None]:
# Inspect the contiguous-neighbors prompt text most recently assigned by the driver loop
print(prompt_text_contiguous)