# Install packages

In [2]:
# ! pip install pyvis
# ! pip install networkx

# Import packages

In [1]:
import os
import requests
import networkx as nx
import pandas as pd
import geopandas as gpd
from pyvis.network import Network
import openai


# Define Solution class
Please run the following cell to define the functions

In [25]:
# %load_ext autoreload
# %autoreload 2

import LLM_Geo_Constants as constants
import helper
# import LLM_Geo_kernel.Solution as Solution

from LLM_Geo_kernel import Solution

%load_ext autoreload
%autoreload 2

OpenAI_key: <REDACTED - an API key was printed here; never print or commit secrets, and rotate any key that has been exposed>
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Demonstration 1: Resident living with hazardous wastes

## Input task and data description

In [12]:
# Natural-language statement of the analysis; passed to Solution as `task`.
TASK = r"""1) Find out the total population that lives within a tract that contain hazardous waste facilities. The study area is North Carolina, US.
2) Generate a map to show the spatial distribution of population at the tract level and highlight those tracts that contain at least one hazardous waste facility.
"""

# One free-text description per input dataset; passed to Solution as
# `data_locations`. Each entry embeds the download URL, so the URLs must be
# exact: the repository segment is "LLM-Geo" with no whitespace.
DATA_LOCATIONS = [
    "NC hazardous waste facility ESRI shape file location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip.",
    "NC tract boundary shapefile location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip. The tract id column is 'Tract'.",
    "NC tract population CSV file location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/NC_tract_population.csv. The population is stored in 'TotalPopulation' column. The tract ID column is 'GEOID'.",
]

# Working directory for this demonstration: <current directory>/<task_name>.
task_name = 'Resident_at_risk_counting'
save_dir = os.path.join(os.getcwd(), task_name)
os.makedirs(save_dir, exist_ok=True)  # exist_ok keeps the cell re-runnable

# Create the solution object.
# Alternative model: r"gpt-3.5-turbo"
model = r"gpt-4"
solution = Solution(
    task=TASK,
    task_name=task_name,
    save_dir=save_dir,
    data_locations=DATA_LOCATIONS,
    model=model,
)

# Show the prompt that will be sent to the LLM to obtain the solution graph.
print("Prompt to get solution graph:\n")
print(solution.graph_prompt)

Prompt to get solution graph:

Your role: A professional Geo-information scientist and developer good at Python. 
Task: Generate a graph (data structure) only, whose nodes are (1) a series of consecutive steps and (2) data to solve this question:  
 1) Find out the total population that lives within a tract that contain hazardous waste facilities. The study area is North Carolina, US.
2) Generate a map to show the spatial distribution of population at the tract level and highlight those tracts that contain at least one hazardous waste facility.
 
Data locations (each data is a node): 1. NC hazardous waste facility ESRI shape file location: https://github.com/gladcolor/LLM- Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip.
2. NC tract boundary shapefile location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip. The tract id column is 'Tract'.
3. NC tract population CSV file location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analy

## Get graph code from GPT API

In [13]:
# Query the LLM for the graph-building code and keep the reply both in a
# notebook variable and on the solution object.
response_for_graph = solution.graph_response = solution.get_LLM_response_for_graph()
solution.save_solution()  # presumably persists the solution state - confirm in LLM_Geo_kernel

# Blank separator line, then the header and the code held in
# `solution.code_for_graph`.
print()
print("Code to generate solution graph: \n")
print(solution.code_for_graph)

Geting reply...
Got reply.

Code to generate solution graph: 

import networkx as nx
G = nx.DiGraph()

# 1 Load hazardous waste site shapefile
G.add_node("haz_waste_shp_url", node_type="data", data_path="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip", description="Hazardous waste facility shapefile URL")
G.add_node("load_haz_waste_shp", node_type="operation", description="Load hazardous waste facility shapefile")
G.add_edge("haz_waste_shp_url", "load_haz_waste_shp")
G.add_node("haz_waste_gdf", node_type="data", description="Hazardous waste facility GeoDataFrame")
G.add_edge("load_haz_waste_shp", "haz_waste_gdf")

# 2 Load NC tract boundary shapefile
G.add_node("tract_shp_url", node_type="data", data_path="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip", description="NC tract boundary shapefile URL")
G.add_node("load_tract_shp", node_type="operation", description="Load NC tract boundary shapefile")
G.add_edg

## Execute code to generate the solution graph

In [15]:
# SECURITY NOTE(review): this runs the LLM-generated graph code unsandboxed in
# the notebook's own namespace. Inspect `solution.code_for_graph` (printed by
# the previous cell) before executing it.
exec(solution.code_for_graph)
# Presumably loads the graph file produced by the code executed above - confirm
# in LLM_Geo_kernel.
solution_graph = solution.load_graph_file()

# Show the graph
# Read the GraphML file referenced by the solution. This rebinds `G`, a name
# the executed graph code may also have bound.
G = nx.read_graphml(solution.graph_file)  
nt = helper.show_graph(G)
# The page is named after the task and placed in the current working directory.
html_name = os.path.join(os.getcwd(), solution.task_name + '.html')  
# The HTML file should be in the same directory (see the link below), which is
# why the path above is built from os.getcwd(). See:
# https://stackoverflow.com/questions/65564916/error-displaying-pyvis-html-inside-jupyter-lab-cell
nt.show(name=html_name)
# html_name

E:\OneDrive_USC\OneDrive - University of South Carolina\Research\Spatial_ChatGPT\LLM_Geo\Resident_at_risk_counting.html


## Generate prompts and code for operations (functions)

In [26]:
# Ask the LLM for the code of every operation, then save the solution.
operations = solution.get_LLM_responses_for_operations()
solution.save_solution()

# Each entry carries its generated source under the 'operation_code' key;
# join the snippets with newlines into a single source string.
all_operation_code_str = '\n'.join(op['operation_code'] for op in operations)

print("All operation code: \n")
print(all_operation_code_str)

1 / 7, load_haz_waste_shp
Geting reply...
Got reply.
2 / 7, load_tract_shp
Geting reply...
Got reply.
3 / 7, load_tract_pop_csv
Geting reply...
Got reply.
4 / 7, find_tracts_with_haz_waste
Geting reply...
Got reply.
5 / 7, join_pop_to_tract
Geting reply...
Got reply.
6 / 7, calc_total_pop_at_risk
Geting reply...
Got reply.
7 / 7, generate_map
Geting reply...
Got reply.
All operation code: 

import geopandas as gpd

def load_haz_waste_shp(haz_waste_shp_url="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip"):
    """
    Load hazardous waste facility shapefile
    
    Args:
    haz_waste_shp_url: Hazardous waste facility shapefile URL
    
    Returns:
    haz_waste_gdf: GeoDataFrame of hazardous waste facilities
    """

    # Load the hazardous waste shapefile directly from the URL
    haz_waste_gdf = gpd.read_file(haz_waste_shp_url)
    
    return haz_waste_gdf
import geopandas as gpd

def load_tract_shp(tract_shp_url='https://github.com/gla

## Generate prompts and code for assembly program

In [32]:
# Ask the LLM for the assembly program; the reply is kept in a notebook variable.
assembly_LLM_response = solution.get_LLM_assembly_response()
# Disabled: explicit storing of the reply on the solution object.
# NOTE(review): presumably get_LLM_assembly_response() already stores it - confirm.
# solution.assembly_LLM_response = assembly_LLM_response
solution.save_solution()

# Display the assembly code held in `solution.code_for_assembly`.
print("Assembly code: \n")
print(solution.code_for_assembly)

Assembly code: 

# Main Program

# Step 1: Load hazardous waste facility shapefile
haz_waste_gdf = load_haz_waste_shp()

# Step 2: Load NC tract boundary shapefile
tract_gdf = load_tract_shp()

# Step 3: Load NC tract population CSV file
tract_pop_df = load_tract_pop_csv()

# Step 4: Join population to tract GeoDataFrame
pop_tract_gdf = join_pop_to_tract(tract_gdf, tract_pop_df)

# Step 5: Find tracts with hazardous waste facilities
tracts_with_haz_waste = find_tracts_with_haz_waste(haz_waste_gdf, pop_tract_gdf)

# Step 6: Calculate total population living in tracts with hazardous waste facilities
total_pop_at_risk = calc_total_pop_at_risk(pop_tract_gdf, tracts_with_haz_waste)
print("Total population living in tracts with hazardous waste facilities:", total_pop_at_risk)

# Step 7: Generate the map
generate_map(pop_tract_gdf, tracts_with_haz_waste)


## Execute assembly code

In [36]:
# Rebuild the combined operation source from `operations`, then append the
# assembly program so the function definitions precede the code that calls them.
all_operation_code_str = '\n'.join(op['operation_code'] for op in operations)
all_code = all_operation_code_str + '\n' + solution.code_for_assembly

# SECURITY NOTE(review): this executes LLM-generated code unsandboxed in the
# notebook namespace; review `all_code` before running it.
# The recorded run of this cell ended with KeyError: 'TRACTCE'.
exec(all_code)

  exec(all_code)


KeyError: 'TRACTCE'

In [37]:
# Print the full source string that was passed to exec(), for inspection.
print(all_code)

import geopandas as gpd

def load_haz_waste_shp(haz_waste_shp_url="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip"):
    """
    Load hazardous waste facility shapefile
    
    Args:
    haz_waste_shp_url: Hazardous waste facility shapefile URL
    
    Returns:
    haz_waste_gdf: GeoDataFrame of hazardous waste facilities
    """

    # Load the hazardous waste shapefile directly from the URL
    haz_waste_gdf = gpd.read_file(haz_waste_shp_url)
    
    return haz_waste_gdf
import geopandas as gpd

def load_tract_shp(tract_shp_url='https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip'):
    """
    Description: Load NC tract boundary shapefile
    
    Parameters:
    - tract_shp_url: Tract boundary shapefile URL
    
    Returns:
    - tract_gdf: Tract boundary GeoDataFrame
    """
    tract_gdf = gpd.read_file(tract_shp_url)
    return tract_gdf
def load_tract_pop_csv(tract_pop_csv_url='https://github.com/glad