# Install packages

In [2]:
# ! pip install pyvis
# ! pip install networkx

# Import packages

In [1]:
import os
import requests
import networkx as nx
import pandas as pd
import geopandas as gpd
from pyvis.network import Network
import openai


# Define Solution class
Please run the following cell to import the `Solution` class and the helper modules.

In [2]:
# %load_ext autoreload
# %autoreload 2

import LLM_Geo_Constants as constants
import helper
# import LLM_Geo_kernel.Solution as Solution

from LLM_Geo_kernel import Solution

%load_ext autoreload
%autoreload 2

# Demonstration 1: Resident living with hazardous wastes

## Input task and data description

In [3]:
# Natural-language task statement; it is passed to Solution and appears
# verbatim in the graph prompt printed at the end of this cell.
TASK = r"""1) Find out the total population that lives within a tract that contain hazardous waste facilities. The study area is North Carolina, US.
2) Generate a map to show the spatial distribution of population at the tract level and highlight those tracts that contain at least one hazardous waste facility.
"""

# One free-text entry per dataset: what it is, where to download it, and the
# relevant column names. Each entry is listed in the prompt as a data node.
# NOTE: URLs must not contain whitespace; the first URL previously read
# "LLM- Geo", which does not match the repository name used by the other two.
DATA_LOCATIONS = ["NC hazardous waste facility ESRI shape file location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip.",
                  "NC tract boundary shapefile location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip. The tract id column is 'Tract'.",
                  "NC tract population CSV file location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/NC_tract_population.csv. The population is stored in 'TotalPopulation' column. The tract ID column is 'GEOID'."
                 ]

# Everything produced for this task is kept in a sub-directory of the current
# working directory named after the task.
task_name ='Resident_at_risk_counting'
save_dir = os.path.join(os.getcwd(), task_name)
os.makedirs(save_dir, exist_ok=True)

# create graph
# model=r"gpt-3.5-turbo"
model=r"gpt-4"
solution = Solution(
                    task=TASK,
                    task_name=task_name,
                    save_dir=save_dir,
                    data_locations=DATA_LOCATIONS,
                    model=model,
                    )

# Show the prompt held by the Solution object so it can be inspected before
# the model is queried in the next section.
print("Prompt to get solution graph:\n")
print(solution.graph_prompt)

Prompt to get solution graph:

Your role: A professional Geo-information scientist and developer good at Python. 
Task: Generate a graph (data structure) only, whose nodes are (1) a series of consecutive steps and (2) data to solve this question:  
 1) Find out the total population that lives within a tract that contain hazardous waste facilities. The study area is North Carolina, US.
2) Generate a map to show the spatial distribution of population at the tract level and highlight those tracts that contain at least one hazardous waste facility.
 
Data locations (each data is a node): 1. NC hazardous waste facility ESRI shape file location: https://github.com/gladcolor/LLM- Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip.
2. NC tract boundary shapefile location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip. The tract id column is 'Tract'.
3. NC tract population CSV file location: https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analy

## Get graph code from GPT API

In [4]:
# Query the LLM for the solution-graph code and bind the reply both to the
# notebook variable and to the Solution object in a single statement.
response_for_graph = solution.graph_response = solution.get_LLM_response_for_graph()
# Save the solution now that it carries the graph response
# (presumably writes to save_dir -- confirm in Solution.save_solution).
solution.save_solution()

# Blank line, heading, then the generated graph code, one per line.
print("", "Code to generate solution graph: \n", solution.code_for_graph, sep="\n")

Geting reply...
Got reply.

Code to generate solution graph: 

import networkx as nx

G = nx.DiGraph()

# 1 Load hazardous waste site shapefile
G.add_node("haz_waste_shp_url", node_type="data", data_path="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/Hazardous_Waste_Sites.zip", description="Hazardous waste facility shapefile URL")
G.add_node("load_haz_waste_shp", node_type="operation", description="Load hazardous waste facility shapefile")
G.add_edge("haz_waste_shp_url", "load_haz_waste_shp")
G.add_node("haz_waste_gdf", node_type="data", description="Hazardous waste facility GeoDataFrame")
G.add_edge("load_haz_waste_shp", "haz_waste_gdf")

# 2 Load NC tract boundary shapefile
G.add_node("tract_shp_url", node_type="data", data_path="https://github.com/gladcolor/LLM-Geo/raw/master/overlay_analysis/tract_shp_37.zip", description="NC Tract boundary shapefile URL")
G.add_node("load_tract_shp", node_type="operation", description="Load NC Tract boundary shapefile")
G.add_ed

## Execute code to generate the solution graph

In [12]:
# Run the graph-building code returned by the LLM in the notebook namespace.
# SECURITY NOTE(review): exec() runs model-generated code unchecked; review
# solution.code_for_graph (printed in the previous section) before running.
# Any names that code defines (e.g. `G`) land in this namespace.
exec(solution.code_for_graph)
# Load the graph through the Solution object
# (presumably from the file the executed code wrote -- TODO confirm).
solution_graph = solution.load_graph_file()

# Show the graph
# `G` is rebound here to the graph read back from solution.graph_file.
G = nx.read_graphml(solution.graph_file)  
nt = helper.show_graph(G)
# Name the HTML page after the task and place it in the current working directory.
html_name = os.path.join(os.getcwd(), solution.task_name + '.html')  
# The HTML file should be in the same directory as the notebook. See:
# https://stackoverflow.com/questions/65564916/error-displaying-pyvis-html-inside-jupyter-lab-cell
nt.show(name=html_name)
# html_name

E:\Research\LLM-Geo\Resident_at_risk_counting.html


## Generate prompts and code for operations (functions)

3

In [18]:
# Ask the LLM for the code of every operation in the solution; the result is
# iterated later as a sequence of mappings with an 'operation_code' entry.
operations = solution.get_LLM_responses_for_operations()
# Save the solution after the operation responses have been collected.
solution.save_solution()
# Last expression of the cell: displays the collected operations.
# NOTE(review): an empty list here means no operation code was returned and
# the assembly program executed later will have no functions to call.
operations
# all_operation_code_str = '\n'.join([operation['operation_code'] for operation in operations])
# print("All operation code: \n")
# print(all_operation_code_str)

[]

## Generate prompts and code for assembly program

In [10]:
# Query the LLM for the assembly program, i.e. the code that calls the
# generated operations (see the `main()` it returns in the output below).
assembly_LLM_response = solution.get_LLM_assembly_response()
# solution.assembly_LLM_response = assembly_LLM_response
# Save the solution after the assembly response has been received.
solution.save_solution()

# Heading followed by the assembly code, separated by a newline.
print("Assembly code: \n", solution.code_for_assembly, sep="\n")

Geting reply...
Got reply.
import geopandas as gpd
import pandas as pd

def main():
    # Load hazardous waste facility GeoDataFrame
    haz_waste_gdf = load_haz_waste_shp()

    # Load NC Tract boundary GeoDataFrame
    tract_gdf = load_tract_shp()

    # Load NC Tract population DataFrame
    tract_pop_df = load_population_csv()

    # Spatial join between tract and hazardous waste facility
    tract_haz_join = gpd.sjoin(tract_gdf, haz_waste_gdf, how="inner", op="intersects")

    # Group by Tract and count hazardous waste facilities
    tract_haz_counts = tract_haz_join.groupby("Tract")["index_right"].count().reset_index(name="Haz_waste_count")

    # Merge population and hazardous waste counts
    tract_pop_haz = tract_pop_df.merge(tract_haz_counts, left_on="GEOID", right_on="Tract", how="left")

    # Calculate total population in hazardous waste-affected tracts
    total_pop_in_haz_tracts = tract_pop_haz.loc[tract_pop_haz["Haz_waste_count"] > 0, "TotalPopulation"].sum()
    print

## Execute assembly code

In [11]:
# Stitch the code of all generated operations together, newline-separated.
all_operation_code_str = '\n'.join(op['operation_code'] for op in operations)
# Append the assembly program after the operation definitions it relies on.
all_code = '\n'.join((all_operation_code_str, solution.code_for_assembly))
# SECURITY NOTE(review): this executes LLM-generated code in the notebook
# namespace; inspect `all_code` before running it on anything sensitive.
exec(all_code)

  exec(all_code)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: PROJCS["unknown",GEOGCS["GCS_unknown",DATUM["North ...
Right CRS: EPSG:3857



Total population in tracts with hazardous waste facilities: 0


KeyError: 'GEOID'

In [None]:
# Print the full program that was executed (operation code + assembly code),
# which helps when debugging errors raised inside the generated code.
print(all_code)