In [17]:
import re

import geopandas as gpd
import pandas as pd
from shapely import wkt

In [82]:
def load_wkt_csv(path, geometry_col, crs=None):
    """Read a CSV whose `geometry_col` holds WKT text and return a GeoDataFrame.

    Parameters
    ----------
    path : str or path-like
        File handed straight to `pd.read_csv`.
    geometry_col : str
        Column containing WKT strings. It is parsed in place and becomes the
        active geometry column of the result.
    crs : optional
        Coordinate reference system forwarded to `gpd.GeoDataFrame`. The
        default (None) attaches no CRS.

    Returns
    -------
    geopandas.GeoDataFrame
        All CSV columns, with `geometry_col` converted to shapely geometries.
    """
    frame = pd.read_csv(path)
    # Each cell is WKT text; wkt.loads turns it into a shapely geometry object.
    frame[geometry_col] = frame[geometry_col].apply(wkt.loads)
    return gpd.GeoDataFrame(frame, geometry=geometry_col, crs=crs)


# Both layers go through the same loader so they are parsed identically.
# NOTE(review): the coordinates shown in the output look like lon/lat degrees;
# if that is confirmed, pass crs="EPSG:4326" to both calls so the frames carry it.
prc = load_wkt_csv('../sf_parcels.csv.gz', geometry_col='shape')
zn = load_wkt_csv('../sf_zoning.csv.gz', geometry_col='the_geom')

prc

Unnamed: 0,mapblklot,blklot,block_num,lot_num,from_address_num,to_address_num,street_name,street_type,odd_even,in_asr_secured_roll,...,date_rec_add,date_rec_drop,date_map_add,date_map_drop,date_map_alt,project_id_add,project_id_drop,project_id_alt,active,shape
0,0001001,0001001,0001,001,0.0,0.0,UNKNOWN,,E,True,...,,,1998-07-01,,,ORIG_BASEMAP,,,True,"MULTIPOLYGON (((-122.42200 37.80848, -122.4220..."
1,0002001,0002001,0002,001,0.0,0.0,UNKNOWN,,E,True,...,,,1998-07-01,,,ORIG_BASEMAP,,,True,"MULTIPOLYGON (((-122.42083 37.80863, -122.4208..."
2,0004002,0004002,0004,002,160.0,160.0,JEFFERSON,ST,E,True,...,,,1998-07-01,,,ORIG_BASEMAP,,,True,"MULTIPOLYGON (((-122.41570 37.80833, -122.4157..."
3,0005001,0005001,0005,001,206.0,206.0,JEFFERSON,ST,E,True,...,,,1998-07-01,,,ORIG_BASEMAP,,,True,"MULTIPOLYGON (((-122.41735 37.80812, -122.4174..."
4,0006001,0006001,0006,001,350.0,366.0,JEFFERSON,ST,E,True,...,,,1998-07-01,,,ORIG_BASEMAP,,,True,"MULTIPOLYGON (((-122.41897 37.80791, -122.4191..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235048,0977012,0977C004,0977C,004,2872.0,2888.0,JACKSON,ST,E,True,...,,,,,,,,,True,"MULTIPOLYGON (((-122.44253 37.79155, -122.4425..."
235049,0977012,0977C005,0977C,005,2872.0,2888.0,JACKSON,ST,E,True,...,,,,,,,,,True,"MULTIPOLYGON (((-122.44253 37.79155, -122.4425..."
235050,0977012,0977C006,0977C,006,2872.0,2888.0,JACKSON,ST,E,True,...,,,,,,,,,True,"MULTIPOLYGON (((-122.44253 37.79155, -122.4425..."
235051,0977012,0977C007,0977C,007,2872.0,2888.0,JACKSON,ST,E,True,...,,,,,,,,,True,"MULTIPOLYGON (((-122.44253 37.79155, -122.4425..."


In [80]:
# The address being looked up, named here instead of buried as literals in the mask.
STREET_NAME = 'NOE'
HOUSE_NUMBER = 483


def parcels_at_address(parcels, street_name, house_number):
    """Select the rows of `parcels` whose address range covers a house number.

    Parameters
    ----------
    parcels : DataFrame or GeoDataFrame
        Must have `street_name`, `from_address_num` and `to_address_num` columns.
    street_name : str
        Street to match; it is upper-cased before comparison.
    house_number : int or str
        House number; a numeric string such as '483' is accepted and converted
        with `int`.

    Returns
    -------
    Same type as `parcels`
        Rows where `street_name` matches and
        `from_address_num <= house_number <= to_address_num`.

    Raises
    ------
    ValueError
        If `house_number` cannot be converted to an integer.
    """
    number = int(house_number)
    on_street = parcels['street_name'] == street_name.upper()
    covers_number = (
        (parcels['from_address_num'] <= number)
        & (number <= parcels['to_address_num'])
    )
    return parcels[on_street & covers_number]


my_house = parcels_at_address(prc, STREET_NAME, HOUSE_NUMBER)

# Inner join keeps only parcel/zone pairs whose geometries intersect, so the
# row count is the number of zoning polygons touching the selected parcel(s).
my_zoning = gpd.sjoin(my_house, zn, how="inner", predicate="intersects")
len(my_zoning)

1

In [69]:
from typing import List, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
import dotenv
# Load variables from a local .env file into the process environment before the
# chat client is constructed (presumably this is where the API key comes from).
dotenv.load_dotenv()

# temperature=0 makes the extraction as repeatable as the API allows, so
# re-running the notebook does not yield sampling-dependent addresses.
model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

# One street address pulled out of a free-text query.
# Documented with `#` comments on purpose: this model is nested in
# AddressResponse, whose schema is turned into the format instructions sent to
# the LLM, and a class docstring is understood to be copied into that schema
# (which would change the prompt).
class Address(BaseModel):
    # House number, kept as text; the notebook later checks it against the raw query string.
    number: str = Field(description="the house number of the address")
    # Street name; the description asks the model for upper case with no suffix.
    street: str = Field(description="the street the address is on, in all caps, without any suffix like st or rd")

# Top-level shape the LLM reply is parsed into: a list of Address items.
# This is the model handed to PydanticOutputParser, so its field descriptions
# end up in the prompt's format instructions; `#` comments are used instead of
# a docstring to leave that generated text untouched.
class AddressResponse(BaseModel):
    # Every address the model found in the query; may hold zero, one or many entries.
    addresses: List[Address] = Field(description="all addresses found in the query")

# Prompt skeleton, one source line per output line. `{query}` is filled in per
# call; `{format_instructions}` is supplied once from the output parser.
template_str = (
    "\n"
    "Extract addresses from the following query.\n"
    "\n"
    "{query}\n"
    "\n"
    "{format_instructions}\n"
)

# The parser plays two roles: it describes the expected reply format to the
# model and later turns the reply into an AddressResponse.
parser = PydanticOutputParser(pydantic_object=AddressResponse)

# The format instructions are the same for every query, so they are bound once
# as a partial variable; only `query` remains to be supplied at call time.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template=template_str,
)

def appears_as_token(text, token):
    """Return True when `token` is non-empty and occurs in `text` as a whole token.

    "Whole" means the match is not directly preceded or followed by a word
    character, so '48' does not match inside '483'. An empty `token` never
    matches (a plain `in` test would accept it for every string).
    """
    if not token:
        return False
    pattern = rf"(?<!\w){re.escape(token)}(?!\w)"
    return re.search(pattern, text) is not None


query = "What can I build at 483 Noe st?"

# Render the prompt and send it to the chat model. The raw reply is kept in
# `output` so it can be inspected if parsing fails.
prompt_and_model = prompt | model
output = prompt_and_model.invoke({"query": query})
response = parser.invoke(output)

# Guard against invented addresses: keep only those whose house number is
# literally present in the query as a standalone token.
[a for a in response.addresses if appears_as_token(query, a.number)]

[Address(number='483', street='NOE')]