In [17]:
import re

import geopandas as gpd
import pandas as pd
from shapely import wkt

In [21]:
def _read_wkt_csv(csv_path, geometry_column):
    """Read a CSV file and return it as a GeoDataFrame.

    Every value in ``geometry_column`` is parsed with ``shapely.wkt.loads``
    and that column is then used as the frame's geometry.
    """
    frame = pd.read_csv(csv_path)
    frame[geometry_column] = frame[geometry_column].apply(wkt.loads)
    return gpd.GeoDataFrame(frame, geometry=geometry_column)


# The parcel file keeps its geometry in `shape`, the zoning file in `the_geom`.
prc = _read_wkt_csv('../sf_parcels.csv', 'shape')
zn = _read_wkt_csv('../sf_zoning.csv', 'the_geom')

# Show both frames as the cell result.
prc, zn

(       mapblklot    blklot block_num lot_num  from_address_num  \
 0        0001001   0001001      0001     001               0.0   
 1        0002001   0002001      0002     001               0.0   
 2        0004002   0004002      0004     002             160.0   
 3        0005001   0005001      0005     001             206.0   
 4        0006001   0006001      0006     001             350.0   
 ...          ...       ...       ...     ...               ...   
 235048   0977012  0977C004     0977C     004            2872.0   
 235049   0977012  0977C005     0977C     005            2872.0   
 235050   0977012  0977C006     0977C     006            2872.0   
 235051   0977012  0977C007     0977C     007            2872.0   
 235052   0977012  0977C008     0977C     008            2872.0   
 
         to_address_num street_name street_type odd_even  in_asr_secured_roll  \
 0                  0.0     UNKNOWN         NaN        E                 True   
 1                  0.0     UNKN

In [76]:
def find_parcels_by_address(parcels, street_name, house_number):
    """Return the rows of ``parcels`` whose address range covers an address.

    A row is kept when its ``street_name`` equals ``street_name`` exactly and
    ``from_address_num <= house_number <= to_address_num``.

    Parameters
    ----------
    parcels : frame with ``street_name``, ``from_address_num`` and
        ``to_address_num`` columns.
    street_name : str
        Compared verbatim, so it has to be spelled the way the data spells it
        (the rows shown in this notebook are upper case with no suffix).
    house_number : int or float
        The house number to look for.

    Returns
    -------
    The matching subset of ``parcels``; empty when nothing matches.
    """
    on_street = parcels['street_name'] == street_name
    covers_number = (
        (parcels['from_address_num'] <= house_number)
        & (house_number <= parcels['to_address_num'])
    )
    return parcels[on_street & covers_number]


# The address being looked up; change these two values to query another parcel.
MY_STREET_NAME = 'NOE'
MY_HOUSE_NUMBER = 483

my_house = find_parcels_by_address(prc, MY_STREET_NAME, MY_HOUSE_NUMBER)

# Inner spatial join: one output row per zoning geometry that intersects a
# matched parcel geometry.
my_zoning = gpd.sjoin(my_house, zn, how="inner", predicate="intersects")
my_zoning

Unnamed: 0,mapblklot,blklot,block_num,lot_num,from_address_num,to_address_num,street_name,street_type,odd_even,in_asr_secured_roll,...,project_id_alt,active,shape,index_right,zoning_sim,districtname,url,gen,zoning,codesection
130405,3581123,3581124,3581,124,483.0,483.0,NOE,ST,O,True,...,,True,"MULTIPOLYGON (((-122.43268 37.76132, -122.4326...",1115,RH-3,"RESIDENTIAL- HOUSE, THREE FAMILY",https://codelibrary.amlegal.com/codes/san_fran...,Residential,RH-3,209.1


In [69]:
from typing import List, Optional
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
import dotenv
# Load variables from a .env file into the environment before the client is
# built (presumably this is where the OpenAI API key comes from — confirm).
dotenv.load_dotenv()

# temperature=0 asks for the least-random sampling, so re-running the notebook
# gives the most repeatable extraction the API allows. Without it the client
# falls back to the library's default temperature.
model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

# One street address extracted from the query text.
# NOTE(review): documented with `#` comments on purpose. A class docstring
# would presumably surface in the pydantic schema that the output parser turns
# into prompt text, changing what is sent to the model — confirm before adding one.
class Address(BaseModel):
    # House number, held as a string.
    number: str = Field(description="the house number of the address")
    # Street name; the description asks the model for upper case and no suffix.
    street: str = Field(description="the street the address is on, in all caps, without any suffix like st or rd")

# Top-level structure handed to the output parser: all addresses found in one query.
# NOTE(review): `#` comments rather than a docstring, since a class docstring
# would presumably end up in the generated schema / prompt — confirm.
class AddressResponse(BaseModel):
    # Zero or more Address entries, as described to the model by the field text.
    addresses: List[Address] = Field(description="all addresses found in the query")

# Parser built around AddressResponse; it also supplies the formatting
# instructions that are embedded in the prompt below.
parser = PydanticOutputParser(pydantic_object=AddressResponse)

# Prompt text: a one-line task statement, the user's query, then the parser's
# format instructions.
template_str = """
Extract addresses from the following query.

{query}

{format_instructions}
"""

# `query` is supplied on every call; `format_instructions` is bound once here.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables=dict(format_instructions=parser.get_format_instructions()),
    template=template_str,
)

def _mentions_number(text, number):
    """Return True when ``number`` appears in ``text`` as a complete number.

    A plain ``number in text`` test accepts an empty string and partial
    matches such as "48" inside "483". Here the match must not be directly
    preceded or followed by another digit, and an empty ``number`` never
    matches.
    """
    if not number:
        return False
    return re.search(rf"(?<!\d){re.escape(number)}(?!\d)", text) is not None


query = "What can I build at 483 Noe st?"

# Fill the prompt with the query and send it to the chat model.
prompt_and_model = prompt | model
output = prompt_and_model.invoke({"query": query})

# Parse the model's reply into an AddressResponse.
response = parser.invoke(output)

# Keep only addresses whose house number really occurs in the query, so a
# number the model made up (or truncated) is dropped.
[a for a in response.addresses if _mentions_number(query, a.number)]

[Address(number='483', street='NOE')]