In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv

# Merge variables from a .env file into the process environment before the
# key lookup below.
load_dotenv()
# The lookup yields None when OPENAI_API_KEY is not defined.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Plain-text description of the `buildings` table. make_user_input() embeds
# this text verbatim in the user message, so any edit here changes the prompt
# sent to the model.
DB_SCHEMA = """
Database: PostgreSQL 16 with PostGIS

Tables:
- buildings
  - bin: building id number
  - base_bbl
  - mapp_bbl
  - doitt_id
  - feat_code
  - laststatus
  - borocode
  - shape_area
  - shape_leng
  - geom
  - built_year
  - ground_ele: Ground elevation
  - heightroof
  - small_n: small neighborhood
  - small_n_a: small neighborhood abbreviation
"""

def make_user_input(query: str) -> list:
    """Build the two-message chat payload for a column-lookup request.

    Args:
        query: The natural-language question to map onto a column.

    Returns:
        A list with a system message followed by a user message. The user
        message embeds the module-level ``DB_SCHEMA`` text and ``query``.
    """
    system_message = {
        "role": "system",
        "content": "You translate natural-language questions into the most relevant column in the PostgreSQL db table: buildings.",
    }
    user_prompt = f"Schema:\n{DB_SCHEMA}\n\nTask:provide exactly one word output: the column name based on the query'{query}'"
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]

def get_column_from_query(query: "str | None" = None, model: str = "gpt-5-nano"):
    """Ask the model which ``buildings`` column best matches a question.

    Args:
        query: Natural-language question. When ``None`` (the default, which
            keeps the original zero-argument call working), the question is
            read interactively with ``input("Query: ")``. Passing it
            explicitly lets the notebook run top to bottom without a prompt.
        model: Model name forwarded to ``client.responses.create``.

    Returns:
        dict: ``{"column": <stripped model output>, "usage": {"total": ...,
        "input": ..., "output": ...}}``. Each usage value is ``None`` when
        the response carries no usage object.

    Raises:
        ValueError: If the query is empty after stripping whitespace.
    """
    if query is None:
        query = input("Query: ")
    q = query.strip()
    if not q:
        # Refuse early instead of spending tokens on a request with no question.
        raise ValueError("Query must not be empty.")

    resp = client.responses.create(
        model=model,
        input=make_user_input(q),
    )

    # `usage` is optional on the response object; getattr keeps the result
    # shape stable instead of raising AttributeError when it is None.
    usage = resp.usage
    return {
        "column": resp.output_text.strip(),
        "usage": {
            "total": getattr(usage, "total_tokens", None),
            "input": getattr(usage, "input_tokens", None),
            "output": getattr(usage, "output_tokens", None),
        },
    }

small_n
user query=i want to find out which place in nyc I wanna live
tokens_total=560
tokens_input=168
tokens_output=392


In [10]:
import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv

In [13]:
# Load database settings from the .env file one directory above the current
# working directory.
load_dotenv(dotenv_path="../.env")
db_url = os.getenv('db_url')
if not db_url:
    # Fail here with a message that names the missing variable, rather than
    # letting create_engine() reject a None URL with a less specific error.
    raise RuntimeError(
        "Environment variable 'db_url' is not set; define it in ../.env "
        "or export it before running this cell."
    )
engine = create_engine(db_url)

In [23]:
# Read every row of public.buildings into a GeoDataFrame, using the "geom"
# column as the geometry.
gdf = gpd.read_postgis("SELECT * FROM public.buildings", engine, geom_col="geom")

# Bare last expression so the notebook renders the frame.
gdf

Unnamed: 0,bin,base_bbl,mapp_bbl,doitt_id,feat_code,laststatus,borocode,shape_area,shape_leng,geom,built_year,ground_ele,heightroof,small_n,small_n_a
0,4124976,4055350020,4055350020,385315,2100,Constructed,4,162.261719,60.215267,"MULTIPOLYGON (((-73.78742 40.75496, -73.78742 ...",1940.0,77.0,32.920435,Auburndale,Abrndl
1,5167115,5002060006,5002060006,1270638,5110,Constructed,5,52.515625,29.469501,"MULTIPOLYGON (((-74.11766 40.63428, -74.11765 ...",,56.0,10.955158,Port Richmond,PrtRchmnd
2,2050738,2043890020,2043890020,551813,2100,Constructed,2,163.703125,57.481184,"MULTIPOLYGON (((-73.84427 40.85932, -73.84427 ...",1940.0,49.0,30.010000,Pelham Gardens,PlhmGrdns
3,4535838,4160580035,4160580035,943382,2100,Constructed,4,171.039062,57.171803,"MULTIPOLYGON (((-73.80302 40.59673, -73.80303 ...",1999.0,6.0,28.258209,Rockaway Beach-Arverne-Edgemere,RckwyBch
4,3125970,3053700026,3053700026,122812,2100,Constructed,3,185.371094,58.549030,"MULTIPOLYGON (((-73.97777 40.63942, -73.97791 ...",1920.0,49.0,34.150000,Kensington,Knsngtn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1082077,2000892,2023300018,2023300018,342308,2100,Constructed,2,238.738281,84.037423,"MULTIPOLYGON (((-73.92167 40.81697, -73.92169 ...",1931.0,24.0,47.230000,Melrose,Mlrs
1082078,4225558,4105740062,4105740062,758134,2100,Constructed,4,115.597656,46.398104,"MULTIPOLYGON (((-73.75463 40.72023, -73.75475 ...",1940.0,73.0,28.259327,Queens Village,QnsVlg
1082079,3048363,3017300011,3017300011,605175,2100,Constructed,3,181.753906,62.644689,"MULTIPOLYGON (((-73.94918 40.69823, -73.94916 ...",1931.0,15.0,38.410000,Bedford-Stuyvesant (West),BdSty_W
1082080,3153647,3060480002,3060480002,273187,2100,Constructed,3,158.523438,59.587741,"MULTIPOLYGON (((-74.03387 40.62279, -74.03388 ...",1899.0,71.0,32.200000,Bay Ridge,ByRdg


In [24]:
# Distinct values of the small_n (small neighborhood) column.
gdf["small_n"].unique()

array(['Auburndale', 'Port Richmond', 'Pelham Gardens',
       'Rockaway Beach-Arverne-Edgemere', 'Kensington',
       'Oakwood-Richmondtown', 'Westerleigh-Castleton Corners',
       'Jackson Heights', 'Glendale',
       'Todt Hill-Emerson Hill-Lighthouse Hill-Manor Heights',
       'South Ozone Park', 'Mount Hope', 'Bay Ridge',
       'Sheepshead Bay-Manhattan Beach-Gerritsen Beach',
       'Kew Gardens Hills', 'Tribeca-Civic Center', 'Woodhaven',
       'Kingsbridge-Marble Hill', 'Wakefield-Woodlawn', 'Maspeth',
       'Springfield Gardens (North)-Rochdale Village',
       'Douglaston-Little Neck', 'Riverdale-Spuyten Duyvil', 'Madison',
       'Bushwick (West)', 'Astoria (East)-Woodside (North)',
       'Coney Island-Sea Gate', 'Flushing-Willets Point',
       'East New York (North)', 'Middle Village', 'St. Albans',
       'Whitestone-Beechhurst', 'Marine Park-Mill Basin-Bergen Beach',
       'Oakland Gardens-Hollis Hills', 'College Point',
       'Mapleton-Midwood (West)', 'Bensonhu