# Text2Sql

In [1]:
import os
from urllib.parse import quote_plus

import instructor
import psycopg2
from openai import OpenAI
from pydantic import BaseModel

from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain

In [2]:
# Database credentials come from the environment; either value is None when
# its variable is not set.
postgres_username, postgres_pwd = (
    os.environ.get(env_name)
    for env_name in ("POSTGRES_USERNAME", "POSTGRES_PASSWORD")
)

In [3]:
# Sanity check that POSTGRES_USERNAME was picked up from the environment.
# NOTE(review): the printed value is saved with the notebook output — clear it
# before sharing if it is sensitive.
print(postgres_username)

postgres


In [6]:
# instructor-wrapped OpenAI client; the completion call later in the notebook
# passes a pydantic `response_model` to it.
client = instructor.from_openai(OpenAI())

# URL-encode the credentials: characters such as "@", ":" or "/" in a raw
# username/password would otherwise be read as URI delimiters and corrupt the
# connection string. An unset variable (None) is encoded as an empty string
# instead of the literal text "None".
db_chain = SQLDatabase.from_uri(
    "postgresql+psycopg2://"
    f"{quote_plus(postgres_username or '')}:{quote_plus(postgres_pwd or '')}"
    "@host.docker.internal:5433/postgresdb"
)

In [7]:
# Structured-output schema: passed as `response_model` to the completion call,
# whose parsed result is displayed as an SQLQuery instance.
# NOTE(review): documented with comments rather than a docstring — a class
# docstring may end up in the schema sent to the model; confirm before adding one.
class SQLQuery(BaseModel):
    query: str  # SQL text produced by the model; later run via cursor.execute

In [37]:
# Natural-language question to translate into SQL.
query = "What is the top 5 attraction location in the US?"

# Prompt = table schema + the question + output constraints.
# The example table name in the last instruction must match the real table
# (us_attractions): a misspelled example can be copied verbatim by the model,
# producing a query against a relation that does not exist.
prompt = f"""
You are given a database schema:
    Schema: public
    Table: us_attractions
    Columns:
    - name VARCHAR(250)
    - main_category VARCHAR(250)
    - rating REAL
    - reviews REAL
    - categories VARCHAR(250)
    - address VARCHAR(250)
    - city VARCHAR(250)
    - country VARCHAR(250)
    - state VARCHAR(250)
    - zipcode INTEGER
    - broader_category VARCHAR(250)
    - weighted_score REAL
    - weighted_average REAL
    - all_cities VARCHAR(250)

The values under the country column are all "USA".    

Translate the following query into SQL:

"{query}"

Write SQL query using the "public" schema for all tables (e.g., public.us_attractions).

"""

In [38]:
# Send the prompt as a single user message. `create_with_completion` yields a
# pair: the result parsed into SQLQuery, and the raw completion object.
chat_messages = [{"role": "user", "content": prompt}]

response, raw_response = client.chat.completions.create_with_completion(
    messages=chat_messages,
    response_model=SQLQuery,
    model="gpt-4.1",
    temperature=0,
)

In [20]:
# raw_response

In [39]:
# Display the parsed SQLQuery returned by the completion call.
response

SQLQuery(query='SELECT name, city, state, rating, reviews, weighted_score\nFROM public.us_attractions\nORDER BY weighted_score DESC\nLIMIT 5;')

In [34]:
# Show only the generated SQL string.
# NOTE(review): the output saved below comes from an earlier execution (In [34])
# than the completion cell (In [38]) and names public.us_attraction rather than
# public.us_attractions — re-run this cell to refresh it.
response.query

'SELECT name, city, state, rating, reviews, weighted_score \nFROM public.us_attraction\nORDER BY weighted_score DESC\nLIMIT 5;'

In [43]:
# Open a direct psycopg2 connection used to run the generated SQL.
conn = psycopg2.connect(
    dbname="postgresdb",
    user=postgres_username,
    password=postgres_pwd,
    host="host.docker.internal",  # Docker host name; e.g. "localhost" outside a container
    port="5433",  # non-default port (PostgreSQL's default is 5432)
)
# The SQL executed on this connection is model-generated, so make the session
# read-only: statements that try to modify data are rejected by the server.
conn.set_session(readonly=True)
cursor = conn.cursor()

In [44]:
# Run the model-generated SQL and print each returned row.
generated_sql = response.query

try:
    cursor.execute(generated_sql)
    rows = cursor.fetchall()
except psycopg2.Error:
    # A failed statement leaves the transaction aborted, and every later
    # execute on this connection would fail too. Roll back so the cell can be
    # re-run with a corrected query, then surface the original error.
    conn.rollback()
    raise

for row in rows:
    print(row)


('Central Park', 'New-york-city', 'NY', 4.8, 272065.0, 1305912.0)
('Magic Kingdom Park', 'Orlando', 'FL', 4.6, 221929.0, 1020873.4)
('Universal Orlando Resort', 'Orlando', 'FL', 4.7, 178775.0, 840242.5)
('Rockefeller Center', 'New-york-city', 'NY', 4.7, 175156.0, 823233.2)
('Universal Studios Hollywood', 'Los-angeles', 'CA', 4.6, 153543.0, 706297.8)


In [None]:
# Close the psycopg2 connection opened earlier once querying is finished.
conn.close()