### Langchain


%%capture
pip install langchain google-generativeai



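## Key
API keys (Vanna, Gemini) must not be stored in this notebook. Supply them at runtime through environment variables (for example `GOOGLE_API_KEY`) or an interactive prompt. Any key that was previously pasted here should be treated as compromised and revoked/rotated.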


## imports

In [53]:
import os
import re
from getpass import getpass

import pandas as pd
import psycopg2
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Set up API key for Gemini.
# The key is never written into the notebook: it is taken from the GOOGLE_API_KEY
# environment variable when already set, otherwise prompted for without echo so it
# does not end up in the saved cell output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter Gemini API key: ")


### Initialize LLM

In [None]:

# Gemini chat model that is later invoked with the prompt to produce SQL.
# NOTE: there must be no trailing comma after the closing parenthesis —
# `llm = ChatGoogleGenerativeAI(...),` binds a 1-tuple, and `llm.invoke(...)`
# then fails with "'tuple' object has no attribute 'invoke'".
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro",
    temperature=0.3,
)

### PostgreSQL

In [None]:

# Open a psycopg2 connection to the local PostgreSQL server (database "postgres").
# getpass() reads the password without echoing it, so the secret is not stored in
# the notebook's cell output the way it would be with input().
conn = psycopg2.connect(
    dbname="postgres",
    user="postgres",
    password=getpass("Enter PostgreSQL password: "),
    host="localhost",
    port="5432",
)


### Prompt

In [80]:

# Natural-language question the model should answer with a SQL query.
user_question = "Show last 5 year columns on views table"

# SQL-writing rules sent to the model. The literals below are concatenated into a
# single string, so every sentence must end with its own punctuation and a trailing
# space; otherwise neighbouring sentences fuse together (e.g. "...a dateIf a ...").
format_instructions = (
    "Before using `yearmon` in date comparisons, convert it to DATE format if it is not already a date, using: "
    "TO_DATE(yearmon, 'Mon YYYY'), where yearmon is in 'Mon YYYY' format. "
    "If a permanent conversion is needed, use: "
    "ALTER TABLE data.<table_name> ALTER COLUMN yearmon TYPE DATE USING TO_DATE(yearmon, 'Mon YYYY'); "
    "Do not use TO_CHAR for comparison. Once converted, use standard date filtering like: "
    "WHERE yearmon >= (NOW() - INTERVAL '5 years')::date. "
    "Assume all indicator columns (e.g., IMF_COMPRICES_...) are in UPPERCASE and of type REAL. "
    "Treat `pop` as the population column and `iso3` as the country code. "
    "Treat the columns `pop`, `iso3`, and `region_code` as lowercase. "
    "Do NOT compare numeric columns (like indicators or `pop`) to strings or empty values. Use `IS NOT NULL` to filter missing values. "
    "Do NOT cast numeric columns to other types unnecessarily—assume all indicator columns and `pop` are already numeric. "
    "Use double quotes ONLY when necessary (e.g., for UPPERCASE column names in PostgreSQL). "
    "Ensure that the SQL is syntactically valid, safe, and PostgreSQL-compatible. "
    "If aggregation is needed, use standard SQL functions like `AVG`, `SUM`, `MAX`, `MIN`, and ensure to `GROUP BY` as required. "
    "Write clean, readable, and logically ordered SQL queries that meet the user’s intent."
)

# Compose the final prompt: target schema, the user's question, then the rules.
prompt = f"""
Schema: 'data'
User query: {user_question}

Format instructions:
{format_instructions}
"""


### Invoke LLM to generate SQL

In [81]:

# Send the composed prompt to the model as a single human message and show
# the text of its reply (expected to contain the generated SQL).
messages = [HumanMessage(content=prompt)]
response = llm.invoke(messages)
print("Generated SQL:", response.content)


Generated SQL: ```sql
SELECT *
FROM data.views
WHERE TO_DATE(yearmon, 'Mon YYYY') >= (NOW() - INTERVAL '5 years')::date;
```


### Running the SQL prompt

In [None]:
# Pull the SQL text out of the model reply. The reply is normally wrapped in a
# Markdown code fence; when no fence is present the whole reply is used.
# str.strip("```sql") is NOT suitable here: it strips a *set of characters* from
# both ends, so it would also remove leading/trailing s, q, l letters
# (e.g. "select ..." -> "elect ...").
_fenced_sql = re.search(
    r"```(?:sql|postgresql|postgres)?\s*(.*?)```",
    response.content,
    flags=re.DOTALL | re.IGNORECASE,
)
cleaned_sql = (_fenced_sql.group(1) if _fenced_sql else response.content).strip()
# Prompt for the database password without echoing it into the cell output.
password = getpass("Enter PostgreSQL password: ")

# Build the connection URL with URL.create() so the password is actually
# substituted (the previous plain string lacked the f-prefix and sent the literal
# text "{password}") and so special characters such as "@" or "/" are escaped.
engine = create_engine(
    URL.create(
        "postgresql+psycopg2",
        username="postgres",
        password=password,
        host="localhost",
        port=5432,
        database="postgres",
    )
)

# NOTE(review): cleaned_sql is model-generated and executed as-is; nothing here
# restricts it to read-only statements. Prefer connecting with a read-only
# database role before running untrusted SQL.
try:
    df = pd.read_sql_query(cleaned_sql, engine)
    print(df.head())
except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print("Error running SQL:", e)



  iso3   yearmon region_code         pop VIEWS_main_dich VIEWS_main_mean  \
0  AFG  Jul 2020         SAR  39068980.0                                   
1  AFG  Aug 2020         SAR  39068980.0                                   
2  AFG  Sep 2020         SAR  39068980.0                                   
3  AFG  Oct 2020         SAR  39068980.0                                   
4  AFG  Nov 2020         SAR  39068980.0                                   

  VIEWS_main_mean_ln  VIEWS_PRED_ucdp_ged_ns_best_sum  \
0                                                 0.0   
1                                                 0.0   
2                                                 0.0   
3                                                 0.0   
4                                                 0.0   

   VIEWS_PRED_ucdp_ged_os_best_sum  VIEWS_PRED_ucdp_ged_sb_best_sum  ...  \
0                             18.0                           3115.0  ...   
1                             12.0              