• Create a Gradio app using LangChain with the following features:
1. SQL Query Generation: Generate SQL queries from natural language.
2. SQL Safety Check: Implement basic safety checks for SQL queries to prevent
malicious input.
3. Conversational Memory: Add multi-round conversational capabilities with
a chat box and history.
4. Question-Answering System: Build a complete question-answering system
for the course schedule.

• The Gradio app must include:
* A conversation box that displays the full chat history.
* The ability to handle multi-round interactions for context-aware querying.

#### Rename the columns to make them easier to understand

In [73]:
import pandas as pd
import sqlite3

# Open the SQLite database and read the entire courses table into a DataFrame
conn = sqlite3.connect("courses.db")
query = "SELECT * FROM courses"
df = pd.read_sql(query, conn)

# Abbreviated weekday columns -> explicit start/end names.
# Keys that are not present in the frame are ignored by `rename`,
# so re-running this cell after the table was already rewritten is harmless.
column_renames = {
    "Mon": "Monday_start", "MonTo": "Monday_end",
    "Tue": "Tuesday_start", "TueTo": "Tuesday_end",
    "Wed": "Wednesday_start", "WedTo": "Wednesday_end",
    "Thu": "Thursday_start", "ThuTo": "Thursday_end",
    "Fri": "Friday_start", "FriTo": "Friday_end",
    "Sat": "Saturday_start", "SatTo": "Saturday_end",
}
df = df.rename(columns=column_renames)

# Overwrite the courses table with the renamed frame (the DataFrame index is not written)
df.to_sql("courses", conn, if_exists="replace", index=False)


135

#### Set up models

In [75]:
# Configure LLM options
from langchain_ollama import ChatOllama
import os
from langchain_community.llms import DeepInfra
from langchain_openai import ChatOpenAI

# Local LLM served through Ollama
local_model = ChatOllama(model="qwen2.5:7b")
# Alternative local model: ChatOllama(model="phi3.5:3.8b-mini-instruct-q8_0")

# API setup (optional)
# Mirror DEEPINFRA_API_KEY into DEEPINFRA_API_TOKEN only when it is actually set.
# Wrapping a missing value in str() would store the literal text "None" as the
# token and hide the fact that no credential was configured.
deepinfra_api_key = os.environ.get("DEEPINFRA_API_KEY")
if deepinfra_api_key:
    os.environ["DEEPINFRA_API_TOKEN"] = deepinfra_api_key
# Alternative hosted model:
# llm = DeepInfra(model_id="meta-llama/Meta-Llama-3.1-70B-Instruct")
# https://api.python.langchain.com/en/latest/llms/langchain_community.llms.deepinfra.DeepInfra.html

# Choose which model to use.
# OPENAI_API_KEY is read straight from the environment; it is never rewritten
# here, so an unset key stays unset instead of becoming the string "None".
if os.environ.get("OPENAI_API_KEY"):
    llm = ChatOpenAI(model="gpt-4o-mini")
else:
    print("OPENAI_API_KEY is not set; falling back to the local Ollama model.")
    llm = local_model

#### Prepare helper functions that are used later

In [77]:
from langchain_community.utilities import SQLDatabase

# Connect LangChain's SQL wrapper to the local SQLite file.
# sample_rows_in_table_info=0 -> no sample rows are requested in the table info.
db = SQLDatabase.from_uri(
    "sqlite:///courses.db",
    sample_rows_in_table_info=0,
)

# Helper functions
def get_schema(_):
    """Return the table info reported by the module-level `db`.

    The single positional argument is accepted but ignored, so the function
    can be used wherever a one-argument callable is expected.
    """
    table_info = db.get_table_info()
    return table_info

def run_query(query):
    """Run `query` against the module-level `db` and return whatever `db.run` returns."""
    query_output = db.run(query)
    return query_output

In [78]:
def parse(string):
    """Extract the bare SQL statement from an LLM response.

    If the response contains a Markdown code fence (```), the text between the
    first pair of fences is used (or everything after the first fence when it
    is never closed). A leading ``sql`` language tag, in any letter case, is
    dropped. Only that tag is removed: occurrences of "sql" inside the query
    itself (for example ``sqlite_master``) are left intact. A response without
    a fence is used as-is.

    Args:
        string: Raw text returned by the model.

    Returns:
        The SQL text with surrounding whitespace stripped.
    """
    if "```" in string:
        # Segment 1 is the content that follows the first opening fence.
        fenced = string.split("```")[1].lstrip()
        tag, remainder = fenced[:3], fenced[3:]
        next_char = remainder[:1]
        # Treat "sql" as a language tag only when it is a whole word, so that
        # identifiers such as "sql_log" or "sqlite_master" are not truncated.
        if tag.lower() == "sql" and not (next_char.isalnum() or next_char == "_"):
            fenced = remainder
        string = fenced

    return string.strip()

#### Prepare info needed for prompt

In [None]:
import json

# Re-read the courses table through the already open `conn`
df = pd.read_sql("SELECT * FROM courses", conn)

# For every column, collect its distinct values as a plain Python list
column_unique_values = dict(
    (name, df[name].unique().tolist()) for name in df.columns
)
print(column_unique_values)

# Serialise the mapping to a JSON string
column_unique_values_str = json.dumps(column_unique_values)
print(column_unique_values_str)

### SQL from natural language

In [79]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# SQL generation prompt: {schema} and {question} are filled in by the chain below
sql_prompt_template = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Question: {question}
SQL Query:"""

# Built from adjacent string literals instead of a backslash continuation:
# the continuation form embedded the next line's indentation in the prompt.
# The database file is courses.db, matching the connection URI used elsewhere.
system_prompt = (
    "You are a helpful assistant that helps answer questions about courses "
    "in the Spring Semester of 2025. "
    "All needed information is stored in courses.db"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("system", "Given an input question, convert it to a SQL query. No pre-amble."),
    ("human", sql_prompt_template),
])

# Build query generation chain:
#   1. add a `schema` entry (computed by get_schema) to the input mapping
#   2. render the chat prompt
#   3. call the model, stopping if it begins a "SQLResult:" section
#   4. reduce the model message to plain text
#   5. strip any Markdown code fence around the SQL
sql_generator = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
    | parse
)

In [106]:
# Other questions to try:
#   "What courses does Micheal Zhao teaches"
#   " What are avaliale Departments?"
question = "Which courses are available in the Biomedical sciemce department for Spring 2025?"

# Turn the natural-language question into SQL and show it
sql = sql_generator.invoke(dict(question=question))
print(sql)


SELECT * FROM courses WHERE "Department" = 'Biomedical Science';



In [92]:
# Execute the generated SQL exactly once (the query used to be run twice, with
# the first result thrown away) and inspect both the result and its type.
result = run_query(sql)
print(result)
type(result)

[('Applied Math & Statistics',), ('ART',), ('Biomedical Science (BMS)',), ('Computer Science (CS), Computer Networks and Cybersecurity (CNCS)',), ('Data Science (DS)',), ('General Education (GE)',), ('Dance ',)]


str

In [87]:
# Run the query again and display the type of the returned value
latest_output = db.run(sql)
type(latest_output)

str