<a href="https://colab.research.google.com/github/avikumart/LLM-GenAI-Transformers-Notebooks/blob/main/NL2SQL_With_LlamaIndex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setting up logging

In [None]:
import logging
import sys

# Send INFO-level (and above) log records to stdout so they appear in the cell output.
# basicConfig already attaches one stdout handler to the root logger (force=True
# replaces any handlers that were there before), so no extra StreamHandler is added:
# a second handler would print every log record twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
from IPython.display import Markdown, display

In [None]:
# PyMySQL provides the `pymysql` driver named in the `mysql+pymysql://` connection URL used below.
!pip install pymysql



In [None]:
# LlamaIndex supplies the `llama_index.core` and `llama_index.llms` modules imported throughout this notebook.
!pip install llama_index

In [None]:
pip install --force-reinstall 'sqlalchemy<2.0.0'

In [None]:
import pymysql

## Connect to Database

In [None]:
import os

# Database connection settings.
# Each value is read from an environment variable so real credentials never have to be
# saved in the notebook; the original placeholder strings remain as the fallbacks.
db_user = os.environ.get("DB_USER", "your_user_name")
db_password = os.environ.get("DB_PASSWORD", "*********")
db_host = os.environ.get("DB_HOST", "your_host_address")
db_name = os.environ.get("DB_NAME", "name_of_db")

In [None]:
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

# Construct the connection string.
# The user name and password are URL-escaped: characters such as "@", "/" or ":"
# inside a password would otherwise be parsed as URL delimiters and break the connection.
connection_string = (
    f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}/{db_name}"
)

# Create an engine instance
engine = create_engine(connection_string)

# Test the connection using raw SQL: list the tables and print one row per table.
# The `with` block closes the connection when the listing is done.
with engine.connect() as connection:
    result = connection.execute(text("show tables"))
    for row in result:
        print(row)

('Customers',)
('OrderDetails',)
('Orders',)
('info',)
('table_36370395d9314572970b325bee42817d',)
('table_59af3c5a5388482193f88093fc2eaa36',)


## Load the database into a LlamaIndex SQLDatabase

In [None]:
from llama_index.core import SQLDatabase

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
NumExpr defaulting to 2 threads.


In [None]:
# Wrap the engine in a LlamaIndex SQLDatabase limited to the two hash-named tables.
tables = [
    'table_36370395d9314572970b325bee42817d',
    'table_59af3c5a5388482193f88093fc2eaa36',
]
sql_database = SQLDatabase(
    engine,
    include_tables=tables,
    # presumably the number of sample rows included in each table's info -- confirm in the API docs
    sample_rows_in_table_info=2,
)
# Bare last expression: display the wrapper object as the cell output.
sql_database

<llama_index.core.utilities.sql_wrapper.SQLDatabase at 0x7de656bd2a40>

In [None]:
# `_all_tables` is a private attribute of the wrapper. As the output below shows, it holds
# every table found in the database, not only the two passed via `include_tables`.
sql_database._all_tables

{'Customers',
 'OrderDetails',
 'Orders',
 'info',
 'table_36370395d9314572970b325bee42817d',
 'table_59af3c5a5388482193f88093fc2eaa36'}

In [None]:
# Show the schema description the wrapper generates for one table.
# The method has to be called with a table name; referencing it without parentheses
# only displays the bound-method object instead of any table information.
print(sql_database.get_single_table_info(tables[0]))

## Connect to OpenAI account

In [None]:
import os
import openai

In [None]:
import getpass

# Never store a real API key in the notebook: anything saved here is published with the file.
# The key that was previously hardcoded in this cell must be treated as leaked and revoked.
# Read the key from the OPENAI_API_KEY environment variable, prompting for it (without echo)
# only when the variable is not already set.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
# NOTE(review): none of the visible cells import an Aim callback -- confirm this package is still required.
!pip install llama-index-callbacks-aim


## Connect to LLM and initialize the service context

In [None]:
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
# Token-counting callback that tokenizes with the encoder tiktoken reports for gpt-3.5-turbo.
token_counter = TokenCountingHandler(tokenizer=tiktoken.encoding_for_model("gpt-3.5-turbo").encode)

# Callback manager carrying the token counter as its only handler.
callback_manager = CallbackManager(handlers=[token_counter])

In [None]:
from llama_index.core.indices.service_context import ServiceContext
from llama_index.embeddings.openai import OpenAIEmbedding
#import llama_index.indices.prompt_helper.PromptHelper
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

# LLM used to turn natural-language questions into SQL and to phrase the answers.
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo")

# ServiceContext is deprecated in the `llama_index.core` package layout (calling
# ServiceContext.from_defaults emits a deprecation warning and is not supported by newer
# releases). The global Settings object is its replacement, so configure the LLM and the
# token-counting callback manager there instead.
Settings.llm = llm
Settings.callback_manager = callback_manager

# Kept defined so cells that still pass `service_context=service_context` do not raise a
# NameError; None makes components fall back to the global Settings configured above.
service_context = None

  service_context = ServiceContext.from_defaults(


## Create SQL table node mapping

In [None]:
#creating SQL table node mapping
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex, SQLTableNodeMapping, SQLTableSchema
import pandas as pd

# List the tables exposed by the SQLDatabase wrapper and create one table schema object per
# table for the prompt sent to the LLM.
# get_usable_table_names() is the public accessor and honours the `include_tables`
# restriction given when the wrapper was created; the private `_all_tables` attribute
# would also index tables the wrapper was told not to use.
tables = list(sql_database.get_usable_table_names())
table_node_mapping = SQLTableNodeMapping(sql_database)
table_schema_objs = [SQLTableSchema(table_name=table) for table in tables]

In [None]:
# Inspect the schema objects that are handed to ObjectIndex.from_objects in the next section.
print(table_schema_objs)

[SQLTableSchema(table_name='table_59af3c5a5388482193f88093fc2eaa36', context_str=None), SQLTableSchema(table_name='Orders', context_str=None), SQLTableSchema(table_name='table_36370395d9314572970b325bee42817d', context_str=None), SQLTableSchema(table_name='OrderDetails', context_str=None), SQLTableSchema(table_name='info', context_str=None), SQLTableSchema(table_name='Customers', context_str=None)]


## Initialize SQL Table Retrieval Engine

In [None]:
from llama_index.core.indices.struct_store.sql_query import SQLTableRetrieverQueryEngine

# Build an object index over the table schema objects so the tables relevant to a
# question can be retrieved at query time.
# The index class is passed by keyword: binding it positionally depends on the exact
# parameter order of from_objects and can silently land on a different parameter.
obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    index_cls=VectorStoreIndex,
)

In [None]:
# Query engine that retrieves the 3 most similar table schemas for each question and
# runs text-to-SQL against `sql_database`.
# The LLM and the token-counting callback manager are passed directly rather than through
# the deprecated `service_context` argument.
query_engine = SQLTableRetrieverQueryEngine(
    sql_database,
    obj_index.as_retriever(similarity_top_k=3),
    llm=llm,
    callback_manager=callback_manager,
)

In [None]:
# Ask a natural-language question through the SQLTableRetrieverQueryEngine created above.
response = query_engine.query("How many people have previous work experience?")

In [None]:
# Display the engine's response to the question asked in the previous cell.
print(response)

## Few-shot examples prompting

In [None]:
# Select a few examples: each pairs a natural-language question ("input") with the SQL
# statement that answers it ("query"). Append further examples in the same shape.
examples = [
    {
        "input": "List all customers in France with a credit limit over 20,000.",
        "query": "SELECT * FROM customers WHERE country = 'France' AND creditLimit > 20000;",
    },
    {
        "input": "Get the highest payment amount made by any customer.",
        "query": "SELECT MAX(amount) FROM payments;",
    },
]


In [None]:
# Few-shot example prompt built with a LlamaIndex prompt template
from llama_index.core import PromptTemplate
from llama_index.llms.openai import OpenAI

# Example prompt for SQL query generation: one question followed by its SQL query.
# {input} and {query} are the template variables filled in by format() below.
qa_prompt_tmpl_str = """\
Context information is below.
---------------------
Question: {input}
SQLQuery: {query}
"""
# Variable mapping: the keyword names accepted by format() (keys) are mapped onto the
# variable names used inside the template string (values).
template_var_mappings = {"user_queries": "input", "ai_sql": "query"}

# Prompt template
prompt_tmpl = PromptTemplate(
    qa_prompt_tmpl_str, template_var_mappings=template_var_mappings
)

# Format the prompt with a user question and its SQL query.
# This must be called on `prompt_tmpl`, the template defined just above.
fmt_prompt = prompt_tmpl.format(
    user_queries="List all customers in France with a credit limit over 20,000.",
    ai_sql="SELECT * FROM customers WHERE country = 'France' AND creditLimit > 20000;",
)
print(fmt_prompt)

In [None]:
# Dynamic few shot examples selection
from llama_index.core import Document, VectorStoreIndex
from llama_index.llms.openai import OpenAI

gpt35_llm = OpenAI(model="gpt-3.5-turbo")

# VectorStoreIndex.from_documents expects Document objects, not plain dicts, so each
# example is rendered to text (question plus its SQL) and wrapped in a Document first.
example_docs = [
    Document(text=f"Question: {example['input']}\nSQLQuery: {example['query']}")
    for example in examples
]

# set up an index of few shot examples
index = VectorStoreIndex.from_documents(example_docs)

# query string
query_str = "How many employees we have?"

# Query engine over the example index; the 2 most similar examples are retrieved per query.
# NOTE: this rebinds `query_engine`, replacing any engine stored under that name earlier.
query_engine = index.as_query_engine(similarity_top_k=2, llm=gpt35_llm)

response_prompt = query_engine.query(query_str)

# print the few shot examples that were retrieved, for use in few shot prompting
for source_node in response_prompt.source_nodes:
    print(source_node.node.get_content())