# Imports

In [13]:
import sys, os, importlib, pickle

# Resolve the sibling ``sql_generator`` directory (one level above the current
# working directory) to a normalised absolute path.
path2add = os.path.normpath(
    os.path.abspath(
        os.path.join(os.getcwd(), os.path.pardir, 'sql_generator')
    )
)

# Append it to the import path only if it is not there yet, so re-running the
# cell does not add duplicate entries.
if path2add not in sys.path:
    sys.path.append(path2add)

#print(path2add)

#import index
#import llm
#import db
#import util

from index import Index
from llm import LLM, PromptGenerator


In [5]:
def reimport_all():
    """Import ``index``, ``llm``, ``db`` and ``util``, then reload each of them.

    All four modules are imported first; afterwards they are reloaded with
    ``importlib.reload`` in the same order.
    """
    module_names = ('index', 'llm', 'db', 'util')
    # Phase 1: make sure every module is imported before any reload happens.
    loaded_modules = [importlib.import_module(name) for name in module_names]
    # Phase 2: reload them one by one, keeping the order above.
    for module in loaded_modules:
        importlib.reload(module)

# Index Data

## Extract DB Metadata

In [6]:
# Reload ``db`` so edits to the module are picked up, then bind the factory
# from the reloaded module.
import db
importlib.reload(db)
from db import DBConnectionFactory

# Port and credentials are read from environment variables.
db_conn = DBConnectionFactory.get_db_connection(
    db_type='postgres',
    db_name='dvdrental',
    db_host='localhost',
    db_port=os.environ['POSTGRES_L_PORT'],
    db_user=os.environ['POSTGRES_USER'],
    db_password=os.environ['POSTGRES_PASSWORD'],
)

# Databases reported by the connection; iterated by the metadata cell.
db_list = db_conn.get_database_list()
print(db_list)



['adventureworks', 'dvdrental']


In [7]:
# Metadata object of every database in db_list, keyed by database name.
db_metadata_all = dict()

for db_name in db_list:
    # Switch the connection to this database, then fetch its metadata.
    db_conn.set_curr_database(db_name)
    df_db_metadata = db_conn.get_metadata()
    db_metadata_all.update({db_name: df_db_metadata})

print(db_metadata_all)

{'adventureworks': <db.DBMetadata object at 0x3346669d0>, 'dvdrental': <db.DBMetadata object at 0x1112f0c50>}


## Create Index in Fuzzy Search

In [8]:
# Reload ``db`` before ``search`` and re-bind the from-imported names afterwards:
# importlib.reload does not update names that were obtained earlier through
# ``from ... import``, so without the re-import the cell would keep using the
# pre-reload DBConnectionFactory.
# NOTE(review): assumes ``search`` may import from ``db`` — confirm the order.
import db
import search
importlib.reload(db)
importlib.reload(search)
from db import DBConnectionFactory
from search import SearchFactory, SearchTypes

# Connection to the 'sql_generator' database, opened with read_only=False.
db_conn1 = DBConnectionFactory.get_db_connection(
    db_type='postgres',
    db_name='sql_generator',
    db_host='localhost',
    db_port=os.environ['POSTGRES_L_PORT'],
    db_user=os.environ['POSTGRES_USER'],
    db_password=os.environ['POSTGRES_PASSWORD'],
    read_only=False,
)

my_search = SearchFactory.get_search_provider(SearchTypes.FUZZY_SEARCH, db_conn=db_conn1)

# Create one index per database from the collected metadata objects.
for key, value in db_metadata_all.items():
    print(key)
    my_search.create_index(key, value)
    


adventureworks
dvdrental




# Search in Database Metadata

In [15]:
# NOTE(review): pickle.loads can execute arbitrary code contained in the
# payload; only load files that were written by this project.
metadata_blob = db_conn1.load_file("adventureworks_table_columns.pkl")
df_metadata = pickle.loads(metadata_blob)

# Last expression of the cell: show the first rows.
df_metadata.head()

<memory at 0x334b2a680>


Unnamed: 0,table_name,column_name


In [100]:
# NOTE(review): "cutomers" is misspelt; presumably deliberate to exercise the
# fuzzy matching — confirm before correcting.
query = "What is the total revenue for top 5 cutomers?"

test_result = my_search.search_by_query(
    db_name='dvdrental',
    query=query,
    similarity_threshold=80,
    max_synonyms=5,
)

print(test_result)

['revenue', '5', 'cutomers', 'cutomer']
Generating synonyms for: revenue
Generating synonyms for: 5
Generating synonyms for: cutomers
Generating synonyms for: cutomer
                    table_name  \
0                        actor   
1                   actor_info   
2                      address   
3                     category   
4                         city   
5                      country   
6                     customer   
7                customer_list   
8                         film   
9                   film_actor   
10               film_category   
11                   film_list   
13                    language   
14  nicer_but_slower_film_list   
15                     payment   
16                      rental   
17      sales_by_film_category   
18              sales_by_store   
19                       staff   
20                  staff_list   
21                       store   

                                          column_name  
0      [actor_id, first_name

In [13]:
# Reload ``db`` and bind the factory from the reloaded module.
import db
importlib.reload(db)
from db import DBConnectionFactory

# Separate connection object for the 'dvdrental' database; port and
# credentials are read from environment variables.
db_meta = DBConnectionFactory.get_db_connection(
    db_type='postgres',
    db_name='dvdrental',
    db_host='localhost',
    db_port=os.environ['POSTGRES_L_PORT'],
    db_user=os.environ['POSTGRES_USER'],
    db_password=os.environ['POSTGRES_PASSWORD'],
)



In [14]:
# Print the databases reported by db_meta, then select 'dvdrental'.
available_dbs = db_meta.get_database_list()
print(available_dbs)
db_meta.set_curr_database('dvdrental')

           0
2  dvdrental


AttributeError: 'PostgresConnection' object has no attribute 'search_by_query'

# RAG Flow

In [4]:
import llm
reimport_all()

# Bind the classes after the reload so they come from the reloaded module.
from llm import LLM, PromptGenerator

# Both locations are resolved relative to the parent of the working directory.
_parent_dir = os.path.join(os.getcwd(), os.path.pardir)
template_path = os.path.normpath(
    os.path.abspath(os.path.join(_parent_dir, 'sql_generator/templates'))
)
config_path = os.path.normpath(
    os.path.abspath(os.path.join(_parent_dir, 'sql_generator/config/llm.yaml'))
)

pg = PromptGenerator(template_path)
my_llm = LLM(config_path)

# Model identifiers reported by the LLM wrapper.
models = my_llm.get_model_list()
print(models)


['OpenAI: gpt-4o-mini', 'OpenAI: gpt-3.5-turbo', 'TBD: tbd']


In [5]:
# Reload ``db`` and bind the factory from the reloaded module.
import db
importlib.reload(db)
from db import DBConnectionFactory

# Database name, port and credentials all come from environment variables.
db_conn = DBConnectionFactory.get_db_connection(
    db_type='postgres',
    db_name=os.environ['POSTGRES_DB'],
    db_host='localhost',
    db_port=os.environ['POSTGRES_L_PORT'],
    db_user=os.environ['POSTGRES_USER'],
    db_password=os.environ['POSTGRES_PASSWORD'],
)

In [14]:
# Reload ``search`` and then ``db`` before building the provider.
import search
for _module in (search, db):
    importlib.reload(_module)
from search import SearchFactory, SearchTypes

# Fuzzy-search provider using the existing db_conn connection.
my_search = SearchFactory.get_search_provider(
    SearchTypes.FUZZY_SEARCH,
    db_conn=db_conn,
)


In [12]:

# Question used by the search, prompt and LLM cells that follow.
query = "What were the most popular films in July 2005?"

# Alternative questions kept for manual experiments:
#query = "What is the total revenue for top 5 cutomers?"
#query = "Are semicolons optional in JavaScript?"
#query = "Show me actor's first_name, last_name that have Nick, Ed and Jennifer as their firstnames"

In [26]:
# NOTE(review): the active question is about films, yet db_name is
# 'adventureworks' while the earlier search cell queried 'dvdrental' —
# confirm that this database is the intended one.
discovered_tables = my_search.search_by_query(
    db_name='adventureworks',
    query=query,
    similarity_threshold=80,
    max_synonyms=5,
)
#print(discovered_tables)


<memory at 0x107b17e80>
['films', 'July', '2005', 'film']
Generating synonyms for: films
Generating synonyms for: July
Generating synonyms for: 2005
Generating synonyms for: film


In [24]:
# Render the 'basic_prompt' template with the discovered tables as schema and
# the question as instruction.
prompt = pg.get_prompt(
    template_name='basic_prompt',
    schema=discovered_tables,
    instruction=query,
)
#print(prompt)

In [22]:
# Send the rendered prompt to the first entry of the model list.
response = my_llm.prompt(
    models[0],
    prompt,
)
#print(response)

gpt-4o-miniHere is a database schema: 
                    table_name  \
1                   actor_info   
8                         film   
9                   film_actor   
10               film_category   
11                   film_list   
12                   inventory   
14  nicer_but_slower_film_list   
17      sales_by_film_category   

                                          column_name  
1        [actor_id, film_info, first_name, last_name]  
8   [description, film_id, fulltext, language_id, ...  
9                    [actor_id, film_id, last_update]  
10                [category_id, film_id, last_update]  
11  [actors, category, description, fid, length, p...  
12     [film_id, inventory_id, last_update, store_id]  
14  [actors, category, description, fid, length, p...  
17                            [category, total_sales]  
  Please write me a syntactically correct SQL statement that answers the following question: What were the most popular films in July 2005?
  It is no

In [42]:
import re

# NOTE(review): `related_tables` and `idx_minsearch` are not defined in any
# visible cell (the recorded run of this cell ended in a NameError); define or
# replace them before running.
response = my_llm.rag_query(query, related_tables, idx_minsearch)
print(response)

# Capture everything between <SQL> and the first following </SQL>.
sql_pattern = r"<SQL>([\s\S]*?)(?=<\/SQL>)"
sql_match = re.search(sql_pattern, response)
if sql_match is None:
    # re.search returns None when nothing matches; fail with a clear message
    # instead of an AttributeError raised by `.group` on None.
    raise ValueError("No <SQL>...</SQL> block found in the LLM response")
sql = sql_match.group(1).strip()
print(sql)

NameError: name 'related_tables' is not defined

In [14]:
from openai import OpenAI

# Direct call to the OpenAI client instead of the project's LLM wrapper.
# NOTE(review): `llm` here names the client object and shadows the `llm`
# module imported in an earlier cell; `response` is likewise reused.
llm = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# Single user message; the optional system message stays disabled:
#   {"role": "system", "content": "You are a helpful assistant."}
chat_messages = [{"role": "user", "content": query}]

response = llm.chat.completions.create(
    model='gpt-4o-mini',
    messages=chat_messages,
    stream=False,
)

In [15]:
# Print only the message text of the first returned choice.
first_choice = response.choices[0]
print(first_choice.message.content)

Yes, semicolons are technically optional in JavaScript due to a feature called Automatic Semicolon Insertion (ASI). JavaScript interpreters automatically insert semicolons in certain situations where they are omitted, which can help prevent errors in many cases.

However, relying on ASI can lead to unexpected behavior and hard-to-debug issues. Here are some important points to consider:

1. **When ASI Works**: In many cases, ASI will correctly insert semicolons for you. For example:
   ```javascript
   let a = 5
   let b = 10
   console.log(a + b) // ASI inserts semicolons here
   ```

2. **Potential Pitfalls**: There are scenarios where ASI can lead to unexpected results:
   ```javascript
   function example() {
       return
       {
           key: 'value'
       }
   }
   ```
   In this case, the `return` statement will be interpreted as `return;` (returning `undefined`) because of a newline after `return`.

3. **Best Practices**: It is generally recommended to use semicolons consi