In [1]:
from cables_and_dividers_communicator import generate_sql_query_cable_and_divider_anthropic,generate_sql_query_cable_and_divider_google, generate_sql_query_cable_and_divider_mistral
from gaming_laptop_communicator import generate_sql_query_gaming_laptop_anthropic,generate_sql_query_gaming_laptop_google, generate_sql_query_gaming_laptop_mistral
from laptop_communicator import generate_sql_query_laptop_anthropic,generate_sql_query_laptop_google, generate_sql_query_laptop_mistral
from mobile_phone_communicator import generate_sql_query_mobile_phone_anthropic,generate_sql_query_mobile_phone_google, generate_sql_query_mobile_phone_mistral
from monitors_communicator import generate_sql_query_monitor_anthropic,generate_sql_query_monitor_google, generate_sql_query_monitor_mistral
from mouses_communicator import generate_sql_query_mouse_anthropic,generate_sql_query_mouse_google, generate_sql_query_mouse_mistral
from tvs_communicator import generate_sql_query_tv_anthropic,generate_sql_query_tv_google, generate_sql_query_tv_mistral
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import re

  from .autonotebook import tqdm as notebook_tqdm


#### Setting up the zero-shot classifier pipeline

In [2]:
# Classifier used to decide which product category a question is about.
pipe = pipeline(model="facebook/bart-large-mnli")

# Candidate categories handed to the classifier; the dispatch code further down
# compares against these exact strings.
possible_labels = [
    "cables and dividers",
    "gaming laptops",
    "laptop",
    "mouses",
    "tvs",
    "monitors",
    "mobile phones",
]




#### Zero-shot classifier Example

In [24]:
# Sample English question used to sanity-check the classifier.
input_text = "What phones are there?"
possible_labels = [
    "cables and dividers",
    "gaming laptops",
    "laptop",
    "mouses",
    "tvs",
    "monitors",
    "mobile phones",
]

# Score the question against every candidate label in a single call.
result = pipe(input_text, possible_labels)

# In the saved output the labels/scores are ordered best-first, so index 0 is
# the predicted category and its confidence.
top_label = result["labels"][0]
top_score = result["scores"][0]

print("Input Text:", input_text)
print("Predicted Class:", top_label)
print("Confidence Score:", top_score)
print(result)

Input Text: What phones are there?
Predicted Class: mobile phones
Confidence Score: 0.8313082456588745
{'sequence': 'What phones are there?', 'labels': ['mobile phones', 'mouses', 'cables and dividers', 'monitors', 'laptop', 'tvs', 'gaming laptops'], 'scores': [0.8313082456588745, 0.03666427731513977, 0.03313170373439789, 0.029806964099407196, 0.02954372577369213, 0.024152150377631187, 0.015392889268696308]}


#### Setting up Translator MKD to ENG

In [3]:
# The tokenizer and the seq2seq weights are both loaded from the same checkpoint.
nllb_checkpoint = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(nllb_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(nllb_checkpoint)

In [4]:
# Language codes passed to the translation pipeline (MKD -> ENG).
target_lang = "eng_Latn"
source_lang = "mkd_Cyrl"

# Wrap the already-loaded model and tokenizer in a translation pipeline.
translator = pipeline(
    'translation',
    model=model,
    tokenizer=tokenizer,
    src_lang=source_lang,
    tgt_lang=target_lang,
    max_length=400,
)

#### Translation Example

In [5]:
# Macedonian sample question fed through the translator.
text = "Што гејминг лаптопи има со барем 8гб рам?"
output = translator(text)

# The pipeline result is indexed as a list of dicts; the first entry holds the
# translated string under 'translation_text'.
first_translation = output[0]
translated_text = first_translation['translation_text']
print(translated_text)

What gaming laptop has at least 8GB of RAM?


In [6]:
# Classify the translated question against the product categories.
result2 = pipe(translated_text, possible_labels)

# Index 0 is the top-ranked label and its score.
best_label, best_score = result2["labels"][0], result2["scores"][0]

print("Input Text:", translated_text)
print("Predicted Class:", best_label)
print("Confidence Score:", best_score)

Input Text: What gaming laptop has at least 8GB of RAM?
Predicted Class: gaming laptops
Confidence Score: 0.6107091903686523


#### Extract the SQL query from the API output

In [8]:
# Matches the first Markdown code fence, optionally tagged "sql" (any case).
# Trailing spaces/tabs after the tag and CRLF line endings are tolerated so
# that minor formatting differences between LLM responses still match.
_SQL_FENCE_PATTERN = re.compile(
    r'```[ \t]*(?:sql)?[ \t]*\r?\n(.*?)\r?\n```',
    re.DOTALL | re.IGNORECASE,
)


def extract_sql_query(text):
    """Return the contents of the first fenced code block found in ``text``.

    The fence may be untagged or tagged ``sql`` (case-insensitive). Only the
    text between the opening and closing fence lines is returned; the fences
    themselves and the line breaks adjacent to them are dropped.

    Args:
        text: Raw response text expected to contain a fenced SQL block.

    Returns:
        The query string, or ``None`` when ``text`` is not a string or no
        complete (opened and closed) fenced block is present.
    """
    # A missing response yields None instead of a TypeError from the regex
    # engine, matching the "no match" result.
    if not isinstance(text, str):
        return None

    match = _SQL_FENCE_PATTERN.search(text)
    return match.group(1) if match else None


#### Complete SQL Query Generator With Translator and Zero-Shot Classifier

In [10]:
# Maps each classifier label to the generator used for that product category.
# Every category uses its `_mistral` variant except "gaming laptops", which
# uses the `_anthropic` one. Keys must stay in sync with `possible_labels`.
sql_generators_by_category = {
    "cables and dividers": generate_sql_query_cable_and_divider_mistral,
    "gaming laptops": generate_sql_query_gaming_laptop_anthropic,
    "laptop": generate_sql_query_laptop_mistral,
    "mouses": generate_sql_query_mouse_mistral,
    "tvs": generate_sql_query_tv_mistral,
    "monitors": generate_sql_query_monitor_mistral,
    "mobile phones": generate_sql_query_mobile_phone_mistral,
}

user_question = "Што гејминг лаптопи има со барем 8гб РАМ меморија и имаат екран со OLED дисплеј?"

# Step 1: translate the Macedonian question to English.
user_question_translated = translator(user_question)[0]['translation_text']

# Step 2: take the top-ranked category from the zero-shot classifier.
user_question_category = pipe(user_question_translated, possible_labels)["labels"][0]
print(user_question_translated)
print(user_question_category)

# Step 3: dispatch to the matching generator. Fail loudly on an unknown label
# instead of leaving `sql_query` unbound (or stale from an earlier run of this
# cell), which is what a fall-through of the if/elif chain would do.
generate_sql = sql_generators_by_category.get(user_question_category)
if generate_sql is None:
    raise ValueError(
        f"No SQL generator registered for category {user_question_category!r}"
    )

# Step 4: keep only the fenced SQL from the generator's response; this is None
# when the response contains no complete fenced block.
sql_query = extract_sql_query(generate_sql(user_question_translated))
print(sql_query)

What gaming laptops have at least 8GB of RAM and have OLED displays?
gaming laptops
SELECT gl.title
FROM gaming_laptops gl
JOIN gaming_laptop_feature glf ON gl.id = glf.product_id
JOIN feature_gaming_laptops fgl ON glf.feature_id = fgl.id
WHERE fgl.feature_name = 'ram_size_gb' AND fgl.feature_value >= '8'
  AND gl.id IN (
    SELECT glf.product_id
    FROM gaming_laptop_feature glf
    JOIN feature_gaming_laptops fgl ON glf.feature_id = fgl.id
    WHERE fgl.feature_name LIKE '%screen_type%' AND fgl.feature_value LIKE '%OLED%'
  );


### Testing

In [26]:
# Ad-hoc check of the `_mistral` cables-and-dividers generator.
# NOTE(review): `translated_text` is assigned in the translation example cell to
# a gaming-laptop question, yet the saved query printed from `testing` filters
# on cable length and socket count. Presumably the variable held a different
# question when this cell last ran; re-run from a fresh kernel to confirm.
testing = generate_sql_query_cable_and_divider_mistral(translated_text)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\tomce\.cache\huggingface\token
Login successful


In [27]:
# Show only the fenced SQL pulled out of the raw response stored in `testing`.
print(extract_sql_query(testing))

SELECT cables_and_dividers.title, cable_length_meters.feature_value, number_of_sockets.feature_value
FROM cables_and_dividers
JOIN cable_and_divider_feature AS cable_feature1 ON cables_and_dividers.id = cable_feature1.product_id
JOIN feature_cables_and_dividers AS cable_length_meters ON cable_feature1.feature_id = cable_length_meters.id
JOIN cable_and_divider_feature AS cable_feature2 ON cables_and_dividers.id = cable_feature2.product_id
JOIN feature_cables_and_dividers AS number_of_sockets ON cable_feature2.feature_id = number_of_sockets.id
WHERE cable_length_meters.feature_name = 'cable_length_meters'
AND CAST(cable_length_meters.feature_value AS INTEGER) >= 3
AND number_of_sockets.feature_name = 'number_of_sockets'
AND CAST(number_of_sockets.feature_value AS INTEGER) >= 3;


In [11]:
# Ad-hoc check of the `_anthropic` TV generator, passing an English question
# directly (no translation or classification step).
testing2 = generate_sql_query_tv_anthropic("What are the TVs that are 4k and have a screen size of 50 inches or more?")

In [14]:
# Print the raw response to inspect the prose surrounding the fenced SQL block.
# NOTE(review): the saved output stops mid-sentence, so the response was
# presumably truncated (e.g. by a token limit) — confirm in the generator.
print(testing2)

To find the TVs that are 4K and have a screen size of 50 inches or more, we can use the following SQL query:

```sql
SELECT tvs.title, screen_size.feature_value AS screen_size_inches, screen_resolution.feature_value AS screen_resolution
FROM tvs
JOIN tv_feature AS tvf1 ON tvs.id = tvf1.product_id
JOIN feature_tvs AS screen_size ON tvf1.feature_id = screen_size.id AND screen_size.feature_name = 'screen_size_inches'
JOIN tv_feature AS tvf2 ON tvs.id = tvf2.product_id
JOIN feature_tvs AS screen_resolution ON tvf2.feature_id = screen_resolution.id AND screen_resolution.feature_name = 'screen_resolution'
WHERE CAST(screen_size.feature_value AS INTEGER) >= 50
AND screen_resolution.feature_value LIKE '%1920 x 1080%' OR screen_resolution.feature_value LIKE '%3840 x 2160%';
```

Here's a breakdown of the query:

1. We join the `tvs` table with the `tv_feature` and `feature_tvs` tables to retrieve the TV title, screen size, and screen resolution.
2. The `JOIN` clauses connect the tables based on

In [12]:
# Extract just the SQL from the raw response stored in `testing2`.
# NOTE(review): in the saved output the WHERE clause mixes AND and OR without
# parentheses, so the ">= 50" size filter does not apply to the second LIKE
# branch; it also accepts '1920 x 1080', which is not a 4K resolution.
print(extract_sql_query(testing2))

SELECT tvs.title, screen_size.feature_value AS screen_size_inches, screen_resolution.feature_value AS screen_resolution
FROM tvs
JOIN tv_feature AS tvf1 ON tvs.id = tvf1.product_id
JOIN feature_tvs AS screen_size ON tvf1.feature_id = screen_size.id AND screen_size.feature_name = 'screen_size_inches'
JOIN tv_feature AS tvf2 ON tvs.id = tvf2.product_id
JOIN feature_tvs AS screen_resolution ON tvf2.feature_id = screen_resolution.id AND screen_resolution.feature_name = 'screen_resolution'
WHERE CAST(screen_size.feature_value AS INTEGER) >= 50
AND screen_resolution.feature_value LIKE '%1920 x 1080%' OR screen_resolution.feature_value LIKE '%3840 x 2160%';


In [None]:
# NOTE(review): disabled draft — every line of this cell is commented out.
# It references names (ChatPromptTemplate, RunnablePassthrough, sql_chain,
# get_schema, input_db, llm) that are not imported or defined in the visible
# cells, so it cannot run as-is if uncommented.
# template = """Based on the table schema below, question, sql query, and sql response, write a natural language response:
# {schema}

# Question: {question}
# SQL Query: {query}
# SQL Response: {response}"""
# prompt_response = ChatPromptTemplate.from_template(template)

# def run_query(query):
#     return input_db.run(query)

# full_chain = (
#     RunnablePassthrough.assign(query=sql_chain).assign(
#         schema=get_schema,
#         response=lambda vars: run_query(vars["query"]),
#     )
#     | prompt_response
#     | llm
# )
# print(full_chain.invoke({"question": user_question}))
