In [2]:
import sqlite3
import json
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain import SQLDatabase
from dotenv import load_dotenv, find_dotenv
import sys
import stat
from langchain_community.llms import HuggingFaceEndpoint
from langchain_anthropic import ChatAnthropic

  from .autonotebook import tqdm as notebook_tqdm


#### Load up API Keys

In [3]:
load_dotenv(find_dotenv())
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

#### Create Database File for Cables and Dividers

In [5]:
# Function to create database and tables
def create_database():
    conn = sqlite3.connect('product_database_cables_and_dividers.db')
    c = conn.cursor()

    # Create products table
    c.execute('''CREATE TABLE IF NOT EXISTS cables_and_dividers (
                    id INTEGER PRIMARY KEY,
                    url TEXT,
                    title TEXT,
                    warranty INTEGER,
                    regular_price REAL,
                    happy_price REAL,
                    category TEXT
                 )''')

    # Create feature_products table
    c.execute('''CREATE TABLE IF NOT EXISTS feature_cables_and_dividers (
                    id INTEGER PRIMARY KEY,
                    feature_name TEXT,
                    feature_value TEXT
                 )''')

    # Create product_feature table
    c.execute('''CREATE TABLE IF NOT EXISTS cable_and_divider_feature (
                    product_id INTEGER,
                    feature_id INTEGER,
                    FOREIGN KEY (product_id) REFERENCES cables_and_dividers(id),
                    FOREIGN KEY (feature_id) REFERENCES feature_cables_and_dividers(id)
                 )''')

    conn.commit()
    conn.close()

# Function to insert product data into database
def insert_product_data(product_data):
    conn = sqlite3.connect('product_database_cables_and_dividers.db')
    c = conn.cursor()

    # Insert product data into products table
    c.execute('''INSERT INTO cables_and_dividers (url, title, warranty, regular_price, happy_price, category)
                 VALUES (?, ?, ?, ?, ?, ?)''', (
                    product_data.get('url'),
                    product_data.get('title'),
                    product_data.get('warranty_months'),
                    product_data.get('regular_price_mkd'),
                    product_data.get('happy_price_mkd'),
                    product_data.get('category')
                 ))
    product_id = c.lastrowid

    # Insert feature data into feature_products table and product_feature table
    for feature, value in product_data.get('features', {}).items():
        # Check if feature already exists
        c.execute('''SELECT id FROM feature_cables_and_dividers WHERE feature_name = ? AND feature_value = ?''', (feature, value))
        feature_row = c.fetchone()
        if feature_row:
            feature_id = feature_row[0]
        else:
            c.execute('''INSERT INTO feature_cables_and_dividers (feature_name, feature_value) VALUES (?, ?)''', (feature, value))
            feature_id = c.lastrowid

        # Insert into product_feature table
        c.execute('''INSERT INTO cable_and_divider_feature (product_id, feature_id) VALUES (?, ?)''',
                  (product_id, feature_id))

    conn.commit()
    conn.close()

# Function to read JSON files from a directory and insert data into database
def process_json_files(directory):
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            with open(os.path.join(directory, filename), 'r') as file:
                product_data = json.load(file)
                insert_product_data(product_data)

# Create database and tables
create_database()

# Process JSON files and insert data into database
json_directory = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\cables_and_dividers\cables_and_dividers_new_template'
process_json_files(json_directory)


#### Get All "feature_name=feature_value" pairs for each key-value 

In [None]:
def get_features_string(db_path):
    # Connect to the SQLite database
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    # Query to select all features
    cursor.execute("SELECT feature_name, feature_value FROM feature_cables_and_dividers WHERE feature_value IS NOT NULL")
    
    # Fetch all rows from the executed query
    features = cursor.fetchall()
    
    # Close the database connection
    conn.close()
    
    # Format the features into the desired string format
    features_string = ", ".join([f"{name}={value}" for name, value in features])
    
    return features_string

# Example usage
db_path = 'product_database_cables_and_dividers.db'
features_string = get_features_string(db_path)
print(features_string)

#### Connect with Database File

In [7]:
conn = sqlite3.connect('product_database_cables_and_dividers.db')
c = conn.cursor()
input_db = SQLDatabase.from_uri('sqlite:///product_database_cables_and_dividers.db')

#### Chat Prompt Template for Communicating With Cables and Dividers

In [8]:
template = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Please note that in the schema, the regular_price is the normal price, while the happy_price is a discounted one.
Table Descriptions:
cables_and_dividers Table:
id: An integer representing the unique identifier for each cable and divider product.
url: A text field to store the URL of the product.
title: A text field for the product title.
warranty: An integer field representing the warranty duration (in months).
regular_price: A real number field for the regular price of the product.
happy_price: A real number field for the discounted price (if available).
category: A text field to categorize the product (e.g., ‘HDMI cables’, ‘USB dividers’, etc.).

feature_cables_and_dividers Table:
id: An integer representing the unique identifier for each feature.
feature_name: A text field describing a specific feature (e.g., ‘Length’, ‘Connector Type’, etc.).
feature_value: A text field containing the value of the feature (e.g., ‘6 feet’, ‘USB-A to USB-C’, etc.).

cable_and_divider_feature Table:
product_id: An integer representing the foreign key reference to the id field in the Cables and Dividers Table.
feature_id: An integer representing the foreign key reference to the id field in the Feature Cables and Dividers Table."

Example input for questions asking for multiple features: 'What OLED TVs are there that have a display bigger than 60 inches?'
Example output for questions asking for multiple features:
'SELECT tvs.title, screen_size.feature_value, screen_type.feature_value 
FROM tvs
JOIN tv_feature AS tvf1 ON tvs.id = tvf1.product_id
JOIN feature_tvs AS screen_size ON tvf1.feature_id = screen_size.id
JOIN tv_feature AS tvf2 ON tvs.id = tvf2.product_id
JOIN feature_tvs AS screen_type ON tvf2.feature_id = screen_type.id
WHERE screen_size.feature_name = 'screen_size_inches'
AND CAST(screen_size.feature_value AS INTEGER) >= 60
AND screen_type.feature_name = 'panel_type'
AND screen_type.feature_value LIKE '%OLED%';'
Don't include a feature in the query if the user doesn't ask for it.

You ABSOLUTELY MUST use the ‘LIKE’ operator instead of ‘=’ in the SQL query for columns in the tables that are of TEXT value.
You ABSOLUTELY MUST use the ‘=’ operator instead of ‘LIKE’ in the SQL query for columns in the tables that are of REAL, DOUBLE, or INTEGER value.
Here are all the features a user might ask for:
{features}
Question: {question}
SQL Query:
"""
prompt = ChatPromptTemplate.from_template(template)

def get_schema(db):
    schema = input_db.get_table_info()
    return schema

#### 3 Models to Choose From: HuggingFace Mistral, Gemini, and Claude Sonnet

In [9]:
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(
    repo_id=repo_id, temperature=0.1, token=HUGGINGFACEHUB_API_TOKEN
)

                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\tomce\.cache\huggingface\token
Login successful


In [19]:
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5)

In [11]:
llm = ChatAnthropic(temperature=0, model_name="claude-3-sonnet-20240229")

#### Ask Question to Get Query Answer

In [None]:
sql_chain = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)
user_question = 'What cables are there that are at least 1.5 meters and have 3 sockets?'
smth= sql_chain.invoke({"features": features_string,"question": user_question})
print(smth)

### BELOW MISTRALAI

In [10]:
sql_chain = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)

user_question = 'What cables are there that are at least 1.5 meters and have 3 sockets?'
smth= sql_chain.invoke({"features": features_string,"question": user_question})
print(smth)
output_string = smth.replace("```sql", "").replace("```", "")
print(output_string)

```
SELECT cables_and_dividers.title, cable_length_meters.feature_value, number_of_sockets.feature_value
FROM cables_and_dividers
JOIN cable_and_divider_feature AS cable_feature1 ON cables_and_dividers.id = cable_feature1.product_id
JOIN feature_cables_and_dividers AS cable_length_meters ON cable_feature1.feature_id = cable_length_meters.id
JOIN cable_and_divider_feature AS cable_feature2 ON cables_and_dividers.id = cable_feature2.product_id
JOIN feature_cables_and_dividers AS number_of_sockets ON cable_feature2.feature_id = number_of_sockets.id
WHERE cable_length_meters.feature_name = 'cable_length_meters'
AND CAST(cable_length_meters.feature_value AS DECIMAL(5,2)) >= 1.5
AND number_of_sockets.feature_name = 'number_of_sockets'
AND CAST(number_of_sockets.feature_value AS INTEGER) >= 3;
```
Explanation:
- We join the cables_and_dividers table with the cable_and_divider_feature table twice, once for the cable length and once for the number of sockets.
- We use the CAST function to conve