In [1]:
import sqlite3
import json
import os
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain import SQLDatabase
from dotenv import load_dotenv, find_dotenv
import sys
import stat
from langchain_community.llms import HuggingFaceEndpoint
from langchain_anthropic import ChatAnthropic

  from .autonotebook import tqdm as notebook_tqdm


#### Load up API Keys

In [2]:
load_dotenv(find_dotenv())
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

#### Create Database File for Laptops

In [4]:
# Function to create database and tables
def create_database():
    conn = sqlite3.connect('product_database_laptops.db')
    c = conn.cursor()

    # Create products table
    c.execute('''CREATE TABLE IF NOT EXISTS laptops (
                    id INTEGER PRIMARY KEY,
                    url TEXT,
                    title TEXT,
                    warranty INTEGER,
                    regular_price REAL,
                    happy_price REAL,
                    category TEXT
                 )''')

    # Create feature_products table
    c.execute('''CREATE TABLE IF NOT EXISTS feature_laptops (
                    id INTEGER PRIMARY KEY,
                    feature_name TEXT,
                    feature_value TEXT
                 )''')

    # Create product_feature table
    c.execute('''CREATE TABLE IF NOT EXISTS laptop_feature (
                    product_id INTEGER,
                    feature_id INTEGER,
                    FOREIGN KEY (product_id) REFERENCES laptops(id),
                    FOREIGN KEY (feature_id) REFERENCES feature_laptops(id)
                 )''')

    conn.commit()
    conn.close()

# Function to insert product data into database
def insert_product_data(product_data):
    conn = sqlite3.connect('product_database_laptops.db')
    c = conn.cursor()

    # Insert product data into products table
    c.execute('''INSERT INTO laptops (url, title, warranty, regular_price, happy_price, category)
                 VALUES (?, ?, ?, ?, ?, ?)''', (
                    product_data.get('url'),
                    product_data.get('title'),
                    product_data.get('warranty_months'),
                    product_data.get('regular_price_mkd'),
                    product_data.get('happy_price_mkd'),
                    product_data.get('category')
                 ))
    product_id = c.lastrowid

    # Insert feature data into feature_products table and product_feature table
    for feature, value in product_data.get('features', {}).items():
        # Check if feature already exists
        c.execute('''SELECT id FROM feature_laptops WHERE feature_name = ? AND feature_value = ?''', (feature, value))
        feature_row = c.fetchone()
        if feature_row:
            feature_id = feature_row[0]
        else:
            c.execute('''INSERT INTO feature_laptops (feature_name, feature_value) VALUES (?, ?)''', (feature, value))
            feature_id = c.lastrowid

        # Insert into product_feature table
        c.execute('''INSERT INTO laptop_feature (product_id, feature_id) VALUES (?, ?)''',
                  (product_id, feature_id))

    conn.commit()
    conn.close()

# Function to read JSON files from a directory and insert data into database
def process_json_files(directory):
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            with open(os.path.join(directory, filename), 'r') as file:
                product_data = json.load(file)
                insert_product_data(product_data)

# Create database and tables
create_database()

# Process JSON files and insert data into database
json_directory = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\gaming_laptops\gaming_laptops_new_template'
process_json_files(json_directory)


#### Get All "feature_name=feature_value" pairs for each key-value 

In [5]:
def get_features_string(db_path):
    # Connect to the SQLite database
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    # Query to select all features
    cursor.execute("SELECT feature_name, feature_value FROM feature_laptops WHERE feature_value IS NOT NULL")
    
    # Fetch all rows from the executed query
    features = cursor.fetchall()
    
    # Close the database connection
    conn.close()
    
    # Format the features into the desired string format
    features_string = ", ".join([f"{name}={value}" for name, value in features])
    
    return features_string

# Example usage
db_path = 'product_database_laptops.db'
features_string = get_features_string(db_path)
print(features_string)

screen_size_inches=15.6, screen_resolution=1920 x 1080, screen_type=FHD IPS, screen_refresh_rate_hz=144, processor_model=Intel® Core i5-11400H, processor_cores=6, processor_base_clock_ghz=2.7, gpu_model=NVIDIA® GeForce RTX 3050, gpu_memory_gb=4, ram_size_gb=8, ram_type=DDR4, storage_size_tb=0.512, storage_type=SSD, operating_system=UEFI Shell, wifi_standard=IEEE 802.11 a/b/g/n/ac/ax, bluetooth_version=5.2, usb_type_c_ports=1, hdmi_ports=1, battery_cells=4, screen_type=FHD, IPS, SlimBezel, processor_model=Intel® Core™ i5-1240P, processor_cores=12, processor_threads=16, processor_max_clock_ghz=4.4, gpu_model=NVIDIA® GeForce RTX™ 2050, ram_size_gb=16, storage_type=PCIe NVMe SSD, operating_system=Без оперативен систем, wifi_standard=Wi-Fi 6 AX, bluetooth_version=BT, battery_capacity_wh=50, security_feature_1=Acer PBA fingerprint reader, screen_type=IPS, processor_model=Intel® Core™ i7-1260P, processor_max_clock_ghz=4.7, wifi_standard=IEEE 802.11ax, processor_model=Intel® Core™ i5-12450H, p

#### Connect with Database File

In [6]:
conn = sqlite3.connect('product_database_laptops.db')
c = conn.cursor()
input_db = SQLDatabase.from_uri('sqlite:///product_database_laptops.db')

#### Chat Prompt Template for Communicating With Laptops

In [7]:
template = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Please note that in the schema, the regular_price is the normal price, while the happy_price is a discounted one.
Table Descriptions:
laptops Table:
id: An integer representing the unique identifier for each laptop product.
url: A text field to store the URL of the product.
title: A text field for the product title.
warranty: An integer field representing the warranty duration (in months).
regular_price: A real number field for the regular price of the laptop.
happy_price: A real number field for the discounted price (if available).
category: A text field to categorize the laptop product (e.g., ‘Gaming’, ‘Business’, etc.).

feature_laptops Table:
id: An integer representing the unique identifier for each feature.
feature_name: A text field describing a specific feature (e.g., ‘Processor’, ‘RAM’, etc.).
feature_value: A text field containing the value of the feature (e.g., ‘Intel Core i7’, ‘16GB’, etc.).

laptop_feature Table:
product_id: An integer representing the foreign key reference to the id field in the Laptops Table.
feature_id: An integer representing the foreign key reference to the id field in the Feature Laptops Table."

Example input for questions asking for multiple features: 'What OLED TVs are there that have a display bigger than 60 inches?'
Example output for questions asking for multiple features:
'SELECT tvs.title, screen_size.feature_value, screen_type.feature_value 
FROM tvs
JOIN tv_feature AS tvf1 ON tvs.id = tvf1.product_id
JOIN feature_tvs AS screen_size ON tvf1.feature_id = screen_size.id
JOIN tv_feature AS tvf2 ON tvs.id = tvf2.product_id
JOIN feature_tvs AS screen_type ON tvf2.feature_id = screen_type.id
WHERE screen_size.feature_name = 'screen_size_inches'
AND CAST(screen_size.feature_value AS INTEGER) >= 60
AND screen_type.feature_name = 'panel_type'
AND screen_type.feature_value LIKE '%OLED%';'

Don't include a feature in the query if the user doesn't ask for it.

You ABSOLUTELY MUST use the ‘LIKE’ operator instead of ‘=’ in the SQL query for columns in the tables that are of TEXT value.
You ABSOLUTELY MUST use the ‘=’ operator instead of ‘LIKE’ in the SQL query for columns in the tables that are of REAL, DOUBLE, or INTEGER value.
Here are all the features a user might ask for:
{features}
Question: {question}
SQL Query:
"""
prompt = ChatPromptTemplate.from_template(template)

def get_schema(db):
    schema = input_db.get_table_info()
    return schema

#### 3 Models to Choose From: HuggingFace Mistral, Gemini, and Claude Sonnet

In [8]:
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.1, token=HUGGINGFACEHUB_API_TOKEN
)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\tomce\.cache\huggingface\token
Login successful


In [None]:
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.1)

In [None]:
llm = ChatAnthropic(temperature=0, model_name="claude-3-sonnet-20240229")

#### Ask Question to Get Query Answer

In [None]:
sql_chain = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)

user_question = 'What laptops are there that have at least 8GB of RAM?'
smth= sql_chain.invoke({"features": features_string,"question": user_question})
print(smth)
output_string = smth.replace("```sql", "").replace("```", "")
print(output_string)

### BELOW MISTRALAI

In [9]:
sql_chain = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)

user_question = 'What laptops are there that have at least 8GB of RAM?'
smth= sql_chain.invoke({"features": features_string,"question": user_question})
print(smth)
output_string = smth.replace("```sql", "").replace("```", "")
print(output_string)


```sql
SELECT laptops.title, ram_size.feature_value
FROM laptops
JOIN laptop_feature AS laptop_feature1 ON laptops.id = laptop_feature1.product_id
JOIN feature_laptops AS ram_size ON laptop_feature1.feature_id = ram_size.id
WHERE ram_size.feature_name = 'ram_size_gb'
AND CAST(ram_size.feature_value AS INTEGER) >= 8;
```

This query will return the title and the RAM size of all laptops that have at least 8GB of RAM.


SELECT laptops.title, ram_size.feature_value
FROM laptops
JOIN laptop_feature AS laptop_feature1 ON laptops.id = laptop_feature1.product_id
JOIN feature_laptops AS ram_size ON laptop_feature1.feature_id = ram_size.id
WHERE ram_size.feature_name = 'ram_size_gb'
AND CAST(ram_size.feature_value AS INTEGER) >= 8;


This query will return the title and the RAM size of all laptops that have at least 8GB of RAM.
