#### Test notebook: builds a SQLite product database and generates SQL queries from natural-language questions

In [1]:
import json
import os
import re
import sqlite3

from langchain import SQLDatabase
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Function to create database and tables
def create_database():
    conn = sqlite3.connect('product_database2.db')
    c = conn.cursor()

    # Create products table
    c.execute('''CREATE TABLE IF NOT EXISTS products (
                    id INTEGER PRIMARY KEY,
                    url TEXT,
                    title TEXT,
                    warranty INTEGER,
                    regular_price REAL,
                    happy_price REAL
                 )''')

    # Create feature_products table
    c.execute('''CREATE TABLE IF NOT EXISTS feature_products (
                    id INTEGER PRIMARY KEY,
                    feature_name TEXT
                 )''')

    # Create product_feature table
    c.execute('''CREATE TABLE IF NOT EXISTS product_feature (
                    product_id INTEGER,
                    feature_id INTEGER,
                    value TEXT,
                    FOREIGN KEY (product_id) REFERENCES products(id),
                    FOREIGN KEY (feature_id) REFERENCES feature_products(id)
                 )''')

    conn.commit()
    conn.close()

# Function to insert product data into database
def insert_product_data(product_data):
    conn = sqlite3.connect('product_database2.db')
    c = conn.cursor()

    # Insert product data into products table
    c.execute('''INSERT INTO products (url, title, warranty, regular_price, happy_price)
                 VALUES (?, ?, ?, ?, ?)''', (product_data['url'], product_data['title'], 
                 product_data['warranty'], product_data['regular_price'], product_data['happy_price']))
    product_id = c.lastrowid

    # Insert feature data into feature_products table and product_feature table
    for feature, value in product_data['features'].items():
        # Check if feature already exists
        c.execute('''SELECT id FROM feature_products WHERE feature_name = ?''', (feature,))
        feature_row = c.fetchone()
        if feature_row:
            feature_id = feature_row[0]
        else:
            c.execute('''INSERT INTO feature_products (feature_name) VALUES (?)''', (feature,))
            feature_id = c.lastrowid

        # Insert into product_feature table
        c.execute('''INSERT INTO product_feature (product_id, feature_id, value) VALUES (?, ?, ?)''',
                  (product_id, feature_id, value))

    conn.commit()
    conn.close()

# Function to read JSON files from a directory and insert data into database
def process_json_files(directory):
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            with open(os.path.join(directory, filename), 'r') as file:
                product_data = json.load(file)
                insert_product_data(product_data)

# Create database and tables
create_database()

# Process JSON files and insert data into database
json_directory = r'C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_categories_fixed\gaming_laptops\gaming_laptops_flattened'

process_json_files(json_directory)

In [4]:
conn = sqlite3.connect('product_database2.db')
c = conn.cursor()

llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0)

input_db = SQLDatabase.from_uri('sqlite:///product_database2.db')

In [11]:
template = """Based on the table schema below, write a SQL query that would answer the user's question:
{schema}

Here are all the features for the products the user might ask for:
1	Display
2	Resolution
3	Processor
4	Graphics
5	RAM
6	Storage
7	LAN
8	Bluetooth
9	Microphone
10	Operating System
11	HDMI Port
12	USB Port
13	LAN Port
14	Type C Port
15	Battery
16	Wireless
17	Fingerprint Reader
18	Camera
19	Power Adapter
20	Headset Port
21	RJ45 Ethernet Port
22	USB Ports
23	USB-C Port
24	Display Port
25	Backlit Keyboard
26	Keyboard Backlight
27	Network Card
28	USB Type-C Port
29	Mic
30	Audio Line Out
31	Battery Life
32	Battery Runtime
33	DisplayPort
34	USB Gen2 Port
35	USB Gen1 Port
36	Speakers
37	Weight
38	Ports
39	Audio
40	Ethernet Port
41	Thunderbolt Port
42	Card Reader
43	Security
44	Keyboard
45	Fast Charging
46	Fast Charge
47	NVIDIA G-Sync
48	Audio Tech
49	Audio Certification
50	RJ45 LAN port
51	Thunderbolt 4 port
52	USB 3.2 Gen 2 Type-C port
53	USB 3.2 Gen 2 Type-A port
54	Card reader
55	HDMI 2.1 FRL port
56	3.5 mm Combo Audio Jack port
57	Adaptive-Sync Technology
58	Thunderbolt 4
59	USB-C
60	USB-A
61	HDMI
62	Aura Sync
63	Dolby Atmos
64	Hi-Res Certification
65	Smart Amp Technology
66	USB-C Ports
67	Thunderbolt 4 Port
68	USB-C Port with DisplayPort
69	USB-A Ports
70	Color
71	Wi-Fi
72	USB 2.0 Ports
73	USB 3.2 Gen 1 Ports
74	USB 3.2 Gen 2 Ports
75	Mini DisplayPort
76	Microphone Jack
77	Microphone Port
78	DC-in Port
79	MicroSD Card Reader
80	Mini DP Port
81	DTS Technology
82	USB 2.0 Port
83	USB 3.2 Gen 1 Port
84	USB 3.2 Gen 2 Port
85	Chipset
86	Network
87	Ethernet
Question: {question}
SQL Query:"""
prompt = ChatPromptTemplate.from_template(template)

def get_schema(db):
    schema = input_db.get_table_info()
    return schema

In [6]:
print(get_schema(input_db))


CREATE TABLE feature_products (
	id INTEGER, 
	feature_name TEXT, 
	PRIMARY KEY (id)
)

/*
3 rows from feature_products table:
id	feature_name
1	Display
2	Resolution
3	Processor
*/


CREATE TABLE product_feature (
	product_id INTEGER, 
	feature_id INTEGER, 
	value TEXT, 
	FOREIGN KEY(product_id) REFERENCES products (id), 
	FOREIGN KEY(feature_id) REFERENCES feature_products (id)
)

/*
3 rows from product_feature table:
product_id	feature_id	value
1	1	15.6" (39.6cm)
1	2	(1920 x 1080) FHD IPS 144Hz
1	3	Intel® Core i5-11400H 2.70 GHz Hexa-core
*/


CREATE TABLE products (
	id INTEGER, 
	url TEXT, 
	title TEXT, 
	warranty INTEGER, 
	regular_price REAL, 
	happy_price REAL, 
	PRIMARY KEY (id)
)

/*
3 rows from products table:
id	url	title	warranty	regular_price	happy_price
1	https://www.neptun.mk/categories/gaming_laptopi/ACER-AN515-57-53A7-i5-11400H-8GB-512B-RTX-3050-4GB	ЛАПТОП ACER AN515-57-53A7 I5-11400H/8GB/512B/RTX 3050 4GB	24	55999.0	47999.0
2	https://www.neptun.mk/categories/gaming_l

In [15]:
sql_chain = (
    RunnablePassthrough.assign(schema=get_schema)
    | prompt
    | llm.bind(stop=["\nSQLResult:"])
    | StrOutputParser()
)

user_question = 'What laptops are there that have an NVIDIA graphics card and have an AMD processor?'
smth= sql_chain.invoke({"question": user_question})
print(smth)
output_string = smth.replace("```sql", "").replace("```", "")
print(output_string)

```sql
SELECT DISTINCT
  p.title
FROM products AS p
JOIN product_feature AS pf
  ON p.id = pf.product_id
JOIN feature_products AS fp
  ON pf.feature_id = fp.id
WHERE
  fp.feature_name = 'Graphics' AND pf.value LIKE '%NVIDIA%'
INTERSECT
SELECT DISTINCT
  p.title
FROM products AS p
JOIN product_feature AS pf
  ON p.id = pf.product_id
JOIN feature_products AS fp
  ON pf.feature_id = fp.id
WHERE
  fp.feature_name = 'Processor' AND pf.value LIKE '%AMD%';
```

SELECT DISTINCT
  p.title
FROM products AS p
JOIN product_feature AS pf
  ON p.id = pf.product_id
JOIN feature_products AS fp
  ON pf.feature_id = fp.id
WHERE
  fp.feature_name = 'Graphics' AND pf.value LIKE '%NVIDIA%'
INTERSECT
SELECT DISTINCT
  p.title
FROM products AS p
JOIN product_feature AS pf
  ON p.id = pf.product_id
JOIN feature_products AS fp
  ON pf.feature_id = fp.id
WHERE
  fp.feature_name = 'Processor' AND pf.value LIKE '%AMD%';



#### This cell is meant to run the generated query and produce a natural-language response. It was never used, because the SQL query first has to be extracted from the model's Markdown-fenced output.

In [None]:
template = """Based on the table schema below, question, sql query, and sql response, write a natural language response:
{schema}

Question: {question}
SQL Query: {query}
SQL Response: {response}"""
prompt_response = ChatPromptTemplate.from_template(template)

def run_query(query):
    return input_db.run(query)

full_chain = (
    RunnablePassthrough.assign(query=sql_chain).assign(
        schema=get_schema,
        response=lambda vars: run_query(vars["query"]),
    )
    | prompt_response
    | llm
)
print(full_chain.invoke({"question": user_question}))