# Product recommendations using embeddings 

In [None]:
!pip3 install openai

In [None]:
import urllib
import urllib.error
import urllib.request

import openai

## Loading the OpenAI API key and setting it as an environment variable

In [None]:
# Read the API key from a local file that lives outside the notebook directory.
# Surrounding whitespace is stripped: key files usually end with a newline, and
# a newline inside the Authorization header value is rejected by the HTTP client.
with open("../openai_key.txt", "r", encoding="utf-8") as file:
    openai_key = file.read().strip()

In [None]:
import os
# Export the key to this process's environment under the name OPENAI_KEY.
# NOTE(review): no visible cell reads OPENAI_KEY back; the request helper uses
# the `openai_key` variable directly. Confirm whether this export is needed.
os.environ["OPENAI_KEY"] = openai_key

## Requesting an embedding from OpenAI

In [None]:
import json

# Define the endpoint URL
url = 'https://api.openai.com/v1/embeddings'

# Create a function to make the API request
def get_openai_embedding(prompt, model="text-embedding-ada-002", timeout=30):
    """Request an embedding for ``prompt`` from the OpenAI embeddings endpoint.

    Args:
        prompt: Text to embed; sent as the ``input`` field of the request.
        model: Name of the embedding model to request.
        timeout: Seconds to wait on the HTTP call before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``embedding`` value of the first item in the response's ``data``
        array, or ``None`` if the request failed (the error is printed).
    """
    headers = {
        # Strip the key defensively: a stray newline makes the header invalid.
        'Authorization': f'Bearer {openai_key.strip()}',
        'Content-Type': 'application/json',
    }
    payload = json.dumps({"input": prompt, "model": model}).encode('utf-8')
    req = urllib.request.Request(url, data=payload, headers=headers, method='POST')

    try:
        # The context manager closes the connection even if decoding fails.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            response_data = json.loads(response.read().decode('utf-8'))
    except urllib.error.HTTPError as e:
        print(f"HTTP Error: {e}")
        return None
    except (urllib.error.URLError, TimeoutError) as e:
        # Connection-level failures (DNS, refused connection, timeout) are
        # reported the same way as HTTP errors instead of crashing the caller.
        print(f"Network Error: {e}")
        return None

    return response_data['data'][0]['embedding']

In [None]:
cat = get_openai_embedding("cat")
dog = get_openai_embedding("dog")

## Finding Cosine Similarity between 2 embeddings

In [None]:
def cosine_similarity(a, b):
    """Return the cosine similarity of two numeric vectors.

    The dot product is divided by the product of the two vector lengths, so
    the result does not depend on the magnitude of the inputs. For vectors
    that are already unit-length this equals the plain dot product.

    Args:
        a: First vector (iterable of numbers).
        b: Second vector (iterable of numbers); paired element-wise with ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a float, or ``0.0``
        when either vector has zero length (including empty input), where the
        angle is undefined.
    """
    import math

    dot = 0.0
    norm_a_sq = 0.0
    norm_b_sq = 0.0
    # Single pass: accumulate the dot product and both squared norms together.
    for x, y in zip(a, b):
        dot += x * y
        norm_a_sq += x * x
        norm_b_sq += y * y

    if norm_a_sq == 0 or norm_b_sq == 0:
        return 0.0
    return dot / (math.sqrt(norm_a_sq) * math.sqrt(norm_b_sq))

In [None]:
car = get_openai_embedding("car")

In [None]:
cosine_similarity(cat,dog)

In [None]:
cosine_similarity(cat,car)

## Initializing Data

In [None]:
# Demo product catalogue, grouped by category.
# Maps an integer product id to a list of [title, description]; the embedding
# cell further down appends each product's embedding as a third list element.
product_data = {
# Wireless Earbuds
    0: ["Wireless Earbuds", "High-quality sound with noise cancellation, Bluetooth 5.0, and 24-hour battery life."],
    1: ["Wireless Earbuds Pro", "Enhanced bass, active noise canceling, sweatproof, and touch controls."],
    2: ["Sports Wireless Earbuds", "Water-resistant, secure fit for workouts, 12-hour battery life."],
    3: ["Budget Wireless Earbuds", "Affordable, good sound quality, compact design, and 8-hour battery life."],
    4: ["Gaming Wireless Earbuds", "Low latency, surround sound effects, perfect for gaming sessions."],
    5: ["Luxury Wireless Earbuds", "Premium build, active noise cancellation, 36-hour battery life."],
    6: ["Mini Wireless Earbuds", "Ultra-compact design, noise isolation, and USB-C charging."],
    7: ["True Wireless Earbuds", "Dual-mic setup for clear calls, IPX5 water resistance."],
    8: ["Eco-Friendly Wireless Earbuds", "Made from recycled materials, biodegradable packaging."],

# Gaming Laptops
    9: ["Gaming Laptop", "15.6-inch display, Intel i7 processor, NVIDIA RTX 3060, 16GB RAM, 512GB SSD."],
    10: ["Gaming Laptop Ultra", "17-inch 4K screen, AMD Ryzen 9, RTX 4080, 32GB RAM, 1TB SSD."],
    11: ["Budget Gaming Laptop", "14-inch screen, Intel i5 processor, GTX 1650, 8GB RAM, 256GB SSD."],
    12: ["Portable Gaming Laptop", "Lightweight, 15-inch screen, RTX 3050, and 10-hour battery life."],
    13: ["High-End Gaming Laptop", "QHD display, 240Hz refresh rate, RTX 4090, and vapor chamber cooling."],
    14: ["Entry-Level Gaming Laptop", "Intel i3, GTX 1050, 4GB RAM, suitable for casual gaming."],
    15: ["VR-Ready Gaming Laptop", "Powerful GPU, optimized for virtual reality experiences."],

# Smartwatches
    16: ["Smartwatch", "Fitness tracking, notifications, waterproof, customizable watch faces."],
    17: ["Luxury Smartwatch", "Premium design, AMOLED screen, ECG monitor, and 14-day battery life."],
    18: ["Kids Smartwatch", "GPS tracking, parental controls, and fun games for kids."],
    19: ["Sports Smartwatch", "Heart rate monitor, built-in GPS, and rugged design for outdoor use."],
    20: ["Hybrid Smartwatch", "Classic analog design with fitness tracking and notifications."],
    21: ["Budget Smartwatch", "Affordable, basic fitness tracking, and long-lasting battery."],

# Fitness Trackers
    22: ["Fitness Tracker", "Heart rate monitor, step counter, sleep analysis, waterproof, 7-day battery life."],
    23: ["Advanced Fitness Tracker", "Built-in GPS, SpO2 monitor, and AI-based fitness insights."],
    24: ["Budget Fitness Tracker", "Affordable price, basic fitness tracking, and 10-day battery."],
    25: ["Premium Fitness Tracker", "Luxury design, ECG monitor, and water resistance up to 50m."],
    26: ["Compact Fitness Tracker", "Slim and lightweight, perfect for 24/7 wear."],
    27: ["Kids Fitness Tracker", "Bright colors, parental tracking, and interactive activity challenges."],
    28: ["Rugged Fitness Tracker", "Shockproof and waterproof, designed for extreme sports."],
    29: ["Fashion Fitness Tracker", "Stylish design, customizable bands, and accurate fitness data."],
    30: ["Multi-Sport Fitness Tracker", "Track swimming, cycling, running, and more with precise metrics."],
    31: ["Eco-Friendly Fitness Tracker", "Solar-powered, biodegradable materials, and energy-efficient sensors."],

# Drones
    32: ["Drone with Camera", "4K UHD camera, GPS, 30-minute flight time, foldable design for easy transport."],
    33: ["Mini Drone with Camera", "Compact size, 1080p camera, 20-minute flight, and headless mode."],
    34: ["Professional Drone", "Long-range control, high-resolution camera, and obstacle avoidance."],
    35: ["Beginner Drone", "Easy controls, lightweight, and suitable for first-time users."],
    36: ["Racing Drone", "High-speed performance, aerodynamic design, and first-person view mode."],

# Bluetooth Speakers
    37: ["Bluetooth Speaker", "Portable, waterproof, 10-hour battery life, high-quality sound."],
    38: ["Party Bluetooth Speaker", "Built-in mic, colorful LED lights, 24-hour battery, and deep bass."],
    39: ["Compact Bluetooth Speaker", "Pocket-sized, clear sound, and wireless pairing."],
    40: ["Premium Bluetooth Speaker", "360-degree sound, rich bass, and sleek aluminum design."],
    41: ["Outdoor Bluetooth Speaker", "Rugged design, waterproof, and long-range Bluetooth connectivity."],
}

## Generating embeddings on data & storing it

In [None]:
# Preview the "title: description" text of every product in the catalogue.
for i in product_data.values():
    print(i[0] + ": " + i[1])

In [None]:
# Attach an embedding of "title: description" to every product as element 2.
for product_info in product_data.values():
    embedding = get_openai_embedding(f"{product_info[0]}: {product_info[1]}")
    if embedding is None:
        # Fail here with a clear message rather than storing None and
        # crashing later when the embedding is used.
        raise RuntimeError(
            f"Could not generate an embedding for product {product_info[0]!r}"
        )
    # Slice-assign instead of append so re-running this cell replaces the
    # stored embedding rather than growing the list on every run.
    product_info[2:] = [embedding]

In [None]:
product_data[0]

## Getting user prompt and recommendations accordingly

In [None]:
def recommend(prompt, top_k=3):
    """Return the products whose embeddings are most similar to ``prompt``.

    Every product in ``product_data`` that has an embedding is scored against
    the embedding of ``prompt`` with ``cosine_similarity``, and the
    highest-scoring ones are returned.

    Args:
        prompt: Free-text description of what the user is looking for.
        top_k: Maximum number of recommendations to return.

    Returns:
        A list of at most ``top_k`` dicts with ``"title"`` and
        ``"description"`` keys, best match first. The list is empty when the
        prompt could not be embedded or no product has an embedding.
    """
    user_embd = get_openai_embedding(prompt)
    if user_embd is None:
        # The embedding helper returns None on failure; nothing to rank against.
        return []

    # Skip products without a usable embedding instead of crashing on them.
    similarity_scores = [
        (cosine_similarity(user_embd, product_info[2]), product_id)
        for product_id, product_info in product_data.items()
        if len(product_info) > 2 and product_info[2] is not None
    ]
    similarity_scores.sort(key=lambda pair: pair[0], reverse=True)

    # Slicing (rather than indexing a fixed range) tolerates short catalogues.
    return [
        {
            "title": product_data[product_id][0],
            "description": product_data[product_id][1],
        }
        for _, product_id in similarity_scores[:max(top_k, 0)]
    ]

In [None]:
# Example query: print each recommended product on its own paragraph.
prompt = "fitness tracker for childrens"
recommendations = recommend(prompt)
for product in recommendations:
    print("\n" + product["title"] + ": " + product["description"])

## Creating Vector Database and creating a collection inside it

We use a vector database because the `for` loop above compares the query against every product in the data set, which is inefficient: each search costs O(n). A vector database can index the embeddings in a tree-like structure organised by similarity, which allows us to search in roughly O(log n) time.

In [None]:
!pip3 install -U pymilvus

In [None]:
from pymilvus import MilvusClient

client = MilvusClient("milvus_demo.db")

In [None]:
# Drop any existing "demo_collection" before creating it again, so the
# collection always starts empty (presumably so this cell can be re-run).
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")
client.create_collection(
    collection_name="demo_collection",
    # NOTE(review): must match the length of the vectors inserted later;
    # 1536 is presumably the output size of text-embedding-ada-002 — confirm
    # if the embedding model changes.
    dimension=1536,
)

## Inserting our data into collection created

In [None]:
# Build one row per product for insertion into the collection: the product id,
# its embedding, the full "title: description" text and the title on its own.
# A comprehension keeps the loop variables out of the notebook's global
# namespace (the previous loop rebound the builtin name `id`).
data = [
    {
        "id": product_id,
        "vector": product_info[2],
        "text": f"{product_info[0]}: {product_info[1]}",
        "subject": product_info[0],
    }
    for product_id, product_info in product_data.items()
]

In [None]:
data

In [None]:
res = client.insert(collection_name="demo_collection", data=data)

print(res)

## Getting user prompt and search on Vector Database to get recommendations accordingly

In [None]:
def search(prompt, limit=3):
    """Return the products closest to ``prompt`` according to the vector database.

    The prompt is embedded with ``get_openai_embedding`` and used as the query
    vector for a search in the "demo_collection" collection; each hit's id is
    then looked up in ``product_data``.

    Args:
        prompt: Free-text description of what the user is looking for.
        limit: Maximum number of hits to request from the database.

    Returns:
        A list of "title: description" strings in the order the database
        returned the hits. The list is empty when the prompt could not be
        embedded, and may be shorter than ``limit``.
    """
    query_vector = get_openai_embedding(prompt)
    if query_vector is None:
        # The embedding helper returns None on failure; nothing to search with.
        return []

    res = client.search(
        collection_name="demo_collection",
        data=[query_vector],
        limit=limit,
        output_fields=["text", "subject"],
    )

    # Exactly one query vector was sent, so only the first hit list is read.
    # Iterate over the hits actually returned rather than assuming a fixed
    # count, which raised IndexError when the collection held fewer rows.
    result = []
    for hit in res[0]:
        prod = product_data[hit['id']]
        result.append(f"{prod[0]}: {prod[1]}")

    return result

In [None]:
# Example query against the vector database; one blank line before each hit.
res = search("i am looking for a device to moniter health of my kid")
for product in res:
    print(f"\n{product}")