In [1]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
!pip install scikit-learn
!pip install fastapi uvicorn


Collecting fastapi
  Obtaining dependency information for fastapi from https://files.pythonhosted.org/packages/6d/45/d9d3e8eeefbe93be1c50060a9d9a9f366dba66f288bb518a9566a23a8631/fastapi-0.117.1-py3-none-any.whl.metadata
  Downloading fastapi-0.117.1-py3-none-any.whl.metadata (28 kB)
Collecting uvicorn
  Obtaining dependency information for uvicorn from https://files.pythonhosted.org/packages/96/06/5cc0542b47c0338c1cb676b348e24a1c29acabc81000bced518231dded6f/uvicorn-0.36.0-py3-none-any.whl.metadata
  Downloading uvicorn-0.36.0-py3-none-any.whl.metadata (6.6 kB)
Collecting starlette<0.49.0,>=0.40.0 (from fastapi)
  Obtaining dependency information for starlette<0.49.0,>=0.40.0 from https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl.metadata
  Downloading starlette-0.48.0-py3-none-any.whl.metadata (6.3 kB)
Collecting typing-extensions>=4.8.0 (from fastapi)
  Obtaining dependency information for typin

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jupyter-server 1.23.4 requires anyio<4,>=3.1.0, but you have anyio 4.10.0 which is incompatible.
langchain 0.0.314 requires anyio<4.0, but you have anyio 4.10.0 which is incompatible.


In [10]:
pip install --upgrade fastapi pydantic typing_extensions


Collecting pydantic
  Obtaining dependency information for pydantic from https://files.pythonhosted.org/packages/3e/d3/108f2006987c58e76691d5ae5d200dd3e0f532cb4e5fa3560751c3a1feba/pydantic-2.11.9-py3-none-any.whl.metadata
  Downloading pydantic-2.11.9-py3-none-any.whl.metadata (68 kB)
     ---------------------------------------- 0.0/68.4 kB ? eta -:--:--
     ---------------------------- --------- 51.2/68.4 kB 871.5 kB/s eta 0:00:01
     -------------------------------------- 68.4/68.4 kB 938.7 kB/s eta 0:00:00
Collecting pydantic-core==2.33.2 (from pydantic)
  Obtaining dependency information for pydantic-core==2.33.2 from https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl.metadata
  Downloading pydantic_core-2.33.2-cp311-cp311-win_amd64.whl.metadata (6.9 kB)
Collecting typing-inspection>=0.4.0 (from pydantic)
  Obtaining dependency information for typing-inspection>=0.4.0 from http

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\asus\\anaconda3\\Lib\\site-packages\\~ydantic_core\\_pydantic_core.cp311-win_amd64.pyd'
Consider using the `--user` option or check the permissions.



In [11]:
# -------------------- Imports --------------------
import json
from typing import List, Union

import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -------------------- Load JSON Dataset --------------------
# NOTE(review): machine-specific absolute path; change DATA_PATH to wherever
# your copy of data2.json lives before running on another machine.
DATA_PATH = "C:\\Users\\asus\\Downloads\\Dice Challenge\\Recommend\\data2.json"

# JSON text is UTF-8; without an explicit encoding, open() on Windows decodes
# with the locale code page and can garble or reject non-ASCII product text.
with open(DATA_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)  # list of dicts

# Keep only relevant fields (keys missing from an item are simply skipped)
allowed_fields = ["id", "name", "description", "imageUrl", "price", "rating", "likes", "sex", "inStock"]
clean_data = [{k: item[k] for k in allowed_fields if k in item} for item in data]

# Convert to DataFrame (default RangeIndex, so row labels equal row positions)
df = pd.DataFrame(clean_data)

# Fill missing numeric fields with 0
df["price"] = df["price"].fillna(0)
df["likes"] = df["likes"].fillna(0)
df["rating"] = df["rating"].fillna(0)

# Normalization helpers: catalogue-wide maxima used to scale price and likes
max_price = df["price"].max()
max_likes = df["likes"].max()

# TF-IDF Vectorizer for product descriptions; row i of tfidf_matrix belongs to df row i
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["description"].fillna(""))

# -------------------- Pydantic Models --------------------
class CartItem(BaseModel):
    """One item of the cart sent to the /recommend endpoint.

    The fields mirror the dataset columns listed in ``allowed_fields`` except
    ``imageUrl``, which is not part of this model.
    """
    # The dataset serves integer ids (the sample response shows "id": 23) while
    # the local test sends string ids. pydantic v2 does not coerce int -> str,
    # so a plain `str` annotation rejects a recommended product that a client
    # posts back as a cart item. Accept both forms.
    id: Union[str, int]
    name: str
    # Free text; vectorised with TF-IDF to compare against product descriptions.
    description: str
    price: float
    rating: float
    likes: int
    sex: str
    inStock: bool

class Cart(BaseModel):
    """Request body of the /recommend endpoint: a list of CartItem objects."""
    items: List[CartItem]

# -------------------- Helper Functions --------------------
def safe_float(x):
    """Return 0.0 for ``None`` or a float NaN; return any other value unchanged.

    Used to keep NaN/None out of values that will be JSON-serialised.
    """
    if x is None:
        return 0.0
    # NaN is the only float that compares unequal to itself.
    is_nan = isinstance(x, float) and x != x
    return 0.0 if is_nan else x

def compute_score(cart_item, product, cart_vec, product_vec):
    """Score how well ``product`` matches ``cart_item``.

    The result is ``0.4 * text_score + 0.6 * numeric_score`` where the numeric
    part weights price closeness (0.4), ``rating / 5`` (0.3), likes relative to
    the catalogue maximum (0.2) and equal ``inStock`` flags (0.1).

    Args:
        cart_item: mapping with ``.get``; "price" and "inStock" are read.
        product: mapping with ``.get``; "price", "rating", "likes" and
            "inStock" are read.
        cart_vec: TF-IDF vector of the cart item's description.
        product_vec: TF-IDF vector of the product's description.

    Returns:
        The combined score, or ``-1`` if anything raised while scoring (the
        error is printed). Callers treat negative scores as failures.
    """
    try:
        # A zero or NaN maximum would turn the divisions below into inf/NaN,
        # so fall back to 1 as the normaliser in that case.
        price_norm = max_price if max_price > 0 else 1
        likes_norm = max_likes if max_likes > 0 else 1

        # Compare price, rating, likes, inStock.
        # A cart price far above the catalogue maximum makes the raw gap exceed 1;
        # clamp at 0 so the total cannot go negative and be mistaken for the
        # -1 failure sentinel (which would drop every product for that item).
        price_gap = abs(cart_item.get("price", 0) - product.get("price", 0)) / price_norm
        price_score = max(0.0, 1 - price_gap)
        rating_score = product.get("rating", 0) / 5
        like_score = product.get("likes", 0) / likes_norm
        ready_score = 1 if cart_item.get("inStock", 0) == product.get("inStock", 0) else 0

        numeric_score = 0.4*price_score + 0.3*rating_score + 0.2*like_score + 0.1*ready_score
        text_score = cosine_similarity(cart_vec, product_vec)[0][0]

        total_score = 0.4*text_score + 0.6*numeric_score
        return total_score
    except Exception as e:
        # Deliberate best-effort: one bad row must not abort the whole ranking.
        print("Error in compute_score:", e)
        return -1

def recommend_for_item(cart_item, df, top_k=2):
    """Return the ``top_k`` best-scoring products for one cart item.

    Every row of ``df`` is scored with ``compute_score``; rows whose score is
    not ``>= 0`` (failures, NaN) are skipped.

    Returns:
        A list of ``(product_row, score)`` tuples, highest score first, with
        each score passed through ``safe_float``.
    """
    query_vec = vectorizer.transform([cart_item.get("description", "")])

    ranked = []
    for row_label, product in df.iterrows():
        # The row label doubles as the row position in tfidf_matrix.
        value = compute_score(cart_item, product, query_vec, tfidf_matrix[row_label])
        if not value >= 0:
            continue
        ranked.append((product, value))

    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return [(product, safe_float(value)) for product, value in ranked[:top_k]]

def recommend_for_cart(cart_items, df, top_k_final=10, top_k_per_item=2):
    """Recommend products for a whole cart.

    Collects the best ``top_k_per_item`` products for every cart item, merges
    them, and returns the ``top_k_final`` highest-scoring ones.

    Args:
        cart_items: iterable of cart-item mappings (as used by ``recommend_for_item``).
        df: product DataFrame to rank.
        top_k_final: maximum number of recommendations returned.
        top_k_per_item: number of candidates taken per cart item (default 2).

    Returns:
        A list of product dicts (one per distinct product row), best first,
        with float NaN values replaced by 0.0.
    """
    # The same product can be a top hit for several cart items. Keep it once,
    # with its best score, keyed by the row label of the product Series.
    best_by_row = {}
    for cart_item in cart_items:
        for prod, score in recommend_for_item(cart_item, df, top_k=top_k_per_item):
            known = best_by_row.get(prod.name)
            if known is None or score > known[1]:
                best_by_row[prod.name] = (prod, score)

    final_top = sorted(best_by_row.values(), key=lambda pair: pair[1], reverse=True)[:top_k_final]

    # Convert Series to dict safely and sanitize floats. Only float values go
    # through safe_float, so a None in a non-numeric field is left as None.
    result = []
    for prod, _score in final_top:
        prod_dict = {k: safe_float(v) if isinstance(v, float) else v for k, v in prod.to_dict().items()}
        result.append(prod_dict)
    return result

ImportError: cannot import name 'Doc' from 'typing_extensions' (c:\Users\asus\anaconda3\Lib\site-packages\typing_extensions.py)

In [None]:
# -------------------- FastAPI App --------------------
# Application object; the /recommend route below is registered on it via @app.post.
app = FastAPI(title="Product Recommendation API")

@app.post("/recommend")
def get_recommendations(cart: Cart):
    """Return up to 10 recommended products for the posted cart."""
    # Only return essential fields
    allowed_fields = ["id", "name", "description", "imageUrl", "price", "rating", "likes", "sex", "inStock"]

    # Plain dicts are what the recommender helpers expect (they call .get on them).
    payload = [entry.model_dump() for entry in cart.items]
    ranked = recommend_for_cart(payload, df, top_k_final=10)

    trimmed = []
    for rec in ranked:
        trimmed.append({key: value for key, value in rec.items() if key in allowed_fields})

    return {"recommendations": trimmed}


# **This code is for local testing**

In [None]:
import nest_asyncio
import uvicorn
from threading import Thread
import time
import requests
import socket
import json

# NOTE(review): presumably here so an event loop can be started inside the
# notebook kernel; since uvicorn is run in its own thread below, confirm this
# patch is still needed.
nest_asyncio.apply()

# --- Helper to find a free port ---
def find_free_port():
    """Return a TCP port number that was free at the time of the call.

    Binds a throwaway socket to port 0 so the OS picks an unused port, reads
    the assigned number and closes the socket again. The port is not reserved:
    another process could take it before the caller binds to it.
    """
    # The context manager closes the socket even if bind()/getsockname() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))
        return s.getsockname()[1]

# Choose a port
# Picked once when this cell runs; run_app and the client URLs below all read it.
PORT = find_free_port()

# --- Start FastAPI in background ---
def run_app():
    """Serve `app` with uvicorn on 127.0.0.1:PORT; used as the target of the background thread."""
    uvicorn.run(app, host="127.0.0.1", port=PORT, log_level="info")

# Daemon thread: the server dies with the kernel instead of keeping it alive.
thread = Thread(target=run_app, daemon=True)
thread.start()

# --- Wait for server to be ready ---
url = f"http://127.0.0.1:{PORT}/recommend"
for _ in range(10):  # retry for ~10 seconds
    try:
        r = requests.get(f"http://127.0.0.1:{PORT}/docs", timeout=1)
        if r.status_code == 200:
            print("✅ Server is ready!")
            break
    except requests.exceptions.RequestException:
        # Server is not accepting connections yet (or the probe timed out).
        # Only network errors are ignored, so Ctrl+C / real bugs still surface.
        pass
    # Pause after every failed attempt, including a non-200 answer, so the
    # loop really spans about ten seconds instead of spinning.
    time.sleep(1)
else:
    raise RuntimeError("❌ FastAPI server did not start in time")

# --- Test request data (new schema) ---
# Shape matches the Cart model: {"items": [<CartItem fields>, ...]}.
# imageUrl is not a CartItem field and is therefore not sent.
cart_data = {
    "items": [
        {
            "id": "1",
            "name": "Noise Cancelling Headphones",
            "description": "Wired headphones with premium noise cancellation",
            "price": 12000,
            "rating": 4.0,
            "likes": 200,
            "sex": "NA",
            "inStock": True
        },
        {
            "id": "2",
            "name": "Summer Cotton T-shirt",
            "description": "Casual cotton t-shirt for summer daily wear",
            "price": 500,
            "rating": 4.5,
            "likes": 300,
            "sex": "Male",
            "inStock": True
        }
    ]
}

# --- Send request ---
# Transport and parsing are handled separately: requests' JSONDecodeError is
# both a RequestException and a ValueError, so with a single try block a bad
# response body was reported as a "Request error" and the raw text never shown.
try:
    response = requests.post(url, json=cart_data, timeout=10)
except requests.exceptions.RequestException as e:
    print("❌ Request error:", e)
else:
    print("Status Code:", response.status_code)
    print("Request Payload:", json.dumps(cart_data, indent=2))
    try:
        print("Response JSON:", json.dumps(response.json(), indent=2))
    except ValueError as e:
        # Body was not valid JSON; show it verbatim to aid debugging.
        print("❌ Error parsing response:", e)
        print(response.text)


INFO:     Started server process [476]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:54313 (Press CTRL+C to quit)


INFO:     127.0.0.1:54316 - "GET /docs HTTP/1.1" 200 OK
✅ Server is ready!
INFO:     127.0.0.1:54320 - "POST /recommend HTTP/1.1" 200 OK
Status Code: 200
Request Payload: {
  "items": [
    {
      "id": "1",
      "name": "Noise Cancelling Headphones",
      "description": "Wired headphones with premium noise cancellation",
      "price": 12000,
      "rating": 4.0,
      "likes": 200,
      "sex": "NA",
      "inStock": true
    },
    {
      "id": "2",
      "name": "Summer Cotton T-shirt",
      "description": "Casual cotton t-shirt for summer daily wear",
      "price": 500,
      "rating": 4.5,
      "likes": 300,
      "sex": "Male",
      "inStock": true
    }
  ]
}
Response JSON: {
  "recommendations": [
    {
      "id": 23,
      "name": "Wireless Bluetooth Headphones",
      "description": "Premium wireless headphones with noise cancellation",
      "imageUrl": "https://images.meesho.com/images/products/546019146/xzzgz_512.avif?width=360",
      "price": 1999,
      "ratin

INFO:     127.0.0.1:54322 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54323 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54324 - "GET /.well-known/appspecific/com.chrome.devtools.json HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54363 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54363 - "GET /.well-known/appspecific/com.chrome.devtools.json HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54378 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54378 - "GET /.well-known/appspecific/com.chrome.devtools.json HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:53755 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:53755 - "GET /.well-known/appspecific/com.chrome.devtools.json HTTP/1.1" 404 Not Found


In [None]:
# import requests
# import os
# import pandas as pd
# import random
# import time

# # --- Settings ---
# rows_per_cat = 50
# sizes = ["Small", "Medium", "Large"]
# categories = ["Clothing", "Electronics", "Furniture", "Books", "Toys"]
# ready_opts = [0, 1]
# sex_opts = ["Male", "Female", "Unisex"]

# category_desc = {
#     "Clothing": ["Comfortable cotton t-shirt", "Stylish jeans", "Elegant dress", "Warm hoodie", "Casual jacket"],
#     "Electronics": ["Latest smartphone", "High-resolution monitor", "Wireless headphones", "Portable speaker", "Smartwatch with GPS", "Boat headphones", "Wired headphones"],
#     "Furniture": ["Wooden dining table", "Ergonomic office chair", "Modern sofa", "Queen size bed", "Bookshelf with storage"],
#     "Books": ["Bestselling novel", "Educational textbook", "Science fiction story", "Inspirational biography", "Mystery thriller"],
#     "Toys": ["Puzzle game for kids", "Remote-controlled car", "Stuffed animal", "Building blocks set", "Educational toy kit"]
# }

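# # Pixabay API key: read it from the environment instead of hardcoding the secret
# # (the key that was previously committed here should be revoked).
# PIXABAY_KEY = os.environ["PIXABAY_KEY"]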
# image_folder = "product_images1"
# os.makedirs(image_folder, exist_ok=True)

# def download_pixabay_image(query, save_path):
#     url = f"https://pixabay.com/api/?key={PIXABAY_KEY}&q={query}&image_type=photo&per_page=3"
#     response = requests.get(url).json()
#     hits = response.get("hits")
#     if hits:
#         img_url = hits[0]["largeImageURL"]
#         img_data = requests.get(img_url).content
#         with open(save_path, "wb") as f:
#             f.write(img_data)
#         return True
#     return False

# # --- Generate dataset ---
# data = []
# product_id = 1

# for category in categories:
#     for _ in range(rows_per_cat):
#         size = random.choice(sizes) if category == "Clothing" else "NA"
#         desc = random.choice(category_desc[category])
#         image_path = f"{image_folder}/{product_id}.jpg"

#         # Use category + keyword for Pixabay search
#         query = f"{category} {desc.split()[0]}"
#         success = download_pixabay_image(query, image_path)
#         if not success:
#             # fallback placeholder
#             image_path = "product_images/placeholder.jpg"

#         row = {
#             "ProductID": product_id,
#             "Desc": desc,
#             "Size": size,
#             "Category": category,
#             "Price": random.randint(100, 20000),
#             "Ready": random.choice(ready_opts),
#             "Rating": round(random.uniform(1, 5), 1),
#             "Likes": random.randint(0, 1000),
#             "Sex": random.choice(sex_opts),
#             "ImagePath": image_path
#         }
#         data.append(row)
#         product_id += 1
#         time.sleep(0.2)  # avoid hitting API limits

# # Save CSV
# df = pd.DataFrame(data)
# df.to_csv("matching_dataset_with_pixabay_images.csv", index=False)
# print("✅ Dataset created with images downloaded from Pixabay!")


✅ Dataset created with images downloaded from Pixabay!


In [None]:
# from google.colab import files
# import shutil

# # Zip the folder you actually want to download
# shutil.make_archive("product_images1", 'zip', "product_images1")

# # Download the zip
# files.download("product_images1.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>