<a href="https://colab.research.google.com/github/jkeegan165/JoeResume/blob/main/friday_recommend02132026.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
!pip install faker
import pandas as pd
import numpy as np
from faker import Faker
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split



In [3]:
!pip install faker scikit-learn scipy




In [4]:
fake = Faker()

NUM_USERS = 10000
NUM_PRODUCTS = 5000
NUM_PURCHASES = 150000


In [5]:
users = pd.DataFrame({
    "user_id": range(1, NUM_USERS + 1),
    "name": [fake.name() for _ in range(NUM_USERS)],
    "email": [fake.email() for _ in range(NUM_USERS)],
    "city": [fake.city() for _ in range(NUM_USERS)]
})

users.head()


Unnamed: 0,user_id,name,email,city
0,1,Tracey Gates,kevinmelendez@example.net,Allenberg
1,2,Tara Porter,annecook@example.net,Adrianhaven
2,3,Miss Joanna Sloan,jennifer02@example.com,Mooreport
3,4,John Morrison,colleenhenderson@example.com,West Robinland
4,5,James Mcguire,morrisashley@example.org,Villegasberg


In [6]:
categories = ["Shirts", "Shoes", "Pants", "Dresses", "Accessories"]
brands = ["Nike", "Adidas", "Zara", "H&M", "Uniqlo", "Levis"]

products = pd.DataFrame({
    "product_id": range(1, NUM_PRODUCTS + 1),
    "category": np.random.choice(categories, NUM_PRODUCTS),
    "brand": np.random.choice(brands, NUM_PRODUCTS),
    "price": np.round(np.random.uniform(10, 200, NUM_PRODUCTS), 2)
})

products.head()


Unnamed: 0,product_id,category,brand,price
0,1,Pants,Zara,105.83
1,2,Accessories,H&M,33.66
2,3,Shoes,Adidas,185.13
3,4,Accessories,Adidas,49.92
4,5,Shirts,H&M,146.85


In [7]:
purchases = pd.DataFrame({
    "purchase_id": range(1, NUM_PURCHASES + 1),
    "user_id": np.random.randint(1, NUM_USERS + 1, NUM_PURCHASES),
    "product_id": np.random.randint(1, NUM_PRODUCTS + 1, NUM_PURCHASES),
    "quantity": np.random.randint(1, 4, NUM_PURCHASES)
})

purchases.head()


Unnamed: 0,purchase_id,user_id,product_id,quantity
0,1,9855,4666,1
1,2,157,4282,1
2,3,7566,1444,3
3,4,4174,555,2
4,5,1997,3385,2


In [8]:
train, test = train_test_split(purchases, test_size=0.2, random_state=42)

train["interaction"] = 1


In [9]:
user_item = train.pivot_table(
    index="user_id",
    columns="product_id",
    values="interaction",
    fill_value=0
)


In [10]:
products["price_bucket"] = pd.cut(
    products["price"],
    bins=[0, 50, 100, 200],
    labels=["budget", "mid", "premium"]
)

products["features"] = (
    products["category"] + " " +
    products["brand"] + " " +
    products["price_bucket"].astype(str)
)

vectorizer = CountVectorizer()
feature_matrix = vectorizer.fit_transform(products["features"])
content_similarity = cosine_similarity(feature_matrix)


In [11]:
item_similarity = cosine_similarity(user_item.T)


In [12]:
popular_products = (
    train.groupby("product_id")
    .size()
    .sort_values(ascending=False)
    .index
    .tolist()
)


In [13]:
def precision_at_k(recommended, relevant, k=10):
    return len(set(recommended[:k]) & set(relevant)) / k

def recall_at_k(recommended, relevant, k=10):
    return len(set(recommended[:k]) & set(relevant)) / len(relevant)

def average_precision(recommended, relevant):
    score = 0
    hits = 0
    for i, r in enumerate(recommended):
        if r in relevant:
            hits += 1
            score += hits / (i + 1)
    return score / len(relevant)


In [16]:
def hybrid_recommend(user_id, item_id, k=10):
    # 1. Collaborative Filtering (Item-Item Similarity)
    # Get items the user has already interacted with
    user_past_interactions = user_item.loc[user_id]
    purchased_items = user_past_interactions[user_past_interactions > 0].index.tolist()

    collaborative_recs = set()
    for purchased_item_id in purchased_items:
        if purchased_item_id in user_item.columns:
            # Get similar items based on collaborative filtering
            item_idx = user_item.columns.get_loc(purchased_item_id)
            similar_items_cf = item_similarity[item_idx]
            # Get product_ids of similar items, sorted by similarity
            similar_item_ids_cf = user_item.columns[similar_items_cf.argsort()[::-1]].tolist()
            # Filter out already purchased items
            similar_item_ids_cf = [i for i in similar_item_ids_cf if i not in purchased_items]
            collaborative_recs.update(similar_item_ids_cf[:k])

    # 2. Content-Based Filtering
    content_recs = set()
    if item_id in products["product_id"].values:
        item_product_id_index = products[products["product_id"] == item_id].index[0]
        # Get similar items based on content features
        similar_items_cb = content_similarity[item_product_id_index]
        # Get product_ids of similar items, sorted by similarity
        similar_item_ids_cb = products.iloc[similar_items_cb.argsort()[::-1]]["product_id"].tolist()
        # Filter out the item itself and already purchased items
        similar_item_ids_cb = [i for i in similar_item_ids_cb if i != item_id and i not in purchased_items]
        content_recs.update(similar_item_ids_cb[:k])

    # Combine recommendations (simple union for now)
    combined_recs = list(collaborative_recs.union(content_recs))

    # Remove items user already purchased
    final_recs = [rec for rec in combined_recs if rec not in purchased_items]

    # Fill with popular products if not enough recommendations
    if len(final_recs) < k:
        for p_id in popular_products:
            if p_id not in purchased_items and p_id not in final_recs:
                final_recs.append(p_id)
            if len(final_recs) >= k:
                break

    return final_recs[:k]

def evaluate_user(user_id):
    relevant = test[test.user_id == user_id].product_id.tolist()

    if not relevant:
        return None

    # Use the first relevant item as a seed for content-based part
    recs = hybrid_recommend(user_id, relevant[0])

    return (
        precision_at_k(recs, relevant),
        recall_at_k(recs, relevant),
        average_precision(recs, relevant)
    )

results = [evaluate_user(u) for u in test.user_id.unique()]
results = [r for r in results if r]

print("Precision@10:", np.mean([r[0] for r in results]))
print("Recall@10:", np.mean([r[1] for r in results]))
print("MAP:", np.mean([r[2] for r in results]))

Precision@10: 0.0006745362563237774
Recall@10: 0.002171414518589898
MAP: 0.0007268635625507464


In [17]:
!pip install gradio





In [18]:
import gradio as gr


In [22]:
products["label"] = (
    "ID " + products["product_id"].astype(str) +
    " | " + products["brand"] +
    " | " + products["category"] +
    " | $" + products["price"].astype(str)
)


In [23]:
product_lookup = dict(zip(products["label"], products["product_id"]))
user_ids = users["user_id"].sample(200).tolist()


In [24]:
def recommend_ui(user_id, product_label):
    product_id = product_lookup[product_label]
    recs = hybrid_recommend(int(user_id), int(product_id))

    result = products[products.product_id.isin(recs)][
        ["product_id", "brand", "category", "price"]
    ]

    return result


In [25]:
interface = gr.Interface(
    fn=recommend_ui,
    inputs=[
        gr.Dropdown(user_ids, label="Select User ID"),
        gr.Dropdown(products["label"].tolist(), label="Select Product")
    ],
    outputs=gr.Dataframe(label="Recommended Products"),
    title="Fashion Store Recommendation Engine",
    description="Hybrid recommendation system using collaborative and content-based filtering."
)

interface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4af2d6603ce5b9a23e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


