In [1]:
pip install pandas scikit-learn transformers streamlit umap-learn


Collecting umap-learn
  Using cached umap_learn-0.5.6-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Using cached pynndescent-0.5.12-py3-none-any.whl.metadata (6.8 kB)
Using cached umap_learn-0.5.6-py3-none-any.whl (85 kB)
Using cached pynndescent-0.5.12-py3-none-any.whl (56 kB)
Installing collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.12 umap-learn-0.5.6
Note: you may need to restart the kernel to use updated packages.


Loading Data

In [2]:
import pandas as pd

data = pd.read_csv('fashion_products.csv')
print(data.head())


   User ID  Product ID Product Name   Brand         Category  Price    Rating  \
0       19           1        Dress  Adidas    Men's Fashion     40  1.043159   
1       97           2        Shoes     H&M  Women's Fashion     82  4.026416   
2       25           3        Dress  Adidas  Women's Fashion     44  3.337938   
3       57           4        Shoes    Zara    Men's Fashion     23  1.049523   
4       79           5      T-shirt  Adidas    Men's Fashion     79  4.302773   

    Color Size  
0   Black   XL  
1   Black    L  
2  Yellow   XL  
3   White    S  
4   Black    M  


Data Preprocessing

In [5]:
from transformers import DistilBertTokenizer

# Initialize the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Tokenize the product descriptions
def tokenize_descriptions(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors="pt")

data['tokenized_product_info'] = (data['Product Name'] + ' ' + data['Brand']).apply(tokenize_descriptions)


In [4]:
print(data.columns)

Index(['User ID', 'Product ID', 'Product Name', 'Brand', 'Category', 'Price',
       'Rating', 'Color', 'Size'],
      dtype='object')


Embedding Generation

In [7]:
from transformers import DistilBertModel
import torch

# Load the model
model = DistilBertModel.from_pretrained('distilbert-base-uncased')

# Function to generate embeddings
def generate_embeddings(tokenized_text):
    with torch.no_grad():
        return model(**tokenized_text).last_hidden_state[:,0,:].numpy()

data['embeddings'] = data['tokenized_product_info'].apply(generate_embeddings)


Dimensionality Reduction

In [8]:
import umap
import numpy as np

# Stack embeddings into a numpy array
embeddings = np.vstack(data['embeddings'].values)

# Reduce dimensions
reducer = umap.UMAP(n_neighbors=5, n_components=50, metric='cosine')
umap_embeddings = reducer.fit_transform(embeddings)

data['umap_embeddings'] = list(umap_embeddings)


Building the Recommendation System

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

# Function to recommend products
def recommend_products(product_index, num_recommendations=5):
    product_embedding = np.array(data.loc[product_index, 'umap_embeddings']).reshape(1, -1)
    similarities = cosine_similarity(product_embedding, np.vstack(data['umap_embeddings'].values)).flatten()
    indices = similarities.argsort()[-num_recommendations:][::-1]
    return data.iloc[indices]

# Test the function
recommendations = recommend_products(0)
print(recommendations[['Product ID','Product Name', 'Rating']])


     Product ID Product Name    Rating
0             1        Dress  1.043159
226         227        Dress  4.144274
348         349        Dress  3.150290
683         684        Dress  3.941315
15           16        Dress  1.422716


In [13]:
# streamlit_app.py
import streamlit as st

st.title('Fashion Product Recommendation System')

product_id = st.number_input('Enter product ID', min_value=0, max_value=len(data)-1, value=0)
num_recommendations = st.slider('Number of recommendations', min_value=1, max_value=10, value=5)

if st.button('Recommend'):
    recommended_products = recommend_products(product_id, num_recommendations)
    st.write(recommended_products[['Product ID','Product Name','Rating']])


2024-05-18 12:19:49.442 
  command:

    streamlit run /opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
