In [None]:
# Import necessary libraries
!pip install ipywidgets
import pandas as pd
import numpy as np
import torch
import ipywidgets as widgets
from IPython.display import display
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity




In [None]:
# Read the fashion catalogue from the CSV file in the working directory
FASHION_CSV = 'FashionDB.csv'
fashion_data = pd.read_csv(FASHION_CSV)

# Preview the first rows of the loaded frame
fashion_data.head()

Unnamed: 0,p_id,name,products,price,colour,brand,img,ratingCount,avg_rating,description,p_attributes
0,17048614,Khushal K Women Black Ethnic Motifs Printed Ku...,"Kurta, Palazzos, Dupatta",5099.0,Black,Khushal K,http://assets.myntassets.com/assets/images/170...,4522.0,4.418399,Black printed Kurta with Palazzos with dupatta...,"{'Add-Ons': 'NA', 'Body Shape ID': '443,333,32..."
1,16524740,InWeave Women Orange Solid Kurta with Palazzos...,"Kurta, Palazzos, Floral Print Dupatta",5899.0,Orange,InWeave,http://assets.myntassets.com/assets/images/165...,1081.0,4.119334,Orange solid Kurta with Palazzos with dupatta<...,"{'Add-Ons': 'NA', 'Body Shape ID': '443,333,32..."
2,16331376,Anubhutee Women Navy Blue Ethnic Motifs Embroi...,"Kurta, Trousers, Dupatta",4899.0,Navy Blue,Anubhutee,http://assets.myntassets.com/assets/images/163...,1752.0,4.16153,Navy blue embroidered Kurta with Trousers with...,"{'Add-Ons': 'NA', 'Body Shape ID': '333,424', ..."
3,14709966,Nayo Women Red Floral Printed Kurta With Trous...,"Kurta, Trouser, Dupatta",3699.0,Red,Nayo,http://assets.myntassets.com/assets/images/147...,4113.0,4.088986,Red printed kurta with trouser and dupatta<br>...,"{'Add-Ons': 'NA', 'Body Shape ID': '333,424', ..."
4,11056154,AHIKA Women Black & Green Printed Straight Kurta,Kurta,1350.0,Black,AHIKA,http://assets.myntassets.com/assets/images/110...,21274.0,3.978377,"Black and green printed straight kurta, has a ...","{'Body Shape ID': '424', 'Body or Garment Size..."


In [None]:
# Preview the first few rows to understand the dataset's structure.
# display() renders the frame as a rich table; print() wraps and truncates
# the columns of a wide frame into hard-to-read plain text.
print("\nFashion Data Overview:")
display(fashion_data.head())



Fashion Data Overview:
       p_id                                               name  \
0  17048614  Khushal K Women Black Ethnic Motifs Printed Ku...   
1  16524740  InWeave Women Orange Solid Kurta with Palazzos...   
2  16331376  Anubhutee Women Navy Blue Ethnic Motifs Embroi...   
3  14709966  Nayo Women Red Floral Printed Kurta With Trous...   
4  11056154   AHIKA Women Black & Green Printed Straight Kurta   

                                products   price     colour      brand  \
0               Kurta, Palazzos, Dupatta  5099.0      Black  Khushal K   
1  Kurta, Palazzos, Floral Print Dupatta  5899.0     Orange    InWeave   
2               Kurta, Trousers, Dupatta  4899.0  Navy Blue  Anubhutee   
3                Kurta, Trouser, Dupatta  3699.0        Red       Nayo   
4                                  Kurta  1350.0      Black      AHIKA   

                                                 img  ratingCount  avg_rating  \
0  http://assets.myntassets.com/assets/images/170... 

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
# display() is used instead of print() so the table is rendered as rich output.
print("\nFashion Data Statistics:")
display(fashion_data.describe())




Fashion Data Statistics:
               p_id         price   ratingCount   avg_rating
count  1.421400e+04  14214.000000   6530.000000  6530.000000
mean   1.569275e+07   2970.434009    184.312251     4.101044
std    3.152415e+06   2570.232988    782.464972     0.475756
min    7.016600e+04    169.000000      1.000000     1.000000
25%    1.415589e+07   1599.000000      9.000000     3.888889
50%    1.638232e+07   2222.000000     23.000000     4.180940
75%    1.807670e+07   3498.000000     80.000000     4.392641
max    1.941576e+07  47999.000000  21274.000000     5.000000


In [None]:
# Count the missing (NaN) entries in every column
missing_per_column = fashion_data.isna().sum()
print("\nMissing Values in Fashion Data:")
print(missing_per_column)


Missing Values in Fashion Data:
p_id               0
name               0
products           0
price              0
colour             0
brand              0
img                0
ratingCount     7684
avg_rating      7684
description        0
p_attributes       0
dtype: int64


In [None]:
# Show the first two rows as a quick look at the columns
fashion_data.head(2)

Unnamed: 0,p_id,name,products,price,colour,brand,img,ratingCount,avg_rating,description,p_attributes
0,17048614,Khushal K Women Black Ethnic Motifs Printed Ku...,"Kurta, Palazzos, Dupatta",5099.0,Black,Khushal K,http://assets.myntassets.com/assets/images/170...,4522.0,4.418399,Black printed Kurta with Palazzos with dupatta...,"{'Add-Ons': 'NA', 'Body Shape ID': '443,333,32..."
1,16524740,InWeave Women Orange Solid Kurta with Palazzos...,"Kurta, Palazzos, Floral Print Dupatta",5899.0,Orange,InWeave,http://assets.myntassets.com/assets/images/165...,1081.0,4.119334,Orange solid Kurta with Palazzos with dupatta<...,"{'Add-Ons': 'NA', 'Body Shape ID': '443,333,32..."


In [None]:
# Print the dtype pandas assigned to each column
print(fashion_data.dtypes)

p_id              int64
name             object
products         object
price           float64
colour           object
brand            object
img              object
ratingCount     float64
avg_rating      float64
description      object
p_attributes     object
dtype: object


## Data Processing

In [None]:
# Fill missing values with an empty string only in the text columns that are
# concatenated below, so tokenization never sees NaN.
# A frame-wide fillna('') would also write '' into the numeric ratingCount and
# avg_rating columns, silently turning them into object dtype.
text_columns = ['name', 'products', 'description']
fashion_data[text_columns] = fashion_data[text_columns].fillna('')

# Combine the product name, product types and description into a single text column
fashion_data['text'] = fashion_data['name'] + ' ' + fashion_data['products'] + ' ' + fashion_data['description']

# Check the new 'text' column
print("\nFashion Data Text Feature:")
print(fashion_data['text'].head())



Fashion Data Text Feature:
0    Khushal K Women Black Ethnic Motifs Printed Ku...
1    InWeave Women Orange Solid Kurta with Palazzos...
2    Anubhutee Women Navy Blue Ethnic Motifs Embroi...
3    Nayo Women Red Floral Printed Kurta With Trous...
4    AHIKA Women Black & Green Printed Straight Kur...
Name: text, dtype: object


  fashion_data.fillna('', inplace=True)


In [None]:
# Re-count missing entries per column after the preprocessing step above
remaining_missing = fashion_data.isna().sum()
print(remaining_missing)

p_id            0
name            0
products        0
price           0
colour          0
brand           0
img             0
ratingCount     0
avg_rating      0
description     0
p_attributes    0
text            0
dtype: int64


## Feature Engineering with Hugging Face LLM

In [None]:
# Load the tokenizer and the model from Hugging Face; both come from the same checkpoint
MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Function to create embeddings for the text data
def get_embeddings(text):
    """Embed text with the module-level ``tokenizer`` and ``model``.

    Token embeddings are averaged with the attention mask as weights, so
    padding tokens do not contribute. For a single string (no padding) this
    equals a plain mean over tokens; for a list of strings it keeps shorter
    texts from being diluted by pad positions.

    Args:
        text: A string, or a list of strings, passed straight to the tokenizer.
            Inputs are truncated to 128 tokens.

    Returns:
        A 2-D NumPy array with one embedding row per input text.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension: 1 for real tokens, 0 for padding
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # clamp guards against division by zero for a fully masked row
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / token_counts).numpy()

# Embed every product's combined text; each cell of the new column holds one embedding array
fashion_data['embeddings'] = fashion_data['text'].apply(get_embeddings)

# Inspect the embedding of the first product as a sanity check
first_embedding = fashion_data['embeddings'].iloc[0]
print("\nFashion Data Embeddings (first product):")
print(first_embedding)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.



Fashion Data Embeddings (first product):
[[-2.03127310e-01  1.92911416e-01 -1.72543168e-01  2.72916555e-02
  -2.31151044e-01  6.51585236e-02 -1.67150553e-02 -1.48075730e-01
  -1.01424687e-01  3.89934927e-02  1.53135464e-01 -2.54908711e-01
   1.99303832e-02 -9.29605812e-02 -1.06368050e-01  1.46989552e-02
   1.27419978e-01  2.88518518e-02  2.20043175e-02  9.74578597e-03
   2.65499968e-02 -2.31566116e-01  1.35208875e-01 -3.47889960e-02
  -1.49333656e-01  9.07385051e-02  6.93964958e-02 -6.33367375e-02
   1.55590013e-01 -1.28529012e-01 -1.01944014e-01  2.33390987e-01
  -2.08662525e-01  3.41909286e-03  1.86257586e-01  1.72733441e-02
  -1.00911237e-01  8.90748650e-02  6.61811382e-02  3.15704159e-02
  -1.97853744e-02 -6.06855899e-02 -1.03300005e-01 -2.65941303e-02
  -1.15164153e-01 -1.40356690e-01 -1.22510515e-01  1.96327835e-01
  -1.88697189e-01 -1.46458536e-01  1.01445146e-01 -8.85382891e-02
  -6.54276088e-02  1.04543403e-01  1.90333754e-01 -1.83712572e-01
  -2.23558366e-01 -3.83781493e-02 

## Model Selection (Cosine Similarity)

In [None]:
# Stack the per-product embedding arrays into one matrix (one row per product),
# then compute the pairwise cosine similarity between all products
embedding_matrix = np.vstack(fashion_data['embeddings'].values)
fashion_similarity = cosine_similarity(embedding_matrix)

# Function to get product recommendations based on cosine similarity
def recommend_products(similarity_matrix, data, product_idx, top_n=5):
    """Return the rows of ``data`` most similar to one product.

    The query product is excluded by its index rather than by assuming it is
    the single highest-scoring entry: duplicate or near-duplicate products
    (or floating-point rounding) can score as high as the product itself.

    Args:
        similarity_matrix: 2-D array-like; row ``i`` holds the similarity of
            product ``i`` to every product, in the same positional order as
            the rows of ``data``.
        data: DataFrame of products, indexed positionally via ``iloc``.
        product_idx: Positional index of the query product (a negative value
            counts from the end).
        top_n: Maximum number of recommendations to return.

    Returns:
        Up to ``top_n`` rows of ``data`` ordered from most to least similar,
        never containing the query product itself.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Get similarity scores for the selected product
    similarity_scores = np.asarray(similarity_matrix[product_idx])

    # Resolve a negative index so it can be compared with argsort positions
    query_position = product_idx % len(similarity_scores)

    # Stable descending sort keeps tied products in their original row order
    ranked = np.argsort(-similarity_scores, kind="stable")
    ranked = ranked[ranked != query_position]

    return data.iloc[ranked[:top_n]]

# Recommendation for a fashion product (e.g., index 0)
fashion_idx = 0
recommended_fashion = recommend_products(fashion_similarity, fashion_data, fashion_idx)
# f-string so the heading shows the actual index instead of the literal "{fashion_idx}"
print(f"\nRecommended Fashion Products based on product at index {fashion_idx}:")
print(recommended_fashion['name'])



Recommended Fashion Products based on product at index {fashion_idx}:
471    Khushal K Women White Ethnic Motifs Printed Go...
9      Khushal K Women Green & Pink Printed Pure Cott...
88     Khushal K Women Green Ethnic Motifs Printed Go...
920    Prakhya Women Black Ethnic Motifs Printed Kurt...
406    HERE&NOW Women Blue Ethnic Motifs Printed Regu...
Name: name, dtype: object


## Building the Recommendation System, Evaluation and Testing

In [None]:
# Recommendation for a second fashion product (index 1) as a spot check
fashion_idx_test = 1
recommended_fashion_test = recommend_products(fashion_similarity, fashion_data, fashion_idx_test)
# f-string so the heading shows the actual index instead of the literal "{fashion_idx_test}"
print(f"\nTest Recommended Fashion Products based on product at index {fashion_idx_test}:")
print(recommended_fashion_test['name'])



Test Recommended Fashion Products based on product at index {fashion_idx_test}:
560    KALINI Women Orange Printed Pure Cotton Kurta ...
291    Vishudh Women Blue Floral Printed Kurta with P...
351    Myshka Women Green Kurta with Palazzos & With ...
137    Indo Era Women Pink Floral Embroidered Kurta w...
942    Myshka Women Red Kurta with Palazzos & With Du...
Name: name, dtype: object


## Interactive Widgets with ipywidgets

In [None]:
# Build (label, value) pairs for the dropdown: the product name is the label
# and its positional row index is the value handed to the callback
product_options = [
    (product_name, position)
    for position, product_name in enumerate(fashion_data['name'])
]

# Dropdown for selecting a product
product_dropdown = widgets.Dropdown(
    options=product_options,
    description='Select Product:',
    continuous_update=False
)

# Output area in which the recommended products are rendered
output_area = widgets.Output()

In [None]:
# Function to update recommendations based on dropdown selection
def update_recommendations(product_idx):
    """Refresh ``output_area`` with the recommendations for the chosen product.

    ``product_idx`` is the positional row index in ``fashion_data``; the
    dropdown supplies it as the value of the selected option.
    """
    columns_to_show = ['name', 'description']
    with output_area:
        # Remove whatever the previous selection rendered
        output_area.clear_output()
        matches = recommend_products(fashion_similarity, fashion_data, product_idx)
        # Render only the name and description of each recommended product
        display(matches[columns_to_show])

# Link the dropdown widget to the update function: interactive() is given the
# dropdown as the source of the product_idx argument of update_recommendations.
# The interactive container it returns is neither stored nor displayed here;
# results are written to output_area by update_recommendations itself.
# NOTE(review): whether the callback also fires once for the initial selection
# appears to depend on the ipywidgets version — confirm.
widgets.interactive(update_recommendations, product_idx=product_dropdown)

# Display the dropdown and the output area
display(product_dropdown, output_area)

Dropdown(description='Select Product:', options=(('Khushal K Women Black Ethnic Motifs Printed Kurta with Pala…

Output()