<a href="https://colab.research.google.com/github/diarrabell/fashion-recs/blob/main/capstone_recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is an exploratory notebook used to execute the recommendation system.

In [None]:
import os
import urllib
import zipfile

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error

Load dataset 

In [None]:
# Mount Google Drive into the Colab runtime, then make the Drive data folder
# the working directory for the rest of the notebook.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/data

Mounted at /content/drive
/content/drive/MyDrive/data


In [None]:
# Read the product catalog from the mounted Drive folder. `data` is the name
# the later cells work with; `product_catalog` refers to the same DataFrame.
data = pd.read_csv("/content/drive/MyDrive/data/product_catalog.csv")
product_catalog = data

data

Unnamed: 0,img_name,links,aesthetics
0,2000479686021,https://www.forever21.com/us/2000479686.html?d...,90s
1,2000479684021,https://www.forever21.com/us/2000479684.html?d...,90s
2,2000481956019,https://www.forever21.com/us/2000481956.html?d...,90s
3,2000479687032,https://www.forever21.com/us/2000479687.html?d...,90s
4,2000480113012,https://www.forever21.com/us/2000480113.html?d...,90s
...,...,...,...
244,20004889320903,https://www.forever21.com/us/2000488932.html?d...,70s boho
245,20004790070203,https://www.forever21.com/us/2000479007.html?d...,70s boho
246,20004769630102,https://www.forever21.com/us/2000476963.html?d...,70s boho
247,20004546180202,https://www.forever21.com/us/2000454618.html?d...,70s boho


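Prepare Data - 
Bag-of-words (multi-hot) encoding to use aesthetics as features: each word in an item's aesthetics label becomes its own count column, so a label such as `70s boho` activates both the `70s` and `boho` columns.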

In [None]:
# Turn each item's aesthetics label into a vector of word counts
# (one column per distinct word found in the labels)
vec = CountVectorizer()
genres_vec = vec.fit_transform(data['aesthetics'])

# Show the resulting feature vectors as a table keyed by image name
genres_vectorized = pd.DataFrame(
    genres_vec.toarray(),
    index=data['img_name'],
    columns=vec.get_feature_names_out(),
)
genres_vectorized

Unnamed: 0_level_0,70s,80s,90s,boho,cottagecore,goth,kawaii,y2k
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000479686021,0,0,1,0,0,0,0,0
2000479684021,0,0,1,0,0,0,0,0
2000481956019,0,0,1,0,0,0,0,0
2000479687032,0,0,1,0,0,0,0,0
2000480113012,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...
20004889320903,1,0,0,1,0,0,0,0
20004790070203,1,0,0,1,0,0,0,0
20004769630102,1,0,0,1,0,0,0,0
20004546180202,1,0,0,1,0,0,0,0


Calculate cosine similarity of items

In [None]:
# Build the item-to-item similarity matrix: cosine similarity between the
# aesthetics vectors, labelled by image name on both rows and columns
csmatrix = pd.DataFrame(
    cosine_similarity(genres_vec),
    index=data['img_name'],
    columns=data['img_name'],
)
csmatrix


img_name,2000479686021,2000479684021,2000481956019,2000479687032,2000480113012,2000483681023,2000479685012,2000479683011,2000475206011,2000475150063,...,20004898380702,20004816180803,20004781700101,20004607410702,20004889380104,20004889320903,20004790070203,20004769630102,20004546180202,20004625030102
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000479686021,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2000479684021,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2000481956019,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2000479687032,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2000480113012,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20004889320903,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
20004790070203,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
20004769630102,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
20004546180202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Generate recommendations using cosine similarity

In [None]:

# Number of recommendations to return for the query item
N_RECS = 10

# Query item: a placeholder image name plus the aesthetic to recommend for
input_df = pd.DataFrame({'img_name': ['test'], 'aesthetics': ['kawaii']})

# Append the query row to the catalog so it can be compared with every item
test_df = pd.concat([data, input_df]).reset_index(drop=True)

# NOTE: genres_vec and csmatrix are rebound below to include the query row,
# so after this cell they have one more row than `data`. Re-run the earlier
# vectorizing cell before re-running the catalog-only similarity cell.

# Reuse the already-fitted vectorizer so the feature columns stay the same
genres_vec = vec.transform(test_df['aesthetics'])

# Rebuild the similarity matrix of items (catalog + query) from their
# aesthetics vectors, labelled by image name on both axes
csmatrix = cosine_similarity(genres_vec)
csmatrix = pd.DataFrame(csmatrix, columns=test_df.img_name, index=test_df.img_name)

# Positional access: take the first (only) query row regardless of its label
img = input_df['img_name'].iloc[0]
print(img)

# Rank every item by its similarity to the query, most similar first
sims = csmatrix.loc[img, :]
mostsimilar = sims.sort_values(ascending=False).index.values

# Keep the top N_RECS recommendations, excluding the query item itself
recs = mostsimilar[mostsimilar != img][:N_RECS]

print('recs: ', recs)


test
recs:  [20004820200301 20004792320201 20004779540101 20004794740201
 20004823120101 20004761450401 20004797800103 20004781990101
 20004848040201 20004834790301]


In [None]:
# Look the recommended image names up in the original catalog.
# Rows come back in catalog order, not in similarity order.
# NOTE(review): img_name does not appear to be unique in the catalog, so this
# can return rows whose aesthetics differ from the recommended item's.
is_recommended = data['img_name'].isin(recs)
ret = data[is_recommended]
ret

Unnamed: 0,img_name,links,aesthetics
166,20004781990101,https://www.forever21.com/us/2000478199.html?d...,cottagecore
176,20004794740201,https://www.forever21.com/us/2000479474.html?d...,cottagecore
202,20004820200301,https://www.forever21.com/us/2000482020.html?d...,kawaii boho
207,20004834790301,https://www.forever21.com/us/2000483479.html?d...,kawaii boho
208,20004848040201,https://www.forever21.com/us/2000484804.html?d...,kawaii boho
210,20004781990101,https://www.forever21.com/us/2000478199.html?d...,kawaii boho
211,20004797800103,https://www.forever21.com/us/2000479780.html?d...,kawaii boho
212,20004761450401,https://www.forever21.com/us/2000476145.html?d...,kawaii boho
213,20004823120101,https://www.forever21.com/us/2000482312.html?d...,kawaii boho
214,20004794740201,https://www.forever21.com/us/2000479474.html?d...,kawaii boho
