# Sistema de recomendacion de prendas basado en Implicit

In [7]:
# librerías

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

import implicit

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [8]:
#!pip install implicit

## Paso 2: Cargando los datos

Se cargan los datos de nuestro data set para análisis de registros de prendas

In [10]:
'''
    Cargamos nuestro data set con los rankins
    
    Como podemos ver contiene las columnas: 
        fit: fit feedback
        user_id: a unique id for the customer
        bust: bust measurement of customer
        item_id: unique product id
        weight: weight of the product
        rating: dating of the product 0-10
        rented for: reason of rent
        review_text: customer comments
        body type: clothe body type
        review_summary: short customer comments
        category: the category of the product
        height: height of the customer
        size: the standardized size of the product
        age: customer age
        review_date: date of review
'''

my_dt = pd.read_json('renttherunway_final_data.json', lines=True).drop_duplicates(keep="last", subset=['item_id'])
my_dt.head(5)

Unnamed: 0,fit,user_id,bust size,item_id,weight,rating,rented for,review_text,body type,review_summary,category,height,size,age,review_date
341,fit,815707,34b,2960913,,10.0,other,I rented this as a piece to wear on my birthda...,pear,Love this skirt,skirt,"5' 5""",24,35.0,"December 4, 2017"
648,large,749956,,1288868,,10.0,date,Fits larger in the body but it's intended to h...,full bust,Cute flounce hem at the bottom and slight ruff...,dress,"5' 2""",16,29.0,"June 14, 2017"
677,fit,317637,32d,2489222,120lbs,10.0,everyday,This looked great and provided an extra layer ...,hourglass,Great look,cape,"5' 4""",8,48.0,"November 10, 2017"
798,fit,965639,34b,291513,160lbs,8.0,wedding,Great LBD. Loved all my photos in it. 4 stars ...,hourglass,"Great thick fabric, fits true to size (for the...",dress,"5' 6""",16,27.0,"November 21, 2017"
1005,fit,981306,32b,1041724,125lbs,10.0,wedding,I wore the medium which was my back up size. ...,petite,This dress is made for dancing!,dress,"5' 3""",14,50.0,"December 30, 2017"


Se genera un dataset solo con los datos necesarios para el análisis

In [81]:
columns = ['user_id','item_id','rating','review_date','category']
clothes_df = my_dt.loc[:,columns]
clothes_df.head(5)

Unnamed: 0,user_id,item_id,rating,review_date,category
341,815707,2960913,10.0,"December 4, 2017",skirt
648,749956,1288868,10.0,"June 14, 2017",dress
677,317637,2489222,10.0,"November 10, 2017",cape
798,965639,291513,8.0,"November 21, 2017",dress
1005,981306,1041724,10.0,"December 30, 2017",dress


In [60]:
# Contar registros por id
conteo_por_id = clothes_df.groupby('user_id').size().reset_index(name='conteo')
conteo_por_id_sorted = conteo_por_id.sort_values(by='conteo', ascending=False)
print(conteo_por_id_sorted.head(5))

      user_id  conteo
1692   362951      59
3214   691468      23
4625   981193      17
3442   742630      17
151     32925      16


## Paso 3: Transformando los datos

In [83]:
def create_X(df: pd.DataFrame):

    N = df['user_id'].nunique()
    M = df['item_id'].nunique()

    user_mapper = dict(zip(np.unique(df["user_id"]), list(range(N))))
    clothe_mapper = dict(zip(np.unique(df["item_id"]), list(range(M))))
    
    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["user_id"])))
    clothe_inv_mapper = dict(zip(list(range(M)), np.unique(df["item_id"])))
    
    user_index = [user_mapper[i] for i in df['user_id']]
    clothe_index = [clothe_mapper[i] for i in df['item_id']]

    X = csr_matrix((df["rating"], (clothe_index, user_index)), shape=(M, N))
    
    return X, user_mapper, clothe_mapper, user_inv_mapper, clothe_inv_mapper

In [84]:
X, user_mapper, clothe_mapper, user_inv_mapper, clothe_inv_mapper = create_X(clothes_df)

### Creando los Mapeos de las categorías de las prendas

In [22]:
#!pip install fuzzywuzzy

In [85]:
from fuzzywuzzy import process

def clothe_finder(title):
    all_titles = clothes_df['category'].tolist()
    closest_match = process.extractOne(title,all_titles)
    return closest_match[0]

clothe_title_mapper = dict(zip(clothes_df['category'], clothes_df['item_id']))
clothe_title_inv_mapper = dict(zip(clothes_df['item_id'], clothes_df['category']))

def get_clothe_index(title):
    fuzzy_title = clothe_finder(title)
    clothe_id = clothe_title_mapper[fuzzy_title]
    clothe_idx = clothe_mapper[clothe_id]
    return clothe_idx

def get_clothe_title(clothe_idx): 
    clothe_id = clothe_inv_mapper[clothe_idx]
    title = clothe_title_inv_mapper[clothe_id]
    return title 

In [86]:
print(clothe_inv_mapper[4587])

2375866


Vamos a probar esta funcion para obtener el indice de `2489222`. 

In [87]:
get_clothe_index('2489222')

4587

Utilizemos el indice obtenido con la funcion get_clothe_title(). Tendremos que obtener la categorpia de la prenda 2489222.

In [88]:
get_clothe_title(4587)

'skirt'

## Paso 4: Construyendo el modelo de modelo de Recomendacion de Feedback Implicito

In [45]:
model = implicit.als.AlternatingLeastSquares(factors=50)

  check_blas_config()


In [46]:
model.fit(X.T.tocsr())

  0%|          | 0/15 [00:00<?, ?it/s]

In [89]:
clothe_of_interest = '2489222'

clothe_index = get_clothe_index(clothe_of_interest)
related = model.similar_items(clothe_index)
related

(array([4587, 4377,    4, 5229,   67, 5556, 4206, 4340, 2889, 3825]),
 array([0.99999994, 0.9056287 , 0.9027173 , 0.89262044, 0.8920612 ,
        0.89151657, 0.89104706, 0.8891136 , 0.88850164, 0.8869274 ],
       dtype=float32))

In [90]:
print(f"Si adquiriste la prenda {clothe_finder(clothe_of_interest)} te pueden interesar las siguientes prendas:")
for t, r in zip(related[0], related[1]):
    
    recommended_title = get_clothe_title(t)
    if recommended_title != clothe_finder(clothe_of_interest):
        print(recommended_title)

Si adquiriste la prenda skirt te pueden interesar las siguientes prendas:
down
dress
blouse
dress
pants
sheath
shift


## Paso 5: Generando las recomendaciones del usuario

Una caracteristica interesante de implicit es que puedes obtener recomendaciones personalizadas para un usuario determinado. Intentemos ver los resultados con un usuario especifico de nuestro dataset.

In [91]:
user_id = 691468

In [92]:
user_ratings = clothes_df[clothes_df['user_id']==user_id].merge(clothes_df[['item_id', 'category']])
user_ratings = user_ratings.sort_values('rating', ascending=False)
print(f"El numero de prendas rankeadas por el usuario {user_id} es de: {user_ratings['item_id'].nunique()}")

El numero de prendas rankeadas por el usuario 691468 es de: 23


In [93]:
user_ratings = clothes_df[clothes_df['user_id']==user_id].merge(clothes_df[['item_id', 'category']])
user_ratings = user_ratings.sort_values('rating', ascending=False)
top_5 = user_ratings.head()
top_5

Unnamed: 0,user_id,item_id,rating,review_date,category
0,691468,2048890,10.0,"November 10, 2017",jumpsuit
9,691468,2049439,10.0,"May 18, 2017",romper
15,691468,1346952,10.0,"June 29, 2017",dress
14,691468,663524,10.0,"November 30, 2017",dress
13,691468,220435,10.0,"December 8, 2016",dress


Las prendas de menor rating son:

In [94]:
bottom_5 = user_ratings[user_ratings['rating']<9].tail()
bottom_5

Unnamed: 0,user_id,item_id,rating,review_date,category
12,691468,1689146,8.0,"December 12, 2017",dress
7,691468,1964998,8.0,"October 31, 2017",dress
4,691468,849722,8.0,"November 8, 2017",dress
3,691468,941248,8.0,"May 26, 2017",dress
22,691468,239232,8.0,"July 13, 2017",maxi


Utilizaremos recommend() que utiliza el indice del usuario y lo transpone con la matriz user-item.

In [95]:
X_t = X.T.tocsr()
user_idx = user_mapper[user_id]
recommendations = model.recommend(user_idx, X_t[user_idx])
recommendations

(array([4839, 5095, 1796, 2353, 4321, 5170, 3899, 2778, 4126, 4516]),
 array([0.3849011 , 0.38484445, 0.31434727, 0.3123117 , 0.30779627,
        0.30769363, 0.26969293, 0.26798132, 0.26628482, 0.25997618],
       dtype=float32))

Por último se obtendrá la lista de categorías recomendadas para la prenda y el usuario analizado

In [96]:
for t, r in zip(recommendations[0], recommendations[1]):
    recommended_title = get_movie_title(t)
    print(recommended_title)

sweater
coat
dress
shift
top
skirt
overalls
shift
top
romper
