In [11]:
import pandas as pd
import sklearn.metrics as metrics
import fca as fca

import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# Recomendador con Explicaciones

En este notebook ejecutaremos el experimento para el ICCBR19. Usaremos la matriz $Q_u$ para obetener las K películas más similares a la que le vamos a recomendar. A continuación, mostraremos el retículo de propiedades en común de ese conjunto de películas usando la técnica **FCA**.

## Seleccionar usuario y película

Seleccionamos el usuario y la película que vamos a recomendar. Antes mostraremos las películas en orden del rating que se calcula (de mayor a menor).

In [12]:
user = 6
k_movies = 10

In [13]:
# Cargamos la matriz q_u del usuario
path = 'data/matrices_data/q_user_' + str(user) + '.csv'
q_u_DF = pd.read_csv(path)
q_u_DF['predictedRating'] = q_u_DF.apply(lambda row: sum(row[1:]), axis=1)
q_u_DF.sort_values(by='predictedRating', ascending=False)['movieId'][:k_movies].values

array([ 1089,   593,   367,  5349,   541,   316,  2959,   480,  1206,
       33794])

In [14]:
movieId_to_recommend = 1089

## Calculamos la matriz de similitud de películas

Usando la función de similitud `cosine`, `euclidean` o `manhattan`, calculamos la matriz de similitud de todas las películas entre ellas. Para ello usaremos el vector dentro de $Q_u$ que tiene cada película. Mostramos la matriz de similitudes en un mapa de calor para saber la dispersión de las similitudes.

In [15]:
similarity_function = 'cosine'

In [16]:
matrix_qu_DF = q_u_DF.drop(columns=['movieId', 'predictedRating'])
items_sim_DF = pd.DataFrame(metrics.pairwise_distances(matrix_qu_DF, metric='cosine'))
items_sim_DF.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.0,0.446038,0.358642,0.403814,0.222171,0.336008,0.352832,0.300286,0.289402,0.24376,...,0.255116,0.316164,0.21834,0.20824,0.307233,0.290463,0.132232,0.260636,0.311674,0.316425
1,0.446038,0.0,0.168146,0.338368,0.205351,0.145732,0.087385,0.239753,0.202037,0.171303,...,0.111035,0.156556,0.241976,0.294591,0.181611,0.216816,0.289902,0.180695,0.133713,0.148164
2,0.358642,0.168146,0.0,0.597371,0.344982,0.307084,0.130002,0.109233,0.124408,0.035723,...,0.147625,0.061331,0.067142,0.332127,0.387168,0.081688,0.352192,0.056357,0.197568,0.134462
3,0.403814,0.338368,0.597371,0.0,0.167248,0.109397,0.248552,0.465592,0.501799,0.56404,...,0.283261,0.466805,0.541039,0.212766,0.211625,0.643346,0.181658,0.428723,0.234597,0.359993
4,0.222171,0.205351,0.344982,0.167248,0.0,0.120133,0.194366,0.308662,0.29339,0.276239,...,0.152977,0.308913,0.309233,0.086061,0.086622,0.410685,0.053899,0.26416,0.089742,0.187576


In [17]:
trace = go.Heatmap(z=items_sim_DF.values,
                   x=items_sim_DF.index,
                   y=items_sim_DF.index,
                   colorscale=[[0.0, 'rgb(255,255,255)'], [1.0, 'rgb(31, 119, 180)']])

data=[trace]
iplot(data, filename='labelled-heatmap')

## Recuperar las K películas más similares

Recuperamos, a partir de esa matriz de similitudes, las K películas más similares a la película recomendada.

In [18]:
movies_SE = q_u_DF['movieId']
movie_index = movies_SE[movies_SE == movieId_to_recommend].index[0]
most_similar_items_index = items_sim_DF[movie_index].sort_values(ascending=False)[:k_movies].index
most_similar_items = movies_SE[most_similar_items_index.values].values

In [19]:
most_similar_items

array([1221, 2628,   34, 5952, 1136,  858, 4226, 1197, 1265, 7153])

## Mostrar retículo

Mostramos el retículo usando la técnica **FCA** de la pelicula a recomendar, junto con las K películas más similares.

In [20]:
#fca.calcular_reticulo('data/experiment_data/movies_binary.csv', movieId_to_recommend, most_similar_items)