In [1]:
# En este ejemplo, crearemos un sistema de recomendaciones que recomiende películas a los usuarios
#Utilizaremos el conjunto de datos Movielens. Un conjunto de datos gratuito que contiene miles de calificaciones de películas por parte de los usuarios.

In [2]:
#Filtro basado en el contenido con el dataset de movielens
#Vamos a utilizar las características de las peliculas para dar recomendaciones.
#no vamos a tener en cuenta las puntuaciones de otros usuarios a la películas

In [2]:
import pandas as pd

In [25]:
import numpy as np

In [3]:
#Para poder usar la columna movie_genre, tenemos que convertirla en un grupo de campos llamados dummy_variables.

#Esta función convierte una variable categórica (por ejemplo, el genéro de la película puede ser Animation,
#Comedy, Romance...), en múltiples columnas,uns para cada categoría

#Para cada película, éstas columnas dummy tendrán un valor de 0 excepto para aquellos géneros que tenga la película.

movies = pd.read_table('movielens/movies.dat', header=None, sep='::', 
                          names=['movie_id', 'movie_title', 'movie_genre'], engine='python')
movies = pd.concat([movies, movies.movie_genre.str.get_dummies(sep='|')], axis=1)

In [4]:
movie_categories = movies.columns[3:].tolist()

In [5]:
movies.head()

Unnamed: 0,movie_id,movie_title,movie_genre,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),Animation|Children's|Comedy,0,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),Adventure|Children's|Fantasy,0,1,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),Comedy|Romance,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),Comedy|Drama,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Father of the Bride Part II (1995),Comedy,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
#la película con una id de 2, Jumanji, pertenece a los géneros Adventure, Children's y Fantasy

movies.loc[1]

movie_id                                  2
movie_title                  Jumanji (1995)
movie_genre    Adventure|Children's|Fantasy
Action                                    0
Adventure                                 1
Animation                                 0
Children's                                1
Comedy                                    0
Crime                                     0
Documentary                               0
Drama                                     0
Fantasy                                   1
Film-Noir                                 0
Horror                                    0
Musical                                   0
Mystery                                   0
Romance                                   0
Sci-Fi                                    0
Thriller                                  0
War                                       0
Western                                   0
Name: 1, dtype: object

In [17]:
movie_categories

['Action',
 'Adventure',
 'Animation',
 "Children's",
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western']

In [18]:
movies.tail()

Unnamed: 0,movie_id,movie_title,movie_genre,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
3878,3948,Meet the Parents (2000),Comedy,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3879,3949,Requiem for a Dream (2000),Drama,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3880,3950,Tigerland (2000),Drama,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3881,3951,Two Family House (2000),Drama,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3882,3952,"Contender, The (2000)",Drama|Thriller,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [19]:
def get_user_preferences(user_movies):
    user_features = movies[movies.movie_title.isin(user_movies)].ix[:,3:].T
    user_features = user_features.mean(axis=1).reset_index()
    print(user_features)
    return user_features.ix[:,1].tolist()

In [20]:
ratings = pd.read_table('movielens/ratings.dat', header=None, sep='::', 
                          names=['user_id', 'movie_id', 'rating', 'timestamp'], engine='python')

#añadir el título de la película haciendo un join entre el movie_id de la tabla ratings y el movie_id de la tabla movies
ratings = pd.merge(ratings, movies, on='movie_id')[['user_id', 'movie_title', 'movie_id','rating', 'movie_genre']]

ratings.tail()

Unnamed: 0,user_id,movie_title,movie_id,rating,movie_genre
1000204,5949,Modulations (1998),2198,5,Documentary
1000205,5675,Broken Vessels (1998),2703,3,Drama
1000206,5780,White Boys (1999),2845,1,Drama
1000207,5851,One Little Indian (1973),3607,5,Comedy|Drama|Western
1000208,5938,"Five Wives, Three Secretaries and Me (1998)",2909,4,Documentary


In [21]:
#obtenemos las recomendaciones para un usuario,

#obtener las peliculas puntuadas por un determinado usuario
id_user = 50
user_movies = ratings[ratings.user_id==id_user].movie_title.tolist() 
user_movies

['Airplane! (1980)',
 'Mission: Impossible (1996)',
 'Gladiator (2000)',
 "Breakfast at Tiffany's (1961)",
 'Lost World: Jurassic Park, The (1997)',
 'American Beauty (1999)',
 'Children of a Lesser God (1986)',
 'Jurassic Park (1993)',
 'Donnie Brasco (1997)',
 'X-Men (2000)',
 'Final Destination (2000)',
 "But I'm a Cheerleader (1999)",
 'Gone in 60 Seconds (2000)',
 'Legends of the Fall (1994)',
 'Where the Heart Is (2000)',
 'Anna and the King (1999)',
 'Rudy (1993)',
 'Frequency (2000)',
 'Chicken Run (2000)',
 'Perfect Storm, The (2000)',
 'Doctor Dolittle (1967)',
 'Fantasia (1940)',
 'From Russia with Love (1963)',
 'Scary Movie (2000)',
 "Big Momma's House (2000)",
 'Cell, The (2000)',
 'Center Stage (2000)',
 "Rosemary's Baby (1968)",
 'I Still Know What You Did Last Summer (1998)',
 'Class (1983)',
 'Virgin Suicides, The (1999)',
 "She's the One (1996)",
 'Brothers McMullen, The (1995)',
 'Nutty Professor II: The Klumps (2000)',
 'Return to Me (2000)',
 "She's So Lovely (199

In [22]:
user_preferences_list = get_user_preferences(user_movies)

          index         0
0        Action  0.186047
1     Adventure  0.139535
2     Animation  0.069767
3    Children's  0.069767
4        Comedy  0.348837
5         Crime  0.046512
6   Documentary  0.000000
7         Drama  0.372093
8       Fantasy  0.000000
9     Film-Noir  0.000000
10       Horror  0.069767
11      Musical  0.046512
12      Mystery  0.046512
13      Romance  0.162791
14       Sci-Fi  0.093023
15     Thriller  0.186047
16          War  0.023256
17      Western  0.023256


In [23]:
def get_predicted_movie_score(movie_name, user_preferences): 
    movie_slice = movies[movies.movie_title==movie_name].iloc[0]
    movie_features = movie_slice[movie_categories]
    return np.dot(movie_features, user_preferences)

In [26]:
#Action +Sci-Fi + Thriller
get_predicted_movie_score('Armageddon (1998)', user_preferences_list)



0.60465116279069764

In [27]:
def get_movie_recommendations(user_preferences, number_recommendations):  
    movies['score'] = movies.movie_title.apply(get_predicted_movie_score, args=([user_preferences]))
    movies.sort_values(by=['score'], ascending=False, inplace=True)
    return movies[['movie_title','movie_genre','score']].head(number_recommendations)

get_movie_recommendations(user_preferences_list, 10)

Unnamed: 0,movie_title,movie_genre,score
1445,Best Men (1997),Action|Comedy|Crime|Drama,0.953488
1931,Lethal Weapon (1987),Action|Comedy|Crime|Drama,0.953488
1932,Lethal Weapon 2 (1989),Action|Comedy|Crime|Drama,0.953488
1849,Lethal Weapon 4 (1998),Action|Comedy|Crime|Drama,0.953488
1244,Diva (1981),Action|Drama|Mystery|Romance|Thriller,0.953488
1933,Lethal Weapon 3 (1992),Action|Comedy|Crime|Drama,0.953488
3197,Man Bites Dog (C'est arriv� pr�s de chez vous)...,Action|Comedy|Crime|Drama,0.953488
3115,Montana (1998),Action|Comedy|Crime|Drama,0.953488
1847,Buffalo 66 (1998),Action|Comedy|Drama,0.906977
386,Faster Pussycat! Kill! Kill! (1965),Action|Comedy|Drama,0.906977


In [28]:
#obtener recomendaciones a partir de los gustos del usuario

from collections import OrderedDict

user_preferences = OrderedDict(zip(movie_categories, []))

user_preferences['Action'] = 1  
user_preferences['Adventure'] = 1  
user_preferences['Animation'] = 5  
user_preferences["Children's"] = 1  
user_preferences["Comedy"] = 1  
user_preferences['Crime'] = 1  
user_preferences['Documentary'] = 1  
user_preferences['Drama'] = 1  
user_preferences['Fantasy'] = 1  
user_preferences['Film-Noir'] = 1  
user_preferences['Horror'] = 1  
user_preferences['Musical'] = 1  
user_preferences['Mystery'] = 1  
user_preferences['Romance'] = 1  
user_preferences['Sci-Fi'] = 5  
user_preferences['War'] = 1  
user_preferences['Thriller'] = 1  
user_preferences['Western'] =1  

user_preferences_list = list(user_preferences.values())

In [29]:
get_movie_recommendations(user_preferences_list, 10)

Unnamed: 0,movie_title,movie_genre,score
1187,"Transformers: The Movie, The (1986)",Action|Animation|Children's|Sci-Fi|Thriller|War,14
606,Heavy Metal (1981),Action|Adventure|Animation|Horror|Sci-Fi,13
1254,Akira (1988),Adventure|Animation|Sci-Fi|Thriller,12
2047,"Lord of the Rings, The (1978)",Adventure|Animation|Children's|Sci-Fi,12
3676,Titan A.E. (2000),Adventure|Animation|Sci-Fi,11
731,Ghost in the Shell (Kokaku kidotai) (1995),Animation|Sci-Fi,10
2426,"Fantastic Planet, The (La Plan�te sauvage) (1973)",Animation|Sci-Fi,10
3523,Time Masters (Les Ma�tres du Temps) (1982),Animation|Sci-Fi,10
2012,"Little Mermaid, The (1989)",Animation|Children's|Comedy|Musical|Romance,9
2011,Lady and the Tramp (1955),Animation|Children's|Comedy|Musical|Romance,9


In [30]:
jumanji_features = movies.loc[1][movie_categories]
jumanji_features

Action         0
Adventure      1
Animation      0
Children's     1
Comedy         0
Crime          0
Documentary    0
Drama          0
Fantasy        1
Film-Noir      0
Horror         0
Musical        0
Mystery        0
Romance        0
Sci-Fi         0
Thriller       0
War            0
Western        0
Name: 1, dtype: object

In [31]:
jumanji_predicted_score = get_predicted_movie_score('Jumanji (1995)', user_preferences_list)
jumanji_predicted_score

3

In [33]:
rings_predicted_score = get_predicted_movie_score('Lord of the Rings, The (1978)', user_preferences_list)
rings_predicted_score

12