### En este ejemplo, crearemos un sistema de recomendaciones que recomiende películas a los usuarios
### Utilizaremos el conjunto de datos movies.csv

In [1]:
import pandas as pd

In [2]:
import numpy as np

### Para poder usar la columna movie_genre, tenemos que convertirla en un grupo de campos llamados dummy_variables.

### Esta función convierte una variable categórica (por ejemplo, el género de la película puede ser Animation, Comedy, Romance...) en múltiples columnas, una para cada categoría

### Para cada película, estas columnas dummy tendrán un valor de 0, excepto para aquellos géneros que tenga la película, que tendrán un valor de 1.


In [3]:
# Column names are supplied explicitly instead of being taken from the file.
u_cols = ['movieId', 'title', 'genres']
movies = pd.read_csv('movies.csv', names=u_cols)
# Drop any row whose movieId is the literal text 'movieId'
# (presumably the file's own header line, read as data because names= is given).
movies = movies.loc[movies['movieId'] != 'movieId']
# Expand the '|'-separated genres string into one 0/1 indicator column per genre
# and append those columns to the right of the original three.
genre_dummies = movies['genres'].str.get_dummies(sep='|')
movies = pd.concat([movies, genre_dummies], axis=1)

In [4]:
# Every column after the first three (movieId, title, genres) is a genre indicator.
movie_categories = list(movies.columns[3:])

In [5]:
# Preview the first rows to check the genre indicator columns were appended.
movies.head()

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2,2,Jumanji (1995),Adventure|Children|Fantasy,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,Grumpier Old Men (1995),Comedy|Romance,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
4,4,Waiting to Exhale (1995),Comedy|Drama|Romance,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
5,5,Father of the Bride Part II (1995),Comedy,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Preview the last rows of the frame.
movies.tail()

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
40106,165193,Tanya's Island (1980),Drama|Fantasy|Romance,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
40107,165195,Pacific Banana (1981),Comedy,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
40108,165197,Werewolf in a Women's Prison (2006),(no genres listed),1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
40109,165199,The Fearless Young Boxer (1979),Action,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
40110,165201,Snatched (1973),Crime|Drama|Thriller,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [7]:
# Show the title column; titles are the keys used later to look movies up.
movies.title

1                                         Toy Story (1995)
2                                           Jumanji (1995)
3                                  Grumpier Old Men (1995)
4                                 Waiting to Exhale (1995)
5                       Father of the Bride Part II (1995)
6                                              Heat (1995)
7                                           Sabrina (1995)
8                                      Tom and Huck (1995)
9                                      Sudden Death (1995)
10                                        GoldenEye (1995)
11                          American President, The (1995)
12                      Dracula: Dead and Loving It (1995)
13                                            Balto (1995)
14                                            Nixon (1995)
15                                 Cutthroat Island (1995)
16                                           Casino (1995)
17                            Sense and Sensibility (199

### La película con una id de 25, Leaving Las Vegas, pertenece a los géneros Drama y Romance


In [8]:
# Label-based lookup: selects the row whose index label is 25
# (this is a lookup on the frame's index, not on the movieId column).
movies.loc[25]

movieId                                     25
title                 Leaving Las Vegas (1995)
genres                           Drama|Romance
(no genres listed)                           0
Action                                       0
Adventure                                    0
Animation                                    0
Children                                     0
Comedy                                       0
Crime                                        0
Documentary                                  0
Drama                                        1
Fantasy                                      0
Film-Noir                                    0
Horror                                       0
IMAX                                         0
Musical                                      0
Mystery                                      0
Romance                                      1
Sci-Fi                                       0
Thriller                                     0
War          

In [9]:
# Display the list of genre column names, in column order.
movie_categories

['(no genres listed)',
 'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western']

In [11]:
def get_user_preferences(user_movies):
    """Build a genre-preference vector from the movies a user already likes.

    For each genre column in ``movie_categories``, the preference is the mean
    of that 0/1 indicator over the rows of ``movies`` whose title is in
    ``user_movies`` (i.e. the fraction of the user's movies with that genre).

    Parameters
    ----------
    user_movies : list of str
        Titles to look up in ``movies.title`` (exact matches only).

    Returns
    -------
    list of float
        One value per entry of ``movie_categories``, in the same order.
        If no title matches, every value is NaN.

    Notes
    -----
    Prints the intermediate genre/mean table as a side effect.
    """
    # Select the genre columns by name instead of by position: a positional
    # "3:" slice would also pick up any column added to `movies` later
    # (e.g. the 'score' column written by get_movie_recommendations).
    # .loc/.iloc replace the deprecated .ix indexer, which newer pandas removed.
    liked_genres = movies.loc[movies['title'].isin(user_movies), movie_categories]
    user_features = liked_genres.T.mean(axis=1).reset_index()
    print(user_features)
    return user_features.iloc[:, 1].tolist()

In [12]:
# Titles the example user likes; used to derive the genre-preference vector.
user_movies = ['Atari: Game Over (2014)', 'Antarctica: A Year On Ice (2013)']

In [13]:
# Preference vector (one value per genre column) derived from the user's movies.
user_preferences_list = get_user_preferences(user_movies)

                 index    0
0   (no genres listed)  0.0
1               Action  0.0
2            Adventure  0.5
3            Animation  0.0
4             Children  0.0
5               Comedy  0.0
6                Crime  0.0
7          Documentary  1.0
8                Drama  0.0
9              Fantasy  0.0
10           Film-Noir  0.0
11              Horror  0.0
12                IMAX  0.0
13             Musical  0.0
14             Mystery  0.0
15             Romance  0.0
16              Sci-Fi  0.0
17            Thriller  0.0
18                 War  0.0
19             Western  0.0


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


In [14]:
def get_predicted_movie_score(movie_name, user_preferences):
    """Score one movie against a genre-preference vector.

    Looks up the first row of ``movies`` whose title equals ``movie_name``,
    takes its genre indicator values (the ``movie_categories`` columns) and
    returns their dot product with ``user_preferences``.

    ``user_preferences`` must have one entry per genre, in the order of
    ``movie_categories``. An IndexError is raised if no title matches.
    """
    title_matches = movies.loc[movies['title'] == movie_name]
    first_match = title_matches.iloc[0]
    genre_flags = first_match[movie_categories]
    return np.dot(genre_flags, user_preferences)

In [15]:
# Action + Sci-Fi + Thriller: not recommended for this user
get_predicted_movie_score('Armageddon (1998)', user_preferences_list)

0.0

In [16]:
# Documentary: recommended
get_predicted_movie_score('Lambert & Stamp (2014)', user_preferences_list)

1.0

In [17]:
# Adventure: recommended
get_predicted_movie_score('The Great Alone (2015)', user_preferences_list)

1.5

In [18]:
def get_movie_recommendations(user_preferences, number_recommendations):
    """Return the highest-scoring movies for a genre-preference vector.

    Each movie's score is the dot product of its genre indicator columns
    (``movie_categories``) with ``user_preferences``.

    Parameters
    ----------
    user_preferences : sequence of numbers
        One weight per genre, in the order of ``movie_categories``.
    number_recommendations : int
        How many rows to return.

    Returns
    -------
    pandas.DataFrame
        The ``title``, ``genres`` and ``score`` columns of the top rows,
        highest score first.

    Raises
    ------
    ValueError
        If ``user_preferences`` does not have one entry per genre.

    Notes
    -----
    Side effects on the global ``movies`` frame (kept for compatibility):
    a ``score`` column is written/overwritten and the frame is re-sorted
    in place by that column.
    """
    weights = np.asarray(user_preferences)
    if weights.ndim != 1 or weights.shape[0] != len(movie_categories):
        raise ValueError(
            'user_preferences must contain exactly one value per genre '
            '({} expected)'.format(len(movie_categories))
        )
    # Score every row with a single matrix-vector product. Scoring by title
    # lookup (one scan of the whole frame per row) is quadratic in the number
    # of movies and gives all rows sharing a title the first match's score.
    movies['score'] = np.dot(movies[movie_categories].values, weights)
    movies.sort_values(by=['score'], ascending=False, inplace=True)
    return movies[['title', 'genres', 'score']].head(number_recommendations)

# Top 10 recommendations for the preference vector derived from the user's movies.
get_movie_recommendations(user_preferences_list, 10)

Unnamed: 0,title,genres,score
16459,Sweetgrass (2009),Adventure|Documentary|Western,1.5
34757,Steak (R)évolution (2014),Adventure|Children|Documentary,1.5
19524,Bigfoot Lives (2007),Adventure|Documentary|Drama,1.5
23908,Ride the Divide (2010),Adventure|Documentary,1.5
38703,The Great Alone (2015),Adventure|Documentary|Drama,1.5
14070,Wings of Hope (Julianes Sturz in den Dschungel...,Adventure|Documentary,1.5
440,"Endless Summer 2, The (1994)",Adventure|Documentary,1.5
27916,Project Wild Thing (2013),Adventure|Children|Documentary|Drama,1.5
7943,Freedom Downtime (2001),Adventure|Crime|Documentary,1.5
9889,Incident at Loch Ness (2004),Adventure|Comedy|Documentary,1.5


In [19]:
# Get recommendations from tastes the user states directly (one weight per genre)

from collections import OrderedDict

# Start every genre column at 0, in movie_categories order, so the mapping
# always has exactly one entry per genre.
user_preferences = OrderedDict.fromkeys(movie_categories, 0)

# Weight per genre: larger weights raise the score of movies in that genre.
# Keys must match the genre column names exactly (e.g. 'Children').
user_preferences.update({
    '(no genres listed)': 0,
    'Action': 1,
    'Adventure': 1,
    'Animation': 5,
    'Children': 0,
    'Comedy': 1,
    'Crime': 1,
    'Documentary': 1,
    'Drama': 1,
    'Fantasy': 1,
    'Film-Noir': 1,
    'Horror': 0,
    'IMAX': 1,
    'Musical': 1,
    'Mystery': 1,
    'Romance': 1,
    'Sci-Fi': 5,
    'Thriller': 1,
    'War': 1,
    'Western': 1,
})

# A misspelled genre would be appended as an extra entry instead of updating
# an existing one; fail loudly rather than produce a misaligned vector.
unknown_genres = [genre for genre in user_preferences if genre not in movie_categories]
if unknown_genres:
    raise KeyError('Unknown genres in user_preferences: {}'.format(unknown_genres))

# Build the vector by genre name so it lines up with the genre columns
# regardless of the order in which the weights were written above.
user_preferences_list = [user_preferences[genre] for genre in movie_categories]

In [20]:
# Top 10 recommendations for the hand-written genre weights.
get_movie_recommendations(user_preferences_list, 10)

Unnamed: 0,title,genres,score
9003,Patlabor: The Movie (Kidô keisatsu patorebâ: T...,Action|Animation|Crime|Drama|Film-Noir|Mystery...,16
11812,Aqua Teen Hunger Force Colon Movie Film for Th...,Action|Adventure|Animation|Comedy|Fantasy|Myst...,15
11236,Mind Game (2004),Adventure|Animation|Comedy|Fantasy|Romance|Sci-Fi,14
39026,Kingsglaive: Final Fantasy XV (2016),Action|Adventure|Animation|Drama|Fantasy|Sci-Fi,14
9829,Robots (2005),Adventure|Animation|Children|Comedy|Fantasy|Sc...,14
31966,Mobile Suit Gundam III: Encounters in Space (1...,Action|Adventure|Animation|Drama|Sci-Fi|War,14
31965,Mobile Suit Gundam II: Soldiers of Sorrow (1981),Action|Adventure|Animation|Drama|Sci-Fi|War,14
16901,Mars Needs Moms (2011),Action|Adventure|Animation|Children|Comedy|Sci...,14
13117,Futurama: Bender's Game (2008),Action|Adventure|Animation|Comedy|Fantasy|Sci-Fi,14
34037,Princes and Princesses (2000),Animation|Children|Comedy|Drama|Fantasy|Romanc...,14
