# Read the films genres

In [1]:
import pandas as pd

films = pd.read_csv('./dataset/movies.csv')
films.set_index('movieId', inplace=True)
films.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


# Read the Ratings

In [2]:
ratings = pd.read_csv('./dataset/ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


<br>
<br>
<br>

# 1. Recomending by Views

### 1.1. Most viewed recently

In [40]:
import time
from datetime import datetime, timedelta

range_time = (datetime.now() - timedelta(days=395)).timestamp()
recent_ratings = ratings[ratings.timestamp >= range_time]

recent_ratings = recent_ratings.groupby(by="movieId").agg({"timestamp": "count"})
recent_ratings.rename(columns={"timestamp": "views"}, inplace=True)
recent_ratings.sort_values(by="views", inplace=True, ascending=False)

recent_ratings_movies = films.merge(view_mean, on="movieId", how="inner")
recent_ratings_movies.sort_values(by=["views"], ascending=False, inplace=True)

recent_ratings_movies.head(10)

Unnamed: 0_level_0,title,genres,rating,views
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.164134,329
318,"Shawshank Redemption, The (1994)",Crime|Drama,4.429022,317
296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4.197068,307
593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,4.16129,279
2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,4.192446,278
260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.231076,251
480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller,3.75,238
110,Braveheart (1995),Action|Drama|War,4.031646,237
589,Terminator 2: Judgment Day (1991),Action|Sci-Fi,3.970982,224
527,Schindler's List (1993),Drama|War,4.225,220


### 1.2. Top 10 movies according with the Views and Ratings

In [42]:
view_mean = ratings.groupby(by="movieId").agg({"rating": "mean", "timestamp": "count"})
view_mean.rename(columns={"timestamp": "views"}, inplace=True)
view_mean.sort_values(by="views", inplace=True, ascending=False)

first_recomendation = films.merge(view_mean, on="movieId", how="inner")
first_recomendation.sort_values(by=["views", "rating"], ascending=False, inplace=True)

first_recomendation.head(10)

Unnamed: 0_level_0,title,genres,rating,views
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.164134,329
318,"Shawshank Redemption, The (1994)",Crime|Drama,4.429022,317
296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4.197068,307
593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,4.16129,279
2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,4.192446,278
260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,4.231076,251
480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller,3.75,238
110,Braveheart (1995),Action|Drama|War,4.031646,237
589,Terminator 2: Judgment Day (1991),Action|Sci-Fi,3.970982,224
527,Schindler's List (1993),Drama|War,4.225,220


<br>
<br>
<br>

# 2. Same genres watched previously

    The variable bellow represents the movie that user already viewed

In [30]:
USER_WATCHING = "Comedy|Crime"

    Filter according with the history from the user and returns the most visualized movies of the same genres

In [31]:
top_same_genre = first_recomendation[[USER_WATCHING in genre for genre in first_recomendation.genres]]
top_same_genre.head(10)

Unnamed: 0_level_0,title,genres,rating,views
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
106782,"Wolf of Wall Street, The (2013)",Comedy|Crime|Drama,4.25,2
119145,Kingsman: The Secret Service (2015),Action|Adventure|Comedy|Crime,4.25,2
296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,5.0,1
608,Fargo (1996),Comedy|Crime|Drama|Thriller,5.0,1
57669,In Bruges (2008),Comedy|Crime|Drama|Thriller,5.0,1
61323,Burn After Reading (2008),Comedy|Crime|Drama,5.0,1
51255,Hot Fuzz (2007),Action|Comedy|Crime|Mystery,4.0,1


<br>
<br>
<br>

## 3. Find the most similar persons using rating

In [7]:
import numpy as np

movies_rating = ratings.merge(pd.DataFrame(films["title"]), on="movieId", how="inner")
movies_rating.drop(columns=["movieId", "timestamp"], inplace=True)
users = movies_rating['userId'].drop_duplicates().values

movies_rating = movies_rating.pivot_table(columns=['title'], index='userId', values='rating')

movies_rating.replace({np.nan: 0}, inplace=True)

#### Convert in train objects

In [8]:
X = movies_rating.values
y = movies_rating.index.values

#### Modeling

In [9]:
from sklearn.neighbors import NearestNeighbors

model = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(X)

<br>
<br>

#### Find near peaple

In [10]:
from IPython.display import HTML, display

target = 98

# Find the most similar
distances, indices = model.kneighbors([X[target]])

display(HTML("<h2>Similar people id:</h2> <h4>{}</h4>".format(str(indices[0].tolist())[1:-1])))

<br>
<br>

#### Recomending

In [11]:
mask = ratings.userId.isin(indices[0][1:])

# Filter the reviews by the near people
near_reviews = ratings[mask]

# Group by films
near_reviews = near_reviews.groupby("movieId").agg({"rating": "mean"})

# Remove films already viewed by the user
user_films = ratings[ratings.userId == target].movieId.values

mask = near_reviews.index.isin(user_films) == False
near_reviews = near_reviews[mask]
near_reviews.reset_index(inplace=True)

# Merge the movies
movies_views = films.merge(view_mean, on="movieId", how="inner")[["title", "genres", "views"]]
movies_views = movies_views.merge(near_reviews, on="movieId", how="inner")
recommended_movies = movies_views

# Sort descending by rating
recommended_movies.sort_values(["rating", "views"], inplace=True, ascending=False)

recommended_movies.head(20)

Unnamed: 0,movieId,title,genres,views,rating
11,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,204,5.0
90,1198,Raiders of the Lost Ark (Indiana Jones and the...,Action|Adventure,200,5.0
59,590,Dances with Wolves (1990),Adventure|Drama|Western,164,5.0
358,58559,"Dark Knight, The (2008)",Action|Crime|Drama|IMAX,149,5.0
18,165,Die Hard: With a Vengeance (1995),Action|Crime|Thriller,144,5.0
29,293,Léon: The Professional (a.k.a. The Professiona...,Action|Crime|Drama|Thriller,133,5.0
146,2329,American History X (1998),Crime|Drama,129,5.0
237,5349,Spider-Man (2002),Action|Adventure|Sci-Fi|Thriller,122,5.0
171,2918,Ferris Bueller's Day Off (1986),Comedy,109,5.0
338,48516,"Departed, The (2006)",Crime|Drama|Thriller,107,5.0
