In [28]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process

In [29]:
df_movies = pd.read_csv('../data/movies_testing.csv', usecols=['movieId', 'title'])
df_movies.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [30]:
df_ratings = pd.read_csv('../data/ratings_testing.csv', usecols=['userId', 'movieId', 'rating'])
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [31]:
movies_users = df_ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
movies_users.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
mat_movies_users = csr_matrix(movies_users.values)
mat_movies_users

<9724x610 sparse matrix of type '<class 'numpy.float64'>'
	with 100836 stored elements in Compressed Sparse Row format>

In [33]:
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20) 

In [34]:
model_knn.fit(mat_movies_users)

In [36]:
# use more attributes to enhance the recommendation
# this is similar based to the rating of the movie
# add attributes like genre, director, actors, age group, 
# the recommender will recommend movies that are similar to the movie that the user has rated highly

def recommend_movies(movie_name, data, model, n_recommendations):
    
    model.fit(data)
    idx = process.extractOne(movie_name, df_movies['title'])[2]
    print('Movie Selected: ', df_movies['title'][idx], ' Index: ',idx)
    print('Searching for recommendations......')
    distances, indices = model.kneighbors(data[idx], n_neighbors=n_recommendations+1) # +1 because the first movie is the movie itself
    for i in indices:
        print(df_movies['title'][i].where(i != idx))

recommend_movies('superman', mat_movies_users, model_knn, 20)

Movie Selected:  Superman (1978)  Index:  1986
Searching for recommendations......
1986                                                  NaN
1985                                Mommie Dearest (1981)
1057               Star Trek II: The Wrath of Khan (1982)
2246                             Ipcress File, The (1965)
2314    Women on the Verge of a Nervous Breakdown (Muj...
1904                                   Logan's Run (1976)
1059                 Star Trek IV: The Voyage Home (1986)
1988                                  Superman III (1983)
1058           Star Trek III: The Search for Spock (1984)
1556                                  Return to Oz (1985)
1987                                   Superman II (1980)
973                                          Akira (1988)
1056               Star Trek V: The Final Frontier (1989)
855                                 Drop Dead Fred (1991)
1390                                         Mulan (1998)
1801                                    Pale Ri