## Recommendation system using K-Nearest Neighbouehood

In [2]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process


In [3]:


# movie and rating csv file path
movies = '../ml-latest-small/movies.csv'
ratings = '../ml-latest-small/ratings.csv'

# create a data frame for movies and ratings
df_movies  = pd.read_csv(movies, usecols=['movieId', 'title'])
df_ratings = pd.read_csv(ratings, usecols=['userId','movieId','rating'])
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [4]:
# spare matrix 
movies_users= df_ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
movies_users.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
mat_movies_users = csr_matrix(movies_users.values)


In [6]:
# Euclidean distance
# Manhattan distance
# MInkowski distance
# Cosine similarity
model_knn = NearestNeighbors(metric= 'cosine', algorithm='brute', n_neighbors=20)

In [7]:
model_knn.fit(mat_movies_users)

In [12]:
# Recommender(movie_name) -> List of Movies recommended
def recommender (movie_name, data, model, n_recommendation):
    model.fit(data)
    idx = process.extractOne(movie_name, df_movies['title'])[2]
    print('Movie Selected:', df_movies['title'][idx], 'index:',idx)
    print('Searching for Recommendations.....')
    distances, indices = model.kneighbors(data[idx], n_neighbors=n_recommendation)
    for i in indices:
        print(df_movies['title'][i].where(i!= idx))

recommender('toy story', mat_movies_users, model_knn, n_recommendation=20)


Movie Selected: Toy Story (1995) index: 0
Searching for Recommendations.....
0                                                     NaN
2353                                 'night Mother (1986)
418                                  Jurassic Park (1993)
615                  Independence Day (a.k.a. ID4) (1996)
224             Star Wars: Episode IV - A New Hope (1977)
314                                   Forrest Gump (1994)
322                                 Lion King, The (1994)
910     Once Upon a Time in the West (C'era una volta ...
546                            Mission: Impossible (1996)
963                                           Diva (1981)
968                           Arsenic and Old Lace (1944)
3189            Rififi (Du rififi chez les hommes) (1955)
506                                        Aladdin (1992)
123                                      Apollo 13 (1995)
257                                   Pulp Fiction (1994)
897                 Cheech and Chong's Up in Smoke (1

In [13]:
recommender('iron man', mat_movies_users, model_knn, n_recommendation=20)


Movie Selected: Iron Man (2008) index: 6743
Searching for Recommendations.....
6743                                            NaN
7197                                  Garage (2007)
7195                        Merry Madagascar (2009)
7354                             A-Team, The (2010)
6726                         Superhero Movie (2008)
7137                         Thirst (Bakjwi) (2009)
7026                                 Scorpio (1973)
7571                                 Win Win (2011)
3880                  Look Who's Talking Now (1993)
6388    After the Wedding (Efter brylluppet) (2006)
7601                       Idiots and Angels (2008)
6755                Nina's Heavenly Delights (2006)
7022                                   Earth (2007)
7338                          Blue Valentine (2010)
4421                         What's Up, Doc? (1972)
6195                             Silent Hill (2006)
5885                                   Crash (2004)
3740                              Mot