## Recommendation system using K-Nearest Neighbouehood

In [16]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process


In [17]:


# movie and rating csv file path
movies = '../recomendation system/movies.csv'
ratings = '../recomendation system/ratings.csv'

# create a data frame for movies and ratings
df_movies  = pd.read_csv(movies, usecols=['movieId', 'title'])
df_ratings = pd.read_csv(ratings, usecols=['userId','movieId','rating'])
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


In [18]:
df_ratings.shape,df_movies.shape

((100836, 3), (9742, 2))

In [19]:
# spare matrix 
movies_users= df_ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
movies_users

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
mat_movies_users = csr_matrix(movies_users.values)


In [21]:
# Euclidean distance
# Manhattan distance
# MInkowski distance
# Cosine similarity
model_knn = NearestNeighbors(metric= 'cosine', algorithm='brute', n_neighbors=20)

In [22]:
model_knn.fit(mat_movies_users)

In [23]:
df_movies= df_movies.loc[df_movies['movieId'].isin(df_ratings['movieId'])]
df_movies

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)
...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017)
9738,193583,No Game No Life: Zero (2017)
9739,193585,Flint (2017)
9740,193587,Bungo Stray Dogs: Dead Apple (2018)


In [32]:
# Recommender(movie_name) -> List of Movies recommended
def recommender (movie_name, data, model, n_recommendation):
    model.fit(data)
    idx = process.extractOne(movie_name, df_movies['title'])[2]
    print('Movie Selected:', df_movies['title'][idx], 'index:',idx)
    print('Searching for Recommendations.....')
    indices = model.kneighbors(data[idx], n_neighbors=n_recommendation+1,return_distance= False)
    for i in indices[0]:
        print(df_movies['title'][i])

recommender('Star Wars: Episode IV - A New Hope', mat_movies_users, model_knn, n_recommendation=20)


Movie Selected: Star Wars: Episode IV - A New Hope (1977) index: 224
Searching for Recommendations.....
Star Wars: Episode IV - A New Hope (1977)
Cheech and Chong's Up in Smoke (1978)
Once Upon a Time in the West (C'era una volta il West) (1968)
Princess Bride, The (1987)
Walk on the Moon, A (1999)
Some Kind of Wonderful (1987)
Arsenic and Old Lace (1944)
Black Mask (Hak hap) (1996)
Local Hero (1983)
Godfather, The (1972)
Jane Austen's Mafia! (1998)
Terminator 2: Judgment Day (1991)
Independence Day (a.k.a. ID4) (1996)
Fargo (1996)
Diva (1981)
Star Wars: Episode V - The Empire Strikes Back (1980)
Fall (1997)
Outsiders, The (1983)
Jurassic Park (1993)
Brazil (1985)
Silence of the Lambs, The (1991)


In [33]:
 # Recommender(movie_name) -> List of Movies recommended
def recommender(movie_name, n_recommendation):
    model_knn.fit(mat_movies_users)
    idx = process.extractOne(movie_name, df_movies['title'])[2]
    print('Movie Selected:', df_movies['title'][idx], 'index:', idx)
    print('Searching for Recommendations.....')
    distances, indices = model_knn.kneighbors(mat_movies_users[idx], n_neighbors=n_recommendation+1)
    for i in indices[0]:
        if i != idx:
            print(df_movies['title'][i])

recommender('Star Wars: Episode IV - A New Hope', n_recommendation=20)


Movie Selected: Star Wars: Episode IV - A New Hope (1977) index: 224
Searching for Recommendations.....
Cheech and Chong's Up in Smoke (1978)
Once Upon a Time in the West (C'era una volta il West) (1968)
Princess Bride, The (1987)
Walk on the Moon, A (1999)
Some Kind of Wonderful (1987)
Arsenic and Old Lace (1944)
Black Mask (Hak hap) (1996)
Local Hero (1983)
Godfather, The (1972)
Jane Austen's Mafia! (1998)
Terminator 2: Judgment Day (1991)
Independence Day (a.k.a. ID4) (1996)
Fargo (1996)
Diva (1981)
Star Wars: Episode V - The Empire Strikes Back (1980)
Fall (1997)
Outsiders, The (1983)
Jurassic Park (1993)
Brazil (1985)
Silence of the Lambs, The (1991)
