# Setup Data 

In [1]:
# import libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from fuzzywuzzy import fuzz
from sklearn.neighbors import NearestNeighbors 
from scipy.sparse import csr_matrix

In [2]:
class Recommender:
    def __init__(self, metric, algorithm, k, data, decode_id_movie):
        self.metric = metric
        self.algorithm = algorithm
        self.k = k
        self.data = data
        self.decode_id_movie = decode_id_movie
        self.data = data
        self.model = self._recommender().fit(data)
    
    def make_recommendation(self, new_movie, n_recommendations):
        recommended = self._recommend(new_movie=new_movie, n_recommendations=n_recommendations)
        print("...... \n...... \n...... \nPROCESSING \n...... \n...... \n...... \nDONE \n")
        return recommended 
    
    def _recommender(self):
        return NearestNeighbors(metric=self.metric, algorithm=self.algorithm, n_neighbors=self.k, n_jobs=-1)
    
    def _recommend(self, new_movie, n_recommendations):
        # Get the id of the recommended movies
        recommendations = []
        recommendation_ids = self._get_recommendations(new_movie=new_movie, n_recommendations=n_recommendations)
        # return the name of the movie using a mapping dictionary
        recommendations_map = self._map_indeces_to_movie_title(recommendation_ids)
        # Translate this recommendations into the ranking of movie titles recommended
        for i, (idx, dist) in enumerate(recommendation_ids):
            recommendations.append(recommendations_map[idx])
        return recommendations
                 
    def _get_recommendations(self, new_movie, n_recommendations):
        # Start the recommendation process
        print(f"Starting the recommendation process for {new_movie} ...")
        
        # Get the id of the movie according to the text
        recom_movie_id = self._fuzzy_matching(movie=new_movie)
        
        # Return the n neighbors for the movie id
        distances, indices = self.model.kneighbors(self.data[recom_movie_id], n_neighbors=n_recommendations+1)
        closedlist = distances
        
        
        
        
        return sorted(list(zip(indices.squeeze().tolist(), distances.squeeze().tolist())), key=lambda x: x[1])[:0:-1]
    
    
    def _map_indeces_to_movie_title(self, recommendation_ids):
        # get reverse mapper
        return {movie_id: movie_title for movie_title, movie_id in self.decode_id_movie.items()}
    
    def _fuzzy_matching(self, movie):
        match_tuple = []
        # get match
        for title, idx in self.decode_id_movie.items():
            ratio = fuzz.ratio(title.lower(), movie.lower())
            if ratio >= 60:
                match_tuple.append((title, idx, ratio))
        # sort
        match_tuple = sorted(match_tuple, key=lambda x: x[2])[::-1]
        if not match_tuple:
            print(f"The recommendation system could not find a match for {movie}")
            return
          
        return match_tuple[0][1]

In [3]:
# loading csv file into pandas dataframe
# specify path
path1="/Users/prasiddha/Downloads/AI CW PT.2/Development/Movies.csv"

# read movies file
movies_df = pd.read_csv(path1)
movies_df.head()


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# loading csv file into pandas dataframe
# specify path
path2="/Users/prasiddha/Downloads/AI CW PT.2/Development/rating.csv"

# read ratings file
rating_df = pd.read_csv(path2)
rating_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
df = pd.merge(movies_df, rating_df, on='movieId')
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483


In [6]:
df.isnull().sum()


movieId      0
title        0
genres       0
userId       0
rating       0
timestamp    0
dtype: int64

# Rating Matrix

In [7]:
movie_rating_count = (df.groupby(by=['title'])['rating'].count().reset_index().rename(columns={'rating':'total_rating_count'}))
movie_rating_count.head()

Unnamed: 0,title,total_rating_count
0,'Hellboy': The Seeds of Creation (2004),1
1,'Round Midnight (1986),2
2,'Salem's Lot (2004),1
3,'Til There Was You (1997),2
4,'Tis the Season for Love (2015),1


In [8]:
total_movie_rating_count_df = pd.merge(df, movie_rating_count, on='title')
total_movie_rating_count_df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp,total_rating_count
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703,205
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962,205
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946,205
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970,205
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483,205


In [9]:
total_movie_rating_count_df.describe()

Unnamed: 0,movieId,userId,rating,timestamp,total_rating_count
count,93228.0,93228.0,93228.0,93228.0,93228.0
mean,19318.053771,303.447108,3.521603,1200300000.0,57.761788
std,35491.159175,171.031434,1.04234,215215700.0,60.677294
min,1.0,1.0,0.5,828124600.0,1.0
25%,1193.0,160.0,3.0,1009692000.0,13.0
50%,2947.0,305.0,3.5,1181478000.0,38.0
75%,7701.0,448.0,4.0,1431958000.0,83.0
max,193609.0,599.0,5.0,1537799000.0,321.0


In [10]:
movie_user = df.groupby('movieId')['userId'].count()

In [11]:
movie_ten_id = movie_user[movie_user > 2].index.to_list()

In [12]:
df_movie_id_more_ten = df[df['userId'].isin(movie_ten_id)].reset_index(drop=True)


In [13]:
df_movie_features = df_movie_id_more_ten.pivot(index='movieId', columns='userId', values='rating').fillna(0)

In [14]:
# obtain a sparse matrix
mat_movie_features = csr_matrix(df_movie_features.values)

In [15]:
df_movie_features.head()

userId,1,2,3,4,5,6,7,8,9,10,...,588,589,590,592,593,594,595,596,597,599
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,4.0,3.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,0.0,2.5,4.0,0.0,4.0,0.0,0.0,0.0,2.5
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,3.0,0.0,3.0,0.0,0.0,4.0,0.0,0.0,0.0,1.5
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
df_unique_movie = df.drop_duplicates(subset=['movieId']).reset_index(drop=True)[['movieId', 'title']]


In [17]:
decode_id_movie = {
    movie: i for i, movie in 
    enumerate(list(df_unique_movie.set_index('movieId').loc[df_movie_features.index].title))
}

In [18]:
decode_id_movie

{'Toy Story (1995)': 0,
 'Jumanji (1995)': 1,
 'Grumpier Old Men (1995)': 2,
 'Waiting to Exhale (1995)': 3,
 'Father of the Bride Part II (1995)': 4,
 'Heat (1995)': 5,
 'Sabrina (1995)': 6,
 'Tom and Huck (1995)': 7,
 'Sudden Death (1995)': 8,
 'GoldenEye (1995)': 9,
 'American President, The (1995)': 10,
 'Dracula: Dead and Loving It (1995)': 11,
 'Balto (1995)': 12,
 'Nixon (1995)': 13,
 'Cutthroat Island (1995)': 14,
 'Casino (1995)': 15,
 'Sense and Sensibility (1995)': 16,
 'Four Rooms (1995)': 17,
 'Ace Ventura: When Nature Calls (1995)': 18,
 'Money Train (1995)': 19,
 'Get Shorty (1995)': 20,
 'Copycat (1995)': 21,
 'Assassins (1995)': 22,
 'Powder (1995)': 23,
 'Leaving Las Vegas (1995)': 24,
 'Othello (1995)': 25,
 'Now and Then (1995)': 26,
 'Persuasion (1995)': 27,
 'City of Lost Children, The (Cité des enfants perdus, La) (1995)': 28,
 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)': 29,
 'Dangerous Minds (1995)': 30,
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)': 

# Movie Recommendation


In [23]:
try:
    model = Recommender(metric='cosine', algorithm='brute', k=20, data=mat_movie_features, decode_id_movie=decode_id_movie)
    movie =  input("Enter Your Input: ")
    new_recommendations = model.make_recommendation(new_movie=movie, n_recommendations=10)

    openlist = new_recommendations
    
    print(f"The Recommendations for '{movie}' are:")
    
    print('\n'.join(map(str, openlist)))

    print("\nEnjoy your MOVIE, THANK YOU")
    
   

    
except:
    print("...... \nSorry, this movie is not in our database!")
    

Enter Your Input: titanic
Starting the recommendation process for titanic ...
...... 
...... 
...... 
PROCESSING 
...... 
...... 
...... 
DONE 

The Recommendations for 'titanic' are:
Big Boss, The (Fists of Fury) (Tang shan da xiong) (1971)
Bustin' Loose (1981)
Love Is a Many-Splendored Thing (1955)
Crimes of the Heart (1986)
Good Mother, The (1988)
Deceiver (1997)
Raise the Titanic (1980)
Crimson Pirate, The (1952)
Boiling Point (1993)
Pajama Game, The (1957)

Enjoy your MOVIE, THANK YOU
