Importing the required libraries.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from math import pow, sqrt

Importing user data

In [None]:
# Prefer the MovieLens-style ratings file when it is present; otherwise fall
# back to the alternative dataset file.
ratings_path = "/content/ratings.csv"
if os.path.isfile(ratings_path):
  # Read the exact file whose existence was just checked, so the load does not
  # depend on the current working directory happening to be /content.
  users = pd.read_csv(ratings_path)
  # Normalise the column names to the user_id / item_id names used by the rest
  # of the notebook.
  users = users.rename(columns={'userId':'user_id', 'movieId':'item_id'})
else:
  # NOTE(review): this file is presumably already using user_id / item_id
  # column names, since no rename is applied -- confirm against the data.
  users = pd.read_csv('Dataset.csv')

#print("Few initial observations in user data are : \n ",users.head(),'\n')
#print(('There are few missing values in user data : '), users.isnull().values.any(),'\n')

Importing movie data

In [None]:
# Prefer the MovieLens-style movies file when it is present; otherwise fall
# back to the alternative id/title file.
movies_path = "/content/movies.csv"
if os.path.isfile(movies_path):
  # Read the exact file whose existence was just checked, so the load does not
  # depend on the current working directory happening to be /content.
  movies = pd.read_csv(movies_path)
  movies = movies.rename(columns={'movieId':'item_id'})
  # Build one 0/1 indicator column per genre from the pipe-delimited `genres`
  # string. Assigning to `movies.genre` would only set a Python attribute (not
  # a column), and iterating the raw strings yields single characters, so the
  # genres are split and encoded in one vectorised step instead.
  genreFlags = movies['genres'].str.get_dummies(sep='|')
  genreColumns = list(genreFlags.columns)
  movies = movies.join(genreFlags)
else:
  # NOTE(review): this file is presumably already using an item_id column,
  # since no rename is applied -- confirm against the data.
  movies = pd.read_csv('Movie_Id_Titles.csv')
#print("Few initial observations in movies data are : \n ",movies.head(),'\n')
#print(('There are few missing values in movies data : '), movies.isnull().values.any(),'\n')

  after removing the cwd from sys.path.


Creating rating data

In [None]:
# Build the `ratings` dataframe: every row of `users` joined with the matching
# row(s) of `movies` on their shared `item_id` column (default inner join).
ratings = users.merge(movies, on='item_id')
#ratings.head()

In [None]:
# Mean rating per movie title, ordered from the highest average to the lowest.
avgRating = (
    ratings
    .groupby('title')['rating']
    .mean()
    .sort_values(ascending=False)
)
#print("Avg rating of each movie is : \n",avgRating.head())

In [None]:
# Disabled correlation-heatmap code: it is wrapped in a string literal, so
# nothing is plotted and the cell simply evaluates to this string.
'''
plt.rcParams["figure.figsize"] = (100,100)
sns.heatmap(movies.corr(), annot =True, linewidths=.5, cmap='YlGnBu')
plt.title('Correlation Matrix')
'''

'\nplt.rcParams["figure.figsize"] = (100,100)\nsns.heatmap(movies.corr(), annot =True, linewidths=.5, cmap=\'YlGnBu\')\nplt.title(\'Correlation Matrix\')\n'

Function to get rating given by a user to a movie

In [None]:
def getRating(userid,movieid):
  """Return the rating that `userid` gave to `movieid`.

  Looks the pair up in the module-level `ratings` dataframe and returns the
  first matching value of the `rating` column. Raises IndexError when the
  user has no rating for that movie.
  """
  by_user = ratings.user_id == userid
  for_movie = ratings.item_id == movieid
  matching = ratings.loc[by_user & for_movie, 'rating']
  return matching.iloc[0]

Function to get the list of all movie IDs rated by a user

In [None]:
def getMovie(userid):
  """Return the list of `item_id` values that `userid` has rows for in `ratings`.

  The list follows the row order of the module-level `ratings` dataframe and
  is empty when the user has no rows.
  """
  rows_for_user = ratings[ratings.user_id == userid]
  return rows_for_user['item_id'].tolist()

Function to get the movie title for a given movie ID.

In [None]:
def getMovieTitle(movieid):
  """Return the title recorded for `movieid` in the module-level `movies` dataframe.

  The first matching row wins; IndexError is raised when no row has that
  `item_id`.
  """
  matching_titles = movies[movies.item_id == movieid]['title']
  return matching_titles.iloc[0]

In [None]:
def distanceSimilarity(user1,user2):
  """Euclidean-distance similarity between two users over their co-rated movies.

  Reads the module-level `ratings` dataframe (columns `user_id`, `item_id`,
  `rating`).

  Parameters:
    user1, user2: user ids to compare.

  Returns:
    0 when the users share no rated movie, otherwise
    1 / (1 + sqrt(sum of squared rating differences)), so identical ratings
    give 1.0 and larger disagreements approach 0.
  """
  # Filter the frame once per user instead of re-filtering it for every movie.
  rows1 = ratings.loc[ratings.user_id == user1, ['item_id', 'rating']]
  rows2 = ratings.loc[ratings.user_id == user2, ['item_id', 'rating']]
  movies1 = rows1['item_id'].tolist()

  # Movie -> rating lookups; setdefault keeps the first rating seen for a
  # movie should a (user, movie) pair appear on more than one row.
  first1 = {}
  for movie, value in zip(movies1, rows1['rating'].tolist()):
    first1.setdefault(movie, value)
  first2 = {}
  for movie, value in zip(rows2['item_id'].tolist(), rows2['rating'].tolist()):
    first2.setdefault(movie, value)

  # Movies of user1 (in row order) that user2 has also rated.
  shared = [movie for movie in movies1 if movie in first2]
  if not shared:
    return 0

  totalDistance = sum(pow(first1[movie] - first2[movie], 2) for movie in shared)
  return 1/(1+sqrt(totalDistance))

In [None]:
#distanceSimilarity(11,111)

In [None]:
def userCorrelation(user1,user2):
  """Pearson correlation between two users' ratings over their co-rated movies.

  Reads the module-level `ratings` dataframe (columns `user_id`, `item_id`,
  `rating`).

  Parameters:
    user1, user2: user ids to compare.

  Returns:
    0 when the users share no rated movie or when either user's shared
    ratings have no variance; otherwise the correlation coefficient.
  """
  # Filter the frame once per user instead of rescanning it for every lookup.
  rows1 = ratings.loc[ratings.user_id == user1, ['item_id', 'rating']]
  rows2 = ratings.loc[ratings.user_id == user2, ['item_id', 'rating']]
  movies1 = rows1['item_id'].tolist()

  # Movie -> rating lookups; setdefault keeps the first rating seen for a
  # movie should a (user, movie) pair appear on more than one row.
  first1 = {}
  for movie, value in zip(movies1, rows1['rating'].tolist()):
    first1.setdefault(movie, value)
  first2 = {}
  for movie, value in zip(rows2['item_id'].tolist(), rows2['rating'].tolist()):
    first2.setdefault(movie, value)

  # (rating by user1, rating by user2) for every movie both users rated.
  pairs = [(first1[movie], first2[movie]) for movie in movies1 if movie in first2]
  count = len(pairs)
  if count == 0:
    return 0

  sum1 = sum(a for a, _ in pairs)
  sum2 = sum(b for _, b in pairs)
  squaredSum1 = sum(pow(a, 2) for a, _ in pairs)
  squaredSum2 = sum(pow(b, 2) for _, b in pairs)
  totalMovieRating = sum(a * b for a, b in pairs)

  numerator = totalMovieRating - ((sum1 * sum2) / count)
  varianceProduct = (squaredSum1 - pow(sum1, 2) / count) * (squaredSum2 - pow(sum2, 2) / count)
  # Zero means one user rated every shared movie identically; a negative value
  # can only come from floating-point rounding. Either way the correlation is
  # undefined, and sqrt() would raise on a negative input.
  if varianceProduct <= 0:
    return 0
  return numerator / sqrt(varianceProduct)

In [None]:
#userCorrelation(11,111)

In [None]:
def similarUsers(user1,userCount,metric='pearson'):
  """Return the `userCount` users most similar to `user1`.

  Parameters:
    user1: id of the user to compare against.
    userCount: maximum number of (score, user_id) pairs to return.
    metric: 'pearson' scores with userCorrelation; any other value scores
      with distanceSimilarity.

  Returns:
    A list of (score, user_id) tuples, highest score first. Only the first
    100 distinct user ids found in `ratings` are considered as candidates.
  """
  scorer = userCorrelation if metric == 'pearson' else distanceSimilarity
  candidates = ratings.user_id.unique().tolist()[:100]
  scored = [(scorer(user1, other), other) for other in candidates if other != user1]
  # Sort ascending, then flip so the best score comes first (equal scores are
  # ordered by larger user id first).
  return sorted(scored)[::-1][:userCount]

In [None]:
def Recommendation(userid):
  """Recommend up to 10 movie titles for `userid` via user-based filtering.

  Every other user (among the first 100 distinct ids in `ratings`) with a
  positive userCorrelation to `userid` contributes their ratings, weighted by
  that correlation. A movie is a candidate when `userid` has not rated it, or
  has rated it 0. Candidates are ranked by their similarity-weighted average
  rating.

  Parameters:
    userid: id of the user to recommend for.

  Returns:
    A list of at most 10 movie titles, best predicted rating first.
  """
  user_ids = ratings.user_id.unique().tolist()
  total = {}          # movie id -> sum of (neighbour rating * similarity)
  similaritySum = {}  # movie id -> sum of the similarities that contributed

  # The target user's rated movies do not change while looping, so fetch them
  # once and use a set for constant-time membership tests.
  seenMovies = set(getMovie(userid))

  for user in user_ids[:100]:
    if user == userid:
      continue
    score = userCorrelation(userid,user)
    # Dissimilar or unrelated users carry no useful signal.
    if score <= 0:
      continue
    for movieid in getMovie(user):
      if movieid not in seenMovies or getRating(userid,movieid) == 0:
        # Accumulate across neighbours; re-initialising these entries on each
        # hit would discard every contribution except the last one.
        total[movieid] = total.get(movieid, 0) + getRating(user,movieid) * score
        similaritySum[movieid] = similaritySum.get(movieid, 0) + score

  ranking = [(tot/similaritySum[movieid],movieid) for movieid,tot in total.items()]
  ranking.sort(reverse=True)
  # Only the top 10 are returned, so only those titles are looked up.
  return [getMovieTitle(movieid) for _, movieid in ranking[:10]]

In [None]:
# Ask for a user id, compute that user's recommendations, and print a header
# line followed by one title per line.
id = int(input("Enter user ID : "))
rec = Recommendation(id)
header = f"Top 10 movie recommendation for {id} are : "
print("\n".join(str(line) for line in [header, *rec]))

Enter user ID : 3
Top 10 movie recommendation for 3 are : 
Olive Kitteridge (2014)
Cowboy Bebop (1998)
American Hustle (2013)
From One Second to the Next (2013)
50 Children: The Rescue Mission of Mr. And Mrs. Kraus (2013)
Before Midnight (2013)
Lincoln (2012)
Resident Evil: Retribution (2012)
Samsara (2011)
Superman/Batman: Public Enemies (2009)
