In [None]:
import pandas as pd
import numpy as np
import warnings

In [None]:
# Silence every warning for the rest of the session. NOTE(review): this is a
# blanket filter, so deprecation/parser warnings raised by later cells are hidden too.
warnings.filterwarnings('ignore')

### Reading the movie dataset

In [None]:
# Load the tab-separated ratings file; column names are supplied explicitly
# via `names`, one row per (user, item) rating.
df = pd.read_csv(
    "ml-100k/u.data",
    sep='\t',
    names=["user_id", "item_id", "rating", "timestamp"],
)

In [None]:
# Show the raw ratings table as loaded.
df

In [None]:
# (rows, columns) of the ratings table.
df.shape

In [None]:
# Number of distinct users that appear in the ratings.
df['user_id'].nunique()

In [None]:
# Number of distinct items (movies) that appear in the ratings.
df['item_id'].nunique()

In [None]:
# Load the pipe-delimited item file (no header row, latin-1 encoded).
# A plain "|" is used instead of "\|": the backslash form is an invalid string
# escape and, being two characters long, is interpreted as a regular expression,
# which forces pandas onto the slower python parsing engine.
movie_titles = pd.read_csv("ml-100k/u.item", sep="|", header=None, encoding='latin-1')

In [None]:
# Show the item table as loaded (columns are still numbered because header=None).
movie_titles 

In [None]:
# Keep only the first two columns and give them readable names.
# Selecting by position (rather than by the labels 0 and 1) keeps this cell
# re-runnable: after the first run the labels are 'item_id'/'title', so a
# label-based `[[0, 1]]` lookup would raise KeyError on a second execution.
movie_titles = movie_titles.iloc[:, :2].copy()
movie_titles.columns = ['item_id', 'title']

In [None]:
# Show the trimmed item table (item_id, title).
movie_titles

In [None]:
# Attach the movie title to every rating via item_id (inner join, the pandas default).
# Any 'title' column left over from a previous run of this cell is dropped first;
# otherwise a re-run would produce 'title_x'/'title_y' and break later
# groupby('title') / pivot steps.
df = pd.merge(df.drop(columns='title', errors='ignore'), movie_titles, on="item_id")
df

### Exploratory Data Analysis

In [None]:
import matplotlib.pyplot as plt
#!pip install seaborn
import seaborn as sns

In [None]:
# Mean rating per title, as a one-column DataFrame indexed by title.
# The 'rating' column is selected *before* aggregating so that only that column
# is averaged (instead of every column being averaged and then discarded), and so
# that a non-numeric column in `df` cannot make mean() fail.
ratings = df.groupby('title')['rating'].mean().to_frame()
ratings

In [None]:

# Number of ratings each title received, aligned to `ratings` by title.
# Count only the 'rating' column and assign the resulting Series directly;
# wrapping it in a DataFrame is unnecessary for a single-column assignment.
ratings['num of ratings'] = df.groupby('title')['rating'].count()

In [None]:
# Titles ordered from highest to lowest mean rating.
ratings.sort_values('rating', ascending=False)

In [None]:
# Distribution of how many ratings each title received.
# Uses the explicit fig/ax interface and labels the figure so it can be read on its own.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(ratings['num of ratings'], bins=70)
ax.set(
    title='Number of ratings per movie',
    xlabel='Number of ratings',
    ylabel='Number of movies',
)
plt.show()

In [None]:
# Distribution of the mean rating per title.
# Same figure size as the previous histogram, with title and axis labels added.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(ratings['rating'], bins=70)
ax.set(
    title='Mean rating per movie',
    xlabel='Mean rating',
    ylabel='Number of movies',
)
plt.show()

In [None]:
# Mean rating against number of ratings, one point per title.
sns.jointplot(
    data=ratings,
    x='rating',
    y='num of ratings',
    alpha=0.5,
)

### Recommender

In [None]:
# Show the merged ratings table that the recommender is built from.
df

In [None]:
# User-by-title matrix of ratings: one row per user_id, one column per title.
# Cells for titles a user has not rated are NaN.
movie_matrix = pd.pivot_table(
    df,
    values="rating",
    index="user_id",
    columns="title",
)

In [None]:
# Show the user-by-title rating matrix.
movie_matrix

In [None]:
# Column of per-user ratings for the reference movie.
starwars_user_ratings = movie_matrix.loc[:, 'Star Wars (1977)']

In [None]:
# Show each user's rating of Star Wars (1977).
starwars_user_ratings

In [None]:
# Correlate every title's rating column with the Star Wars rating column,
# giving one correlation value per title.
similar_to_starwars = movie_matrix.corrwith(other=starwars_user_ratings, axis=0)

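# Correlation(x, y) = Covariance(x, y) / (σ(x) * σ(y))

# Covariance(x, y) = Σ(xᵢ - x̄)*(yᵢ - ȳ) / n, where n is the total number of records (use the same divisor, n or n - 1, for the standard deviations so that it cancels in the ratio)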

# Where,

# σ(y) = Standard Deviation of y

# σ(x) = Standard Deviation of x
# xᵢ = iᵗʰ element of x
# x̄ = sample mean of x
# yᵢ = iᵗʰ element of y
# ȳ = sample mean of y

In [None]:
# Show the per-title correlation with Star Wars (1977).
similar_to_starwars

In [None]:
# Turn the correlation Series into a one-column frame named 'Correlation'.
corr_starwars = similar_to_starwars.to_frame(name='Correlation')

In [None]:
# Discard titles for which no correlation could be computed (NaN).
corr_starwars = corr_starwars.dropna()

In [None]:
# Show the correlation table after removing NaN rows.
corr_starwars

In [None]:
# Titles ordered from most to least correlated with Star Wars (1977).
corr_starwars.sort_values('Correlation', ascending=False)

In [None]:
# Add each title's rating count next to its correlation (joined on the title index).
# Only the 'Correlation' column is carried into the join so the cell can be
# re-run: joining onto a frame that already has 'num of ratings' would raise
# a "columns overlap" ValueError.
corr_starwars = corr_starwars[['Correlation']].join(ratings['num of ratings'])

In [None]:
# Keep only titles with at least 100 ratings, then rank them by correlation.
(
    corr_starwars
    .loc[corr_starwars['num of ratings'] >= 100]
    .sort_values('Correlation', ascending=False)
)

### Recommendation Function

In [None]:
def recommend_movie(movie_name, min_ratings=100, matrix=None, rating_counts=None):
    """Rank other movies by how strongly their user ratings correlate with ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title to base the recommendations on; must be a column of ``matrix``.
    min_ratings : int, default 100
        Titles with fewer ratings than this are excluded from the result.
    matrix : pandas.DataFrame, optional
        User-by-title rating matrix. Defaults to the notebook-level ``movie_matrix``.
    rating_counts : pandas.Series, optional
        Number of ratings per title, indexed by title. Defaults to the
        notebook-level ``ratings['num of ratings']``.

    Returns
    -------
    pandas.DataFrame
        Indexed by title with columns ``'Correlation'`` and ``'num of ratings'``,
        sorted by correlation in descending order. The queried movie itself is
        not included.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of the rating matrix.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if matrix is None:
        matrix = movie_matrix
    if rating_counts is None:
        rating_counts = ratings['num of ratings']

    if movie_name not in matrix.columns:
        raise KeyError(f"{movie_name!r} is not a title in the rating matrix")

    movie_user_ratings = matrix[movie_name]

    corr_movie = (
        matrix.corrwith(movie_user_ratings)
        .to_frame(name='Correlation')
        # Titles whose correlation could not be computed come back as NaN.
        .dropna()
        # A movie always correlates perfectly with itself; recommending the
        # query back to the user is not useful, so remove that row.
        .drop(index=movie_name, errors='ignore')
        # The joined column must carry this name regardless of how the Series was named.
        .join(rating_counts.rename('num of ratings'))
    )

    # Correlations based on only a few ratings are unreliable; keep popular titles only.
    popular = corr_movie[corr_movie['num of ratings'] >= min_ratings]
    return popular.sort_values(by='Correlation', ascending=False)

In [None]:
# First five rows of the recommendation table built from "Titanic (1997)".
recommend_movie("Titanic (1997)").head(5)

In [None]:
import pickle

In [None]:
# Persist the merged ratings table. The context manager guarantees the file
# handle is flushed and closed; a bare open() passed to dump() is never closed.
with open('movie_dict.pkl', 'wb') as pkl_file:
    pickle.dump(df, pkl_file)

In [None]:
# Persist the user-by-title rating matrix. The context manager guarantees the
# file handle is flushed and closed; a bare open() passed to dump() is never closed.
with open('movie_matrix_dict.pkl', 'wb') as pkl_file:
    pickle.dump(movie_matrix, pkl_file)

 User 1:              User 2:
 Movie 1   3.0        Movie 1   5.0
 Movie 2   2.0        Movie 2   4.0  
 Movie 3   4.0        
 Movie 4   1.0

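 Now suppose we want movie recommendations based on Movie 2.
 We will be recommended Movie 1, and not Movie 3 or Movie 4, because the ratings of Movie 1 move together with ('correlate' with) the ratings of Movie 2: both User 1 and User 2 rated Movie 1 exactly one point higher than Movie 2.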