## STEP 0: LIBRARIES IMPORT

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## STEP 1: IMPORT DATASET

In [None]:
# Load the movie-title lookup table. Later cells merge it on 'item_id' and
# group by 'title', so the file is expected to provide both columns.
movie_titles_df = pd.read_csv(filepath_or_buffer='Movie_ID_Titles')

In [None]:
# Preview the titles table loaded from 'Movie_ID_Titles'.
movie_titles_df

In [None]:
# Read the tab-separated ratings file. It has no header row, so the column
# names are supplied explicitly.
movies_rating_df = pd.read_csv(
    'u.data',
    sep='\t',
    header=None,
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)

In [None]:
# Preview the raw ratings (user_id, item_id, rating, timestamp).
movies_rating_df

In [None]:
# The timestamp is not used by any later step, so remove it in place.
movies_rating_df.drop(columns=['timestamp'], inplace=True)

In [None]:
# Preview the ratings table; the 'timestamp' column has been dropped by this point.
movies_rating_df

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
movies_rating_df.describe()

In [None]:
# Print the column dtypes, non-null counts and memory usage of the ratings table.
movies_rating_df.info()

In [None]:
# Attach the title columns to every rating row by matching on 'item_id'
# (inner join, which is also pd.merge's default).
movies_rating_df = movies_rating_df.merge(
    movie_titles_df,
    how='inner',
    on='item_id',
)

In [None]:
# Preview the ratings after the merge on 'item_id' with the titles table.
movies_rating_df

## STEP 2: VISUALIZE THE DATASET

In [None]:
# Per-title summary statistics for each numeric column of the merged frame
# (not only 'rating').
movies_rating_df.groupby('title').describe()

In [None]:
# Per-title summary statistics restricted to the 'rating' column.
movies_rating_df.groupby('title')['rating'].describe()

In [None]:
# Average rating per title. Computed directly with .mean() instead of running
# the full describe() (count, std, quartiles, ...) and discarding everything
# but one column. The Series is renamed to 'mean' so the column name seen by
# later cells is unchanged.
ratings_df_mean = movies_rating_df.groupby('title')['rating'].mean().rename('mean')

In [None]:
# Show the average rating per title.
ratings_df_mean

In [None]:
# Number of ratings per title. Computed directly with .count() instead of
# running the full describe() just to extract one column. The Series is
# renamed to 'count' so later cells see the same column name; the values are
# now integers rather than the floats describe() reports.
ratings_df_count = movies_rating_df.groupby('title')['rating'].count().rename('count')

In [None]:
# Show the number of ratings per title.
ratings_df_count

In [None]:
# Place the per-title count and mean side by side, aligned on the title index.
ratings_mean_count_df = pd.concat(
    objs=[ratings_df_count, ratings_df_mean],
    axis='columns',
)

In [None]:
# Show the combined table: one row per title with its 'count' and 'mean'.
ratings_mean_count_df

In [None]:
# Display a copy with the title index turned into a regular column. The result
# is not assigned, so ratings_mean_count_df itself keeps the title index.
ratings_mean_count_df.reset_index()

In [None]:
# Distribution of the per-title average rating, 100 bins, drawn in red.
ratings_mean_count_df['mean'].plot.hist(bins=100, color='r')

In [None]:
# Distribution of the number of ratings per title, 100 bins, drawn in red.
ratings_mean_count_df['count'].plot.hist(bins=100, color='r')

In [None]:
# Titles whose average rating is exactly 5.
ratings_mean_count_df.loc[ratings_mean_count_df['mean'].eq(5)]

In [None]:
# The 100 titles with the most ratings, most-rated first.
ratings_mean_count_df.sort_values(by='count', ascending=False).iloc[:100]

## STEP 3: PERFORM ITEM-BASED COLLABORATIVE FILTERING ON ONE MOVIE SAMPLE

In [None]:
# Reshape the long ratings table into a user-by-title matrix of ratings
# (one row per user_id, one column per title).
userid_movietitle_matrix = pd.pivot_table(
    movies_rating_df,
    values='rating',
    index='user_id',
    columns='title',
)

In [None]:
# Preview the user-by-title rating matrix built by pivot_table.
userid_movietitle_matrix

In [None]:
# Column of per-user ratings for 'Titanic (1997)'.
titanic = userid_movietitle_matrix.loc[:, 'Titanic (1997)']

In [None]:
# Show the ratings each user gave to 'Titanic (1997)'.
titanic

In [None]:
# Column of per-user ratings for 'Star Wars (1977)'.
starwars = userid_movietitle_matrix.loc[:, 'Star Wars (1977)']

In [None]:
# Show the ratings each user gave to 'Star Wars (1977)'.
starwars

In [None]:
# Correlate every title's rating column with the Titanic ratings, then attach
# each title's rating count so weakly-supported correlations can be filtered
# out afterwards.
titanic_correlations = (
    userid_movietitle_matrix
    .corrwith(titanic)
    .to_frame(name='Correlation')
    .join(ratings_mean_count_df['count'])
)


In [None]:
# Show each title's correlation with 'Titanic (1997)' next to its rating count.
titanic_correlations

In [None]:
# Remove, in place, every row that contains a missing value (titles for which
# no correlation could be computed), then display what is left.
titanic_correlations.dropna(axis=0, how='any', inplace=True)
titanic_correlations

In [None]:
# Titles ordered from most to least correlated with Titanic.
titanic_correlations.sort_values(by='Correlation', ascending=False)

In [None]:
# Keep only titles with more than 80 ratings, then show the five most
# correlated with Titanic.
(
    titanic_correlations
    .loc[titanic_correlations['count'].gt(80)]
    .sort_values(by='Correlation', ascending=False)
    .head()
)

## STEP 4: CREATE AN ITEM-BASED COLLABORATIVE FILTER ON THE ENTIRE DATASET

In [None]:
# Re-display the user-by-title rating matrix that the correlation step below uses.
userid_movietitle_matrix

In [None]:
# Pairwise Pearson correlation between all title columns. A pair of titles
# needs at least 80 overlapping observations, otherwise its entry is NaN.
movie_correlations = userid_movietitle_matrix.corr(
    min_periods=80,
    method='pearson',
)

In [None]:
# Show the title-by-title correlation matrix.
movie_correlations

In [None]:
# Load the personal ratings. Later cells read its 'Movie Name' and 'Ratings'
# columns.
myRatings = pd.read_csv(filepath_or_buffer='My_Ratings.csv')

In [None]:
# Show the personal ratings loaded from 'My_Ratings.csv'.
myRatings

In [None]:
# Name of the movie in the first row (index label 0) of the personal ratings.
myRatings.loc[0, 'Movie Name']

In [None]:
# Build recommendation scores. For every movie in myRatings, take that movie's
# column of correlations with all other titles, drop titles without a
# correlation, and weight the rest by the rating the user gave, so titles
# correlated with highly-rated movies score highest.
#
# Changes from the earlier version of this cell:
# - Every row of myRatings is used instead of a hard-coded range(0, 2), which
#   ignored extra rows and raised when fewer than two were present.
# - The pieces are collected in a list and concatenated once. Series.append
#   was removed in pandas 2.0, and pd.Series() without a dtype warns on older
#   releases.
# - The rating is applied with a vectorized multiply rather than a per-element
#   map(lambda ...).
#
# A movie name that has no column in movie_correlations still raises KeyError.
weighted_correlations = []
for movie_name, user_rating in zip(myRatings['Movie Name'], myRatings['Ratings']):
    similiar_movie = movie_correlations[movie_name].dropna() * user_rating
    weighted_correlations.append(similiar_movie)

if weighted_correlations:
    similiar_movies_list = pd.concat(weighted_correlations)
else:
    # No rated movies: keep an empty, typed Series so the sort/print cell still runs.
    similiar_movies_list = pd.Series(dtype='float64')
    

In [None]:
# Order the scores from highest to lowest in place, then print the ten best.
similiar_movies_list.sort_values(ascending=False, inplace=True)
print(similiar_movies_list.iloc[:10])