# Item-Based Collaborative Filtering (Amazon, Netflix Prize)

In [None]:
import pandas as pd

# u.data is tab-separated; only its first three columns are read and named.
ratings_cols = ['user_id', 'movie_id', 'rating']
ratings = pd.read_csv(
    './ml-100k/u.data',
    sep='\t',
    names=ratings_cols,
    usecols=range(3),
    encoding="ISO-8859-1",
)

# u.item is pipe-separated; only its first two columns are read and named.
movies_cols = ['movie_id', 'title']
movies = pd.read_csv(
    './ml-100k/u.item',
    sep='|',
    names=movies_cols,
    usecols=range(2),
    encoding="ISO-8859-1",
)

# No join key is given, so pandas joins on the column(s) both frames share
# (here: movie_id). After this, every rating row also carries the movie title.
ratings = movies.merge(ratings)

ratings.head()

In [None]:
# Reshape to a user x movie table: one row per user_id, one column per title,
# cells hold the rating (NaN where the user has no rating for that title).
user_ratings = pd.pivot_table(
    ratings,
    values='rating',
    index=['user_id'],
    columns=['title'],
)
user_ratings.head(10)

Compute a correlation score for every column pair - so every movie with every other movie

In [None]:
# Pairwise column correlation with pandas' defaults written out explicitly
# (Pearson, at least one shared observation per pair).
corr_matrix = user_ratings.corr(method='pearson', min_periods=1)
corr_matrix.head(10)

Clean the data - keep a correlation only when at least 100 users rated both movies in the pair

In [None]:
# Recompute the correlations, this time requiring at least 100 observations
# per column pair; pairs with fewer shared ratings come out as NaN.
corr_matrix = user_ratings.corr(
    min_periods=100,
    method='pearson',
)
corr_matrix.head()

Check some user's ratings - pick a user whose taste is similar to yours (may take a while)

In [None]:
# Take the row of the user with id 150 and keep only the titles they rated.
one_user_ratings = user_ratings.loc[150, :].dropna()

# Show this user's ten highest ratings.
one_user_ratings.sort_values(ascending=False).iloc[:10]

Based on the movies the user watched and the ratings he/she gave them, let's create movie recommendations

In [None]:
# For every movie the selected user rated, take that movie's column of the
# correlation matrix (NaN pairs dropped) and scale each correlation by the
# rating the user gave. The pieces are gathered in a list and concatenated
# once: Series.append was removed in pandas 2.0, and growing a Series inside
# a loop copies it on every iteration.
scaled_similarities = []
# .items() yields (title, rating) pairs, which avoids integer-position lookups
# on a Series whose index holds movie titles.
for title, rating in one_user_ratings.items():
    print("Adding recommendations for movie " + title + ".............")
    related_movies = corr_matrix[title].dropna()
    # scaling by movie score
    scaled_similarities.append(related_movies * rating)

# pd.concat raises on an empty list, so fall back to an explicitly typed
# empty Series when the user has no ratings at all.
if scaled_similarities:
    recommendations = pd.concat(scaled_similarities)
else:
    recommendations = pd.Series(dtype='float64')

print("Movie Recommendations:")
# Rebind instead of sorting in place so re-running the cell never mutates
# an object that was already displayed.
recommendations = recommendations.sort_values(ascending=False)
recommendations.head(10)

Remove duplicates by aggregating the same proposals:

In [None]:
# The same title can appear once per movie it correlates with; collapse those
# repeated index labels into a single entry by summing their scores.
recommendations = recommendations.groupby(level=0).sum()

In [None]:
# Order by score, highest first. The sorted result is rebound to the name
# rather than sorted in place, so no existing object is mutated.
recommendations = recommendations.sort_values(ascending=False)
recommendations.head(10)

Do not show movies the selected user has already watched - we want to propose brand new movies he/she probably hasn't seen yet!

In [None]:
# Drop every title the selected user already rated; errors='ignore' skips
# rated titles that are not present among the recommendations.
filtered_recommendations = recommendations.drop(
    labels=one_user_ratings.index,
    errors='ignore',
)
filtered_recommendations.head(10)

Try to change the recommendations - for example by ignoring movies highly related to movies the specific user doesn't like, or by using a correlation method other than Pearson