In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
import seaborn as sns

In [2]:
ratings = pd.read_csv('ratings.csv', sep='\t', encoding='latin-1', usecols=['user_id', 'movie_id', 'rating'])
users = pd.read_csv('users.csv', sep='\t', encoding='latin-1', usecols=['user_id', 'gender', 'zipcode', 'age_desc', 'occ_desc'])
movies = pd.read_csv('movies.csv', sep='\t', encoding='latin-1', usecols=['movie_id', 'title', 'genres'])

In [3]:
ratings.shape

(1000209, 3)

In [5]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,1,1193,5
1,1,661,3
2,1,914,3
3,1,3408,4
4,1,2355,5


In [6]:
#User Item Matrix 

In [7]:
user_item_m = ratings.pivot('user_id','movie_id','rating').fillna(0)
print(f'Shape: {user_item_m.shape}')

  user_item_m = ratings.pivot('user_id','movie_id','rating').fillna(0)


Shape: (6040, 3706)


In [8]:
user_item_m.iloc[:10,:15].astype('i1').T.join(movies.set_index('movie_id').title).set_index('title').T.rename_axis('user_id')

title,Toy Story (1995),Jumanji (1995),Grumpier Old Men (1995),Waiting to Exhale (1995),Father of the Bride Part II (1995),Heat (1995),Sabrina (1995),Tom and Huck (1995),Sudden Death (1995),GoldenEye (1995),"American President, The (1995)",Dracula: Dead and Loving It (1995),Balto (1995),Nixon (1995),Cutthroat Island (1995)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0
6,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0
8,4,0,0,3,0,0,0,0,0,0,0,0,0,4,0
9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10,5,5,0,0,0,0,4,0,0,0,0,0,0,0,0


In [9]:
#similarity matrix

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
X_user = cosine_similarity(user_item_m)

In [12]:
X_user.shape

(6040, 6040)

In [13]:
X_item = cosine_similarity(user_item_m.T)

In [14]:
X_item.shape

(3706, 3706)

In [15]:
class CfRec():
    def __init__(self, M, X, items, k=20, top_n=10):
        self.X = X
        self.M = M
        self.k = k
        self.top_n = top_n
        self.items = items
        
    def recommend_user_based(self, user):
        ix = self.M.index.get_loc(user)
        # Use it to index the User similarity matrix
        u_sim = self.X[ix]
        # obtain the indices of the top k most similar users
        most_similar = self.M.index[u_sim.argpartition(-(self.k+1))[-(self.k+1):]]
        # Obtain the mean ratings of those users for all movies
        rec_items = self.M.loc[most_similar].mean(0).sort_values(ascending=False)
        # Discard already seen movies
        # already seen movies
        seen_mask = self.M.loc[user].gt(0)
        seen = seen_mask.index[seen_mask].tolist()
        rec_items = rec_items.drop(seen).head(self.top_n)
        # return recommendations - top similar users rated movies
        return (rec_items.index.to_frame()
                                .reset_index(drop=True)
                                .merge(self.items))

    def recommend_item_based(self, item):
        liked = self.items.loc[self.items.movie_id.eq(item), 'title'].item()
        print(f"Because you liked {liked}, we'd recommend you to watch:")
        # get index of movie
        ix = self.M.columns.get_loc(item)
        # Use it to index the User similarity matrix
        i_sim = self.X[ix]
        # obtain the indices of the top k most similar users
        most_similar = self.M.columns[i_sim.argpartition(-(self.k+1))[-(self.k+1):]]
        return (most_similar.difference([item])
                                 .to_frame()
                                 .reset_index(drop=True)
                                 .merge(self.items)
                                 .head(self.top_n))

In [16]:
def because_user_liked(user_item_m, movies, ratings, user):
    ix_user_seen = user_item_m.loc[user]>0.
    seen_by_user = user_item_m.columns[ix_user_seen]
    return (seen_by_user.to_frame()
                 .reset_index(drop=True)
                 .merge(movies)
                 .assign(user_id=user)
                 .merge(ratings[ratings.user_id.eq(user)])
                 .sort_values('rating', ascending=False).head(10))

In [17]:
# Testing User based

In [18]:
rec = CfRec(user_item_m, X_user, movies)

In [19]:
because_user_liked(user_item_m, movies, ratings, 2)

Unnamed: 0,movie_id,title,genres,user_id,rating
63,1610,"Hunt for Red October, The (1990)",Action|Thriller,2,5
22,593,"Silence of the Lambs, The (1991)",Drama|Thriller,2,5
113,3147,"Green Mile, The (1999)",Drama|Thriller,2,5
34,1124,On Golden Pond (1981),Drama,2,5
36,1193,One Flew Over the Cuckoo's Nest (1975),Drama,2,5
37,1196,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Drama|Sci-Fi|War,2,5
46,1246,Dead Poets Society (1989),Drama,2,5
47,1247,"Graduate, The (1967)",Drama|Romance,2,5
49,1259,Stand by Me (1986),Adventure|Comedy|Drama,2,5
51,1293,Gandhi (1982),Drama,2,5


In [20]:
rec.recommend_user_based(69)

Unnamed: 0,movie_id,title,genres
0,527,Schindler's List (1993),Drama|War
1,318,"Shawshank Redemption, The (1994)",Drama
2,608,Fargo (1996),Crime|Drama|Thriller
3,1213,GoodFellas (1990),Crime|Drama
4,150,Apollo 13 (1995),Drama
5,1094,"Crying Game, The (1992)",Drama|Romance|War
6,1179,"Grifters, The (1990)",Crime|Drama|Film-Noir
7,36,Dead Man Walking (1995),Drama
8,2333,Gods and Monsters (1998),Drama
9,3100,"River Runs Through It, A (1992)",Drama


In [21]:
# Testing Item based

In [22]:
rec = CfRec(user_item_m, X_item, movies)

In [23]:
rec.recommend_item_based(2021)

Because you liked Dune (1984), we'd recommend you to watch:


Unnamed: 0,movie_id,title,genres
0,541,Blade Runner (1982),Film-Noir|Sci-Fi
1,1200,Aliens (1986),Action|Sci-Fi|Thriller|War
2,1240,"Terminator, The (1984)",Action|Sci-Fi|Thriller
3,1371,Star Trek: The Motion Picture (1979),Action|Adventure|Sci-Fi
4,1374,Star Trek: The Wrath of Khan (1982),Action|Adventure|Sci-Fi
5,1375,Star Trek III: The Search for Spock (1984),Action|Adventure|Sci-Fi
6,1376,Star Trek IV: The Voyage Home (1986),Action|Adventure|Sci-Fi
7,1527,"Fifth Element, The (1997)",Action|Sci-Fi
8,2011,Back to the Future Part II (1989),Comedy|Sci-Fi
9,2105,Tron (1982),Action|Adventure|Fantasy|Sci-Fi
