In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df_movies = pd.read_csv("data/ml-25m/movies.csv")
df_ratings = pd.read_csv("data/ml-25m/ratings.csv")

In [3]:
df_ratings["userId"].unique()

array([     1,      2,      3, ..., 162539, 162540, 162541])

In [4]:
df_movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
df_joined = (
  df_ratings[df_ratings['userId'].between(1, 5000)]
    .merge(df_movies[['movieId', 'title']], on='movieId', how='left')
)
# user-item matrix (rows: users, cols: movies, values: ratings)
ratings_matrix = df_joined.pivot_table(
  index='userId',
  columns='title',
  values='rating'
)


In [6]:
# 1⃣  similarity between items (movies)
item_sim = pd.DataFrame(
  cosine_similarity(ratings_matrix.T.fillna(0)),
  index=ratings_matrix.columns,
  columns=ratings_matrix.columns
)

# 2⃣  prediction function for one user
def predict_for_user(user_id, k=20):
  seen = ratings_matrix.loc[user_id].dropna()
  sims  = item_sim[seen.index]                     # similarities to movies the user rated
  weights = sims.div(sims.sum(axis=1), axis=0)     # normalise row-wise

  # weighted sum of user’s own ratings
  scores = (weights * seen).sum(axis=1)

  # filter out already-rated titles
  return scores.drop(seen.index).sort_values(ascending=False)

# example: top-10 recs for user 1
top10 = predict_for_user(99).head(25)
print(top10)

title
Ip Man: The Final Fight (2013)                            5.000000
What Women Want (a.k.a. I Know a Woman's Heart) (2011)    5.000000
Holding the Man (2015)                                    5.000000
Warkop DKI Reborn: Jangkrik Boss! (2016)                  5.000000
SPF-18 (2017)                                             5.000000
Tarantella (1995)                                         5.000000
10th & Wolf (2006)                                        5.000000
Amityville: The Awakening (2017)                          5.000000
Hells Angels on Wheels (1967)                             4.902785
Catfish in Black Bean Sauce (2000)                        4.902785
TV Set, The (2006)                                        4.628988
Blind Shaft (Mang jing) (2003)                            4.628988
Resurrection (1980)                                       4.628988
City of Hope (1991)                                       4.628988
Viva Zapata! (1952)                                     

In [None]:
item_sim.head()