In [0]:
import numpy as np
import pandas as pd

In [0]:
movies = pd.read_csv("/content/drive/My Drive/datasets/movies.csv")
ratings = pd.read_csv("/content/drive/My Drive/datasets/ratings.csv")

In [0]:
ratings = ratings.drop(columns=['timestamp'])

In [0]:
count = len(ratings)
preferences = {}

In [0]:
ratings_array = ratings.values

In [0]:
for i in range(count):
  userId = ratings_array[i, 0]
  movieId = ratings_array[i, 1]
  rating = ratings_array[i, 2]
    
  if userId not in preferences.keys():
    preferences[userId] = {}
    
  preferences[userId][movieId] = rating

In [0]:
def sim_pearson(preferences, person1,person2):
    
  similarity = {}

  #getting similar movie watched

  for movie in preferences[person1]:
    if movie in preferences[person2]:
      similarity[movie] = 1

  if len(similarity) == 0:
    return 0

  sum1 = np.sum([preferences[person1][movie] for movie in similarity])
  sum2 = np.sum([preferences[person2][movie] for movie in similarity])

  sum1sq = np.sum([np.power(preferences[person1][movie],2) for movie in similarity])
  sum2sq = np.sum([np.power(preferences[person2][movie],2) for movie in similarity])

  pSum = np.sum([preferences[person1][movie] * preferences[person2][movie] for movie in similarity])

  num = pSum - (sum1 * sum2/len(similarity))
  den = np.sqrt((sum1sq - np.power(sum1,2)/len(similarity))*(sum2sq - np.power(sum2,2)/len(similarity)))
  if den == 0:
    return 0

  r = num/den

  return r

In [0]:
def top_matches(preferences, person, n = 5, similarity = sim_pearson):
    
  scores = [(similarity(preferences,person,reviewer),reviewer) for reviewer in preferences if reviewer != person]

  scores.sort()
  scores.reverse()
  return scores[:n]

In [0]:
top_match = top_matches(preferences,1)

In [0]:
top_match

[(1.000000000000016, 550.0),
 (1.0, 598.0),
 (1.0, 333.0),
 (1.0, 146.0),
 (1.0, 106.0)]

In [0]:
def recommendations(preferences, person, similarity = sim_pearson):
  total = {}
  simsum = {}

  for reviwer in preferences:
    if reviwer == person:
      continue
    sim = similarity(preferences,person,reviwer)
      
    if sim <= 0:
      continue
    for movie in preferences[reviwer]:
          
      if movie not in preferences[person] or preferences[person][movie] == 0:
        total.setdefault(movie,0)
        total[movie] += preferences[reviwer][movie] * sim
              
        simsum.setdefault(movie,0)
        simsum[movie] += sim
              
  ranks = [(total/simsum[movie],movie) for movie,total in total.items()]

  ranks.sort()
  ranks.reverse()

  return ranks[:10],person

In [0]:
movies_array = movies.values

In [0]:
top_rec,user = recommendations(preferences,1)

In [0]:
movies_list = []

for i in top_rec:
    for j in range(len(movies_array)):
        if i[1] == movies_array[j][0]:
            movies_list.append(movies_array[j])

In [0]:
print("Top Recommendations for you:\n")
for i in movies_list:
  print(i[1])

Top Recommendations for you:

Rivers and Tides (2001)
Come and See (Idi i smotri) (1985)
Belle époque (1992)
Won't You Be My Neighbor? (2018)
Loving Vincent (2017)
Black Mirror
Bobik Visiting Barbos (1977)
In the blue sea, in the white foam. (1984)
On the Trail of the Bremen Town Musicians (1973)
Gena the Crocodile (1969)
