In [3]:
import pandas as pd
import numpy as np

# Columns to keep from each CSV; every other column is skipped at read time.
ratings_fields = ['userId', 'movieId', 'rating']
movies_fields = ['movieId', 'title']

# Read the per-user ratings and the movieId -> title lookup table.
ratings = pd.read_csv(
    "./data/ratings.csv",
    encoding="ISO-8859-1",
    usecols=ratings_fields,
)
movies = pd.read_csv(
    "./data/movies.csv",
    encoding="ISO-8859-1",
    usecols=movies_fields,
)

# Attach the title to every rating row (default inner join on movieId).
ratings = ratings.merge(movies, on='movieId')

In [28]:
# Movie-movie afiniti scores for one reference movie.
#
# The afiniti score of an associated movie is the fraction of its distinct
# viewers who also rated the reference movie:
#     |viewers(associated) & viewers(reference)| / |viewers(associated)|


def compute_movie_afiniti(ratings, ref_movie):
    """Score every other movie in `ratings` against `ref_movie`.

    Parameters
    ----------
    ratings : pandas.DataFrame
        One row per rating, with at least the columns 'userId', 'movieId'
        and 'title'.
    ref_movie : scalar
        movieId of the reference (base) movie.

    Returns
    -------
    pandas.DataFrame
        One row per movie other than `ref_movie`, ordered by ascending
        movieId with a fresh 0..n-1 index, and the columns 'base_movieId',
        'base_movieTitle', 'associated_movieId', 'associated_movieTitle',
        'afiniti_score'.

    Raises
    ------
    ValueError
        If `ref_movie` has no rows in `ratings`.
    """
    ref_rows = ratings[ratings['movieId'] == ref_movie]
    if ref_rows.empty:
        raise ValueError(
            "movieId {!r} has no ratings; cannot compute afiniti scores".format(ref_movie)
        )

    # Loop-invariant facts about the reference movie, computed once.
    ref_title = ref_rows['title'].iloc[0]
    ref_viewers = ref_rows['userId'].unique()

    others = ratings[ratings['movieId'] != ref_movie]

    # Count each user at most once per movie, then flag the users who also
    # rated the reference movie.
    viewer_pairs = others[['movieId', 'userId']].drop_duplicates()
    saw_ref = viewer_pairs['userId'].isin(ref_viewers)

    # groupby sorts its keys, so rows come out in ascending movieId order.
    by_movie = saw_ref.groupby(viewer_pairs['movieId'])
    viewer_counts = by_movie.size()   # distinct viewers of each movie
    common_counts = by_movie.sum()    # ... of which also rated ref_movie

    # Title of the first rating row of each movie.
    titles = others.drop_duplicates('movieId').set_index('movieId')['title']

    # Build the frame in one shot: DataFrame.append was removed in pandas 2.0
    # and growing a frame row by row copies it on every iteration.
    return pd.DataFrame(
        {
            'base_movieId': ref_movie,
            'base_movieTitle': ref_title,
            'associated_movieId': viewer_counts.index.values,
            'associated_movieTitle': titles.reindex(viewer_counts.index).values,
            'afiniti_score': (common_counts / viewer_counts).values.astype(float),
        },
        columns=[
            'base_movieId',
            'base_movieTitle',
            'associated_movieId',
            'associated_movieTitle',
            'afiniti_score',
        ],
    )


# get unique movies (not needed by the scoring itself; kept so the name stays defined)
distinct_movies = np.unique(ratings['movieId'])

# movieId of movie viewer watched
ref_movie = 10
m_data = ratings[ratings['movieId'] == ref_movie]

# For better recommendations, keep only movies whose afiniti score exceeds this threshold
min_afiniti_score = 0.6

# compare ref_movie with every other movie, highest afiniti first
movie_afiniti = compute_movie_afiniti(ratings, ref_movie)
movie_afiniti = movie_afiniti.sort_values(['afiniti_score'], ascending=False)

similar_movies = movie_afiniti[movie_afiniti['afiniti_score'] > min_afiniti_score]

similar_movies.head(10)

Unnamed: 0,base_movieId,base_movieTitle,associated_movieId,associated_movieTitle,afiniti_score
292,10,GoldenEye (1995),511,"Program, The (1993)",1.0
248,10,GoldenEye (1995),423,Blown Away (1994),0.925926
1119,10,GoldenEye (1995),2376,"View to a Kill, A (1985)",0.921875
135,10,GoldenEye (1995),227,Drop Zone (1994),0.888889
483,10,GoldenEye (1995),1003,Extreme Measures (1996),0.888889
34,10,GoldenEye (1995),42,Dead Presidents (1995),0.888889
155,10,GoldenEye (1995),259,Kiss of Death (1995),0.888889
1687,10,GoldenEye (1995),4005,"Living Daylights, The (1987)",0.884615
215,10,GoldenEye (1995),360,I Love Trouble (1994),0.882353
1579,10,GoldenEye (1995),3635,"Spy Who Loved Me, The (1977)",0.871795
