In [None]:
# Import libraries
import pandas as pd
import numpy as np

In [None]:
# Load the ratings CSV into a dataframe; the bare name on the last line makes the notebook render it
df = pd.read_csv(filepath_or_buffer='Recommend.csv')
df

In [None]:
# The user-movie rating matrices built below are sized by the number of distinct users and movies,
# so both counts are taken up front.
# NOTE(review): later cells index the matrices with (id - 1), which presumably relies on ids being
# contiguous and 1-based - confirm against the data.
n_users = len(df['user_id'].unique())
n_movies = len(df['movie_id'].unique())

In [None]:
# Split the data into training (75%) and testing (25%) sets
from sklearn.model_selection import train_test_split
# A fixed random_state makes the shuffle deterministic, so the same rows land in each split
# on every Restart & Run All and downstream results are reproducible.
train_data, test_data = train_test_split(df, test_size=0.25, random_state=42)

In [None]:
## Training Data
# Start from an all-zero (n_users x n_movies) grid; cells with no rating in the training split stay 0
train_data_matrix = np.zeros(shape=(n_users, n_movies))

# Copy each training rating into its [user, movie] cell.
# index=False drops the leading index field, so the first three columns sit at positions 0, 1 and 2.
# NOTE(review): assumes those columns are user id, movie id and rating (in that order) and that
# ids are 1-based - confirm against the CSV.
for record in train_data.itertuples(index=False):
  user_idx = record[0] - 1
  movie_idx = record[1] - 1
  train_data_matrix[user_idx, movie_idx] = record[2]
train_data_matrix

In [None]:
## Test Data
# Start from an all-zero (n_users x n_movies) grid; cells with no rating in the test split stay 0
test_data_matrix = np.zeros(shape=(n_users, n_movies))

# Copy each held-out rating into its [user, movie] cell.
# index=False drops the leading index field, so the first three columns sit at positions 0, 1 and 2.
# NOTE(review): assumes those columns are user id, movie id and rating (in that order) and that
# ids are 1-based - confirm against the CSV.
for record in test_data.itertuples(index=False):
  user_idx = record[0] - 1
  movie_idx = record[1] - 1
  test_data_matrix[user_idx, movie_idx] = record[2]
test_data_matrix

In [None]:
# Cosine similarity between users (rows) and between movies (columns): the cosine of the angle
# between two rating vectors. It is 1 when the vectors point the same way and smaller otherwise.
from sklearn.metrics import pairwise_distances
# pairwise_distances(metric='cosine') returns the cosine *distance* (1 - similarity), so it is
# converted back to a similarity here; otherwise the least similar users would get the largest weight.
user_similarity = 1 - pairwise_distances(train_data_matrix, metric='cosine')
movie_similarity = 1 - pairwise_distances(train_data_matrix.T, metric='cosine')

# Predictions are built from each user's deviation from their own mean rating, which is a better
# indicator than the absolute rating.
# np.newaxis turns the per-row means into a column vector so they broadcast across the movie axis.
mean_user_rating = train_data_matrix.mean(axis=1)[:, np.newaxis]
ratings_diff = train_data_matrix - mean_user_rating

# Calculate user predictions: the user's mean plus the similarity-weighted average of all users'
# deviations; keepdims=True keeps the normaliser as a column so it divides row by row.
user_pred = mean_user_rating + user_similarity.dot(ratings_diff) / np.abs(user_similarity).sum(axis=1, keepdims=True)

# The resulting user_pred matrix holds a predicted rating for every user-movie pair, including movies a user has not rated, based on that user's mean rating and the ratings of similar users