In [1]:
import pandas as pd
import turicreate
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

In [2]:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv('MovieLens/ml-100k/u.data', sep = '\t', names = r_cols, encoding = 'latin-1')
ratings_train = pd.read_csv('MovieLens/ml-100k/ua.base', sep = '\t', names = r_cols, encoding = 'latin-1')
ratings_test = pd.read_csv('MovieLens/ml-100k/ua.test', sep = '\t', names = r_cols, encoding = 'latin-1')
ratings_train.shape, ratings_test.shape, ratings.shape

((90570, 4), (9430, 4), (100000, 4))

In [3]:
n_users = ratings.user_id.unique().shape[0]
n_items = ratings.movie_id.unique().shape[0]

In [4]:
data_matrix = np.zeros((n_users, n_items))
for line in ratings.itertuples():
    data_matrix[line[1] - 1, line[2] - 1] = line[3]
data_matrix.shape

(943, 1682)

In [5]:
user_similarity = pairwise_distances(data_matrix, metric = 'cosine')
item_similarity = pairwise_distances(data_matrix.T, metric = 'cosine')
user_similarity.shape, item_similarity.shape

((943, 943), (1682, 1682))

In [6]:
def predict(ratings, similarity, type = 'user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis = 1)
        ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis = 1)]).T
    elif type == 'item':
        pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis = 1)])
    return pred

In [7]:
user_prediction = predict(data_matrix, user_similarity, 'user')
item_prediction = predict(data_matrix, item_similarity, 'item')

In [21]:
train_data = turicreate.SFrame(ratings)
test_data = turicreate.SFrame(ratings_test)

In [9]:
popularity_model = turicreate.popularity_recommender.create(train_data, user_id = 'user_id', item_id = 'movie_id', target = 'rating')

In [10]:
popularity_recomm = popularity_model.recommend(users = [1, 2, 3, 4, 5], k = 5)

In [11]:
popularity_recomm.print_rows(num_rows = 25)

+---------+----------+-------+------+
| user_id | movie_id | score | rank |
+---------+----------+-------+------+
|    1    |   1536   |  5.0  |  1   |
|    1    |   1201   |  5.0  |  2   |
|    1    |   1189   |  5.0  |  3   |
|    1    |   1122   |  5.0  |  4   |
|    1    |   814    |  5.0  |  5   |
|    2    |   1536   |  5.0  |  1   |
|    2    |   1201   |  5.0  |  2   |
|    2    |   1189   |  5.0  |  3   |
|    2    |   1122   |  5.0  |  4   |
|    2    |   814    |  5.0  |  5   |
|    3    |   1536   |  5.0  |  1   |
|    3    |   1201   |  5.0  |  2   |
|    3    |   1189   |  5.0  |  3   |
|    3    |   1122   |  5.0  |  4   |
|    3    |   814    |  5.0  |  5   |
|    4    |   1536   |  5.0  |  1   |
|    4    |   1201   |  5.0  |  2   |
|    4    |   1189   |  5.0  |  3   |
|    4    |   1122   |  5.0  |  4   |
|    4    |   814    |  5.0  |  5   |
|    5    |   1536   |  5.0  |  1   |
|    5    |   1201   |  5.0  |  2   |
|    5    |   1189   |  5.0  |  3   |
|    5    | 

### Collaborative Filtering Model

In [22]:
item_sim_model = turicreate.item_similarity_recommender.create(train_data, user_id = 'user_id', item_id = 'movie_id', target = 'rating', similarity_type = 'cosine')

In [25]:
item_sim_recomm = item_sim_model.recommend(users = [0, 654, 655, 653, 5], k = 5)

In [27]:
item_sim_recomm.print_rows(num_rows = 25)

+---------+----------+---------------------+------+
| user_id | movie_id |        score        | rank |
+---------+----------+---------------------+------+
|    0    |    50    |  0.4229108190536499 |  1   |
|    0    |   174    |  0.4134716308116913 |  2   |
|    0    |   181    | 0.40375013947486876 |  3   |
|    0    |    56    |  0.3640720546245575 |  4   |
|    0    |   172    |  0.3639549827575684 |  5   |
|   654   |   202    |  1.1108124876508907 |  1   |
|   654   |    96    |  1.0672064906074887 |  2   |
|   654   |   176    |  0.9660058025600148 |  3   |
|   654   |   228    |  0.9497228483764493 |  4   |
|   654   |   161    |  0.895231672290231  |  5   |
|   655   |   168    |  0.4177620355230178 |  1   |
|   655   |   173    | 0.34194552889705576 |  2   |
|   655   |   180    | 0.25971873198112433 |  3   |
|   655   |   194    | 0.24046829225373095 |  4   |
|   655   |    71    | 0.20195317303177213 |  5   |
|   653   |   173    |  0.7365138730817464 |  1   |
|   653   | 