# Lab 8: Recommender System

In this assignment, we will study how to do user-based collaborative filtering and item-based collaborative filtering. 

## 1. Dataset

In this assignment, we will use MovieLens-100K dataset. It includes about 100,000 ratings from 1000 users on 1700 movies.  

In [1]:
from math import sqrt
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors


# 1. load data
user_ratings_train = pd.read_csv('./ml-100k/u1.base',
                                 sep='\t', names=['user_id', 'movie_id', 'rating'], usecols=[0, 1, 2])

user_ratings_test = pd.read_csv('./ml-100k/u1.test',
                                sep='\t', names=['user_id', 'movie_id', 'rating'], usecols=[0, 1, 2])

movie_info = pd.read_csv('./ml-100k/u.item',
                         sep='|', names=['movie_id', 'title'], usecols=[0, 1],
                         encoding="ISO-8859-1")

user_ratings_train = pd.merge(movie_info, user_ratings_train)
user_ratings_test = pd.merge(movie_info, user_ratings_test)

# 2. get the rating matrix. Each row is a user, and each column is a movie.
user_ratings_train = user_ratings_train.pivot_table(index=['user_id'],
                                                    columns=['title'],
                                                    values='rating')

user_ratings_test = user_ratings_test.pivot_table(index=['user_id'],
                                                  columns=['title'],
                                                  values='rating')


user_ratings_train = user_ratings_train.reindex(
    index=user_ratings_train.index.union(user_ratings_test.index),
    columns=user_ratings_train.columns.union(user_ratings_test.columns))

user_ratings_test = user_ratings_test.reindex(
    index=user_ratings_train.index.union(user_ratings_test.index),
    columns=user_ratings_train.columns.union(user_ratings_test.columns))

print(user_ratings_train.shape)
print(user_ratings_test.shape)

item_data_train = user_ratings_train.T
item_data_test = user_ratings_test.T

(943, 1664)
(943, 1664)


In [2]:
user_ratings_train.head()

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,5.0,,,3.0,4.0,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,4.0,,,...,,,,4.0,,,,,,


In [3]:
user_ratings_train.isna().sum()

title
'Til There Was You (1997)                937
1-900 (1994)                             940
101 Dalmatians (1996)                    853
12 Angry Men (1957)                      841
187 (1997)                               905
                                        ... 
Young Guns II (1990)                     907
Young Poisoner's Handbook, The (1995)    910
Zeus and Roxanne (1997)                  939
unknown                                  940
Á köldum klaka (Cold Fever) (1994)       942
Length: 1664, dtype: int64

In [4]:
# first need to take care of null values
# we will take the mean to fill the null values
def fill_missing_values(df):
    # Calculate mean ratings by user
    mean = df.mean(axis=1)
    # Fill missing values with mean ratings by user
    df = df.T.fillna(mean).T

    return df


user_ratings_train = fill_missing_values(user_ratings_train)
user_ratings_train

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.671642,3.671642,3.671642,5.000000,3.671642,3.671642,3.000000,4.000000,3.671642,3.671642,...,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642
2,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,...,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000
3,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,...,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000
4,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,...,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143
5,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044,4.000000,2.956044,2.956044,...,2.956044,2.956044,2.956044,4.000000,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,...,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500
940,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,...,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944
941,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,...,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455
942,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,3.000000,4.256410,3.000000,...,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410


## Task 1. User-based CF

* Use pearson correlation to get the similarity between different users.
* Based on the obtained similarity score, predict the ratings. You can use 5 nearest neighbors or 10 nearest neighbors.
* Compute MAE for the testing set.

In [5]:
neighbors_count = 10
# we are using pearson to calculate a network
network = user_ratings_train.T.corr(method='pearson').values
model = NearestNeighbors(
    n_neighbors=neighbors_count).fit(network)

In [6]:
n_distance, n_ind = model.kneighbors(X=None)

In [7]:
# preprare train and test matrices
user_data_train = user_ratings_train.values
user_data_test = user_ratings_test.values

In [8]:
%%time
# loop over each value of the test set
y_true, y_pred = [], []
for user_id, user_ratings in enumerate(user_data_test):
    for video_id, video_rating in enumerate(user_ratings):
        # We need to take care of the case where the user has not rated the video
        if np.isnan(user_data_test[user_id, video_id]):
            continue
        neighbors = n_ind[user_id]
        neighbor_ratings = user_data_train[neighbors]
        video_ratings = neighbor_ratings[:, video_id]
        # get biases for each user
        biases = user_ratings_train.mean(axis=1).values[neighbors]
        self_bias = user_ratings_train.mean(axis=1).values[user_id]
        simarity_scores = network[user_id][neighbors]
        score = self_bias + \
            (np.sum((np.multiply(simarity_scores, video_ratings - biases))) /
             np.sum(simarity_scores))
        y_true.append(user_data_test[user_id, video_id])
        y_pred.append(score)

CPU times: user 1min 3s, sys: 3.09 s, total: 1min 6s
Wall time: 1min 6s


In [9]:
from sklearn.metrics import mean_absolute_error
error = mean_absolute_error(y_true, y_pred)
print(
    f'The mean absolute error for {neighbors_count} neighbors is {error:.2f}.')

The mean absolute error for 10 neighbors is 0.81.


## Task 2. Item-based CF
* Use cosine similarity to get the similarity between different items.
* Based on the obtained similarity score, predict the ratings. You can use 5 nearest neighbors or 10 nearest neighbors.
* Compute MAE for the testing set.

In [10]:
# your code
user_ratings_train

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.671642,3.671642,3.671642,5.000000,3.671642,3.671642,3.000000,4.000000,3.671642,3.671642,...,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642
2,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,...,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000
3,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,...,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000
4,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,...,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143
5,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044,4.000000,2.956044,2.956044,...,2.956044,2.956044,2.956044,4.000000,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,...,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500
940,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,...,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944
941,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,...,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455
942,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,3.000000,4.256410,3.000000,...,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410


In [11]:
user_ratings_train

title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.671642,3.671642,3.671642,5.000000,3.671642,3.671642,3.000000,4.000000,3.671642,3.671642,...,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642,3.671642
2,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,...,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000,3.800000
3,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,...,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000,3.000000
4,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,...,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143,4.357143
5,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044,4.000000,2.956044,2.956044,...,2.956044,2.956044,2.956044,4.000000,2.956044,2.956044,2.956044,2.956044,2.956044,2.956044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,...,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500,4.312500
940,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,...,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944,3.457944
941,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,...,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455,4.045455
942,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,3.000000,4.256410,3.000000,...,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410,4.256410


In [12]:
print(f'item_data_train: {item_data_train.isna().sum()}')
print(f'item_data_test: {item_data_test.isna().sum()}')

item_data_train: user_id
1      1530
2      1624
3      1636
4      1650
5      1573
       ... 
939    1616
940    1557
941    1642
942    1586
943    1496
Length: 943, dtype: int64
item_data_test: user_id
1      1527
2      1643
3      1638
4      1654
5      1580
       ... 
939    1664
940    1664
941    1664
942    1664
943    1664
Length: 943, dtype: int64


In [13]:
item_data_train = item_data_train.T.fillna(item_data_train.mean(axis=1)).T

In [14]:
item_data_train.isna().sum(axis=1).value_counts()

0      1633
943      31
Name: count, dtype: int64

In [15]:
num_entries_still_missing = item_data_train.isna().sum(axis=1)

In [16]:
len(num_entries_still_missing)

1664

In [17]:
# scan for movies with no ratings
movies_with_no_review = []
for movie_name, num_missing in num_entries_still_missing.items():
    if num_missing:
        movies_with_no_review.append(movie_name)

In [18]:
len(movies_with_no_review)

31

In [19]:
movies_with_no_review

['Aiqing wansui (1994)',
 'Bird of Prey (1996)',
 'Bloody Child, The (1996)',
 'Cyclo (1995)',
 'Daens (1992)',
 'Damsel in Distress, A (1937)',
 'Every Other Weekend (1990)',
 "Eye of Vichy, The (Oeil de Vichy, L') (1993)",
 'Farmer & Chase (1995)',
 'Fear, The (1995)',
 'Good Morning (1971)',
 'Great Day in Harlem, A (1994)',
 'Homage (1995)',
 "I Don't Want to Talk About It (De eso no se habla) (1993)",
 'Johns (1996)',
 'Killer: A Journal of Murder (1995)',
 'Lashou shentan (1992)',
 'Lotto Land (1995)',
 'Love Is All There Is (1996)',
 'Modern Affair, A (1995)',
 'Other Voices, Other Rooms (1997)',
 'Paris Was a Woman (1995)',
 'Police Story 4: Project S (Chao ji ji hua) (1993)',
 'Power 98 (1995)',
 'Promise, The (Versprechen, Das) (1994)',
 'T-Men (1947)',
 'Tigrero: A Film That Was Never Made (1994)',
 'Very Natural Thing, A (1974)',
 'Walk in the Sun, A (1945)',
 'Window to Paris (1994)',
 'Yankee Zulu (1994)']

In [20]:
# seems like 31 movies does not have any review we will drop them
item_data_train = item_data_train.drop(movies_with_no_review)

In [21]:
print(f'item_data_train shape after {item_data_train.shape}')

item_data_train shape after (1633, 943)


In [22]:
# now we will use the cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity

similarity = cosine_similarity(item_data_train)

In [23]:
similarity

array([[1.        , 0.99940386, 0.99316766, ..., 0.99922188, 0.99942042,
        0.99975155],
       [0.99940386, 1.        , 0.99315524, ..., 0.99912261, 0.99922178,
        0.99965222],
       [0.99316766, 0.99315524, 1.        , ..., 0.99331934, 0.99315524,
        0.99350075],
       ...,
       [0.99922188, 0.99912261, 0.99331934, ..., 1.        , 0.99912261,
        0.9994702 ],
       [0.99942042, 0.99922178, 0.99315524, ..., 0.99912261, 1.        ,
        0.99965222],
       [0.99975155, 0.99965222, 0.99350075, ..., 0.9994702 , 0.99965222,
        1.        ]])

In [24]:
ItemNearestNeighborsModel = NearestNeighbors(n_neighbors=neighbors_count).fit(
    item_data_train.values)
dists, nbrs = ItemNearestNeighborsModel.kneighbors()

In [25]:
item_data_test

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Til There Was You (1997),,,,,,,,,,,...,,,,,,,,,,
1-900 (1994),,,,,,,,,,,...,,,,,,,,,,
101 Dalmatians (1996),2.0,,,,2.0,,,,,,...,,,,,,,,,,
12 Angry Men (1957),,,,,,,,,,,...,,,,,,,,,,
187 (1997),,,2.0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Young Guns II (1990),,,,,,,,,,,...,,,,,,,,,,
"Young Poisoner's Handbook, The (1995)",,,,,,,,,,,...,,,,,,,,,,
Zeus and Roxanne (1997),,,,,,,,,,,...,,,,,,,,,,
unknown,4.0,,,,4.0,,,,,,...,,,,,,,,,,


In [26]:
item_data_test = item_data_test.drop(
    movies_with_no_review).values

In [27]:
item_data_test.shape

(1633, 943)

In [28]:
item_data_train = item_data_train.values

In [29]:
item_data_train

array([[2.66666667, 2.66666667, 2.66666667, ..., 2.66666667, 2.66666667,
        2.66666667],
       [2.66666667, 2.66666667, 2.66666667, ..., 2.66666667, 2.66666667,
        2.66666667],
       [2.9       , 2.9       , 2.9       , ..., 2.9       , 2.9       ,
        2.9       ],
       ...,
       [2.        , 2.        , 2.        , ..., 2.        , 2.        ,
        2.        ],
       [2.66666667, 2.66666667, 2.66666667, ..., 2.66666667, 2.66666667,
        2.66666667],
       [3.        , 3.        , 3.        , ..., 3.        , 3.        ,
        3.        ]])

In [30]:
%%time
y_true = []
y_pred = []

for video_id, test_user_ratings in enumerate(item_data_test):
    neighbors = nbrs[video_id]

    for user_id, true_value in enumerate(test_user_ratings):
        if true_value > 0:
            y_true.append(true_value)
            given_ratings = item_data_train[neighbors, user_id]

        # get correlations
            sim_scores = similarity[video_id][neighbors]

            # get user ratings for neighbors
            user_ratings = item_data_train[neighbors, user_id]

            # calc pred
            sum_scores = np.sum(sim_scores)

            # if the sum of scores (denom) is non zero assign a score other wise just assign average of user ratings
            if sum_scores:
                rating_pred = np.sum(np.multiply(
                    sim_scores, user_ratings)) / sum_scores
            else:
                rating_pred = user_ratings_train.mean(axis=1)[user_id]

            y_pred.append(rating_pred)

CPU times: user 627 ms, sys: 329 ms, total: 956 ms
Wall time: 219 ms


In [31]:
from sklearn.metrics import mean_absolute_error
error = mean_absolute_error(y_true, y_pred)
print(
    f'The mean absolute error for {neighbors_count} neighbors is {error:.2f}.')

The mean absolute error for 10 neighbors is 0.82.
