In [1]:
from math import sqrt
from operator import itemgetter

import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

In [2]:
with open('data/ml-100k/u1.base') as file:
    records = []
    for line in file:
        user, item, rating, _ = line.split('	')

        records.append([user, item, int(rating)])
len(records)

80000

In [3]:
"""global average rating"""
ratings = []
for record in records:
    ratings.append(record[-1])
r = sum(ratings) / len(ratings)
r

3.52835

In [4]:
"""average rating of user u"""
ratings_u, r_u = {}, {}
for record in records:
    user, item, rating = record
    ratings_u.setdefault(user, {})
    ratings_u[user][item] = rating

for user in ratings_u.keys():
    r_u[user] = sum(ratings_u[user].values()) / len(ratings_u[user].values())

In [5]:
"""average rating of item i"""
ratings_i, r_i = {}, {}
for record in records:
    user, item, rating = record
    ratings_i.setdefault(item, {})
    ratings_i[item][user] = rating

for item in ratings_i.keys():
    r_i[item] = sum(ratings_i[item].values()) / len(ratings_i[item].values())

In [6]:
"""bias of user u"""
b_u = {}
for user in ratings_u.keys():
    b_u.setdefault(user, 0)
    for item in ratings_u[user].keys():
        b_u[user] += (ratings_u[user][item] - r_i[item]) / len(ratings_u[user].values())

In [7]:
"""bias of item i"""
b_i = {}
for item in ratings_i.keys():
    b_i.setdefault(item, 0)
    for user in ratings_i[item].keys():
        b_i[item] += (ratings_i[item][user] - r_u[user]) / len(ratings_i[item].values())

In [8]:
"""load test data"""
records_test = []
users_test = []
items_test = []
ratings_test = []
with open("data/ml-100k/u1.test") as file:
    for line in file:
        user, item, rating, _ = line.split('	')
        if item == '599':
            print("kk")
        records_test.append([user, item])
        users_test.append(user)
        items_test.append(item)
        ratings_test.append(int(rating))
users_test = np.array(users_test)
items_test = np.array(items_test)
ratings_test = np.array(ratings_test)
np.average(ratings_test)

kk


3.5359

In [9]:
"""user average"""
def user_avg():
    ratings_predict = []
    for user in users_test:
        ratings_predict.append(r_u[user])
    ratings_predict = np.array(ratings_predict)
    # rmse_test = sqrt(metrics.mean_squared_error(ratings_test, ratings_predict))
    # mae_test = metrics.mean_absolute_error(ratings_test, ratings_predict)
    return ratings_predict

In [10]:
"""user average"""
def item_avg():
    ratings_predict = []
    for item in items_test:
        r_i.setdefault(item, r)
        ratings_predict.append(r_i[item])
    ratings_predict = np.array(ratings_predict)
    # rmse_test = sqrt(metrics.mean_squared_error(ratings_test, ratings_predict))
    # mae_test = metrics.mean_absolute_error(ratings_test, ratings_predict)
    return ratings_predict

In [11]:
"""mean of user average and item average"""
def mean_user_item():
    """return RMSE, MAE"""
    ratings_predict = user_avg() / 2 + item_avg() / 2
    rmse_test = sqrt(metrics.mean_squared_error(ratings_test, ratings_predict))
    mae_test = metrics.mean_absolute_error(ratings_test, ratings_predict)
    return rmse_test, mae_test

In [12]:
"""user bias and item average"""
def user_bias_item_avg():
    ratings_predict = []
    for user, item in zip(users_test, items_test):
        b_u.setdefault(user, 0)
        r_i.setdefault(item, r)
        ratings_predict.append(b_u[user] + r_i[item])
    ratings_predict = np.array(ratings_predict)
    # rmse_test = sqrt(metrics.mean_squared_error(ratings_test, ratings_predict))
    # mae_test = metrics.mean_absolute_error(ratings_test, ratings_predict)
    return ratings_predict

In [13]:
"""user average and item bias"""
def user_avg_item_bias():
    ratings_predict = []
    for user, item in zip(users_test, items_test):
        r_u.setdefault(user, r)
        b_i.setdefault(item, 0)
        ratings_predict.append(r_u[user] + b_i[item])
    ratings_predict = np.array(ratings_predict)
    # rmse_test = sqrt(metrics.mean_squared_error(ratings_test, ratings_predict))
    # mae_test = metrics.mean_absolute_error(ratings_test, ratings_predict)
    return ratings_predict

In [14]:
"""global average, user bias and item bias"""
def global_avg_user_bias_item_bias():
    ratings_predict = []
    for user, item in zip(users_test, items_test):
        b_u.setdefault(user, 0)
        b_i.setdefault(item, 0)
        ratings_predict.append(r + b_u[user] + b_i[item])
    ratings_predict = np.array(ratings_predict)
    # rmse_test = sqrt(metrics.mean_squared_error(ratings_test, ratings_predict))
    # mae_test = metrics.mean_absolute_error(ratings_test, ratings_predict)
    return ratings_predict

In [15]:
"""RMSE and MAE"""
def performance(y_predict, y_test=ratings_test):
    rmse_test = sqrt(metrics.mean_squared_error(y_test, y_predict))
    mae_test = metrics.mean_absolute_error(y_test, y_predict)
    return rmse_test, mae_test

In [16]:
print(performance(user_avg()))
print(performance(item_avg()))
mean_user_item()
performance(user_bias_item_avg())
performance(user_avg_item_bias())
performance(global_avg_user_bias_item_bias())

(1.0629951276561334, 0.8501912740150434)
(1.0334113714152895, 0.8275684032890005)


(0.962331641550567, 0.7612786028606267)

In [17]:
gg = [[1, 4], [3, 2]]
print(sorted(gg, key=itemgetter(1)))

[[3, 2], [1, 4]]
