In [4]:
import pandas as pd

# Read the tab-separated ratings file. Passing `names` supplies the column
# labels explicitly, so the first line of the file is treated as data.
data = pd.read_csv(
    '../data/ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)
# Preview the first rows as the cell output.
data.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
from sklearn.model_selection import train_test_split

# Keep only the columns used for rating prediction. The explicit .copy()
# makes `data` an independent frame, so the column assignments below do not
# raise pandas' SettingWithCopyWarning (a column-subset selection is flagged
# as a potential copy of the original frame).
data = data[['user_id', 'item_id', 'rating']].copy()

# Build contiguous 0-based indices for users and items; enumerate() over
# unique() numbers the ids in the order unique() returns them.
users = data['user_id'].unique()
items = data['item_id'].unique()
user_to_index = {user_id: idx for idx, user_id in enumerate(users)}
item_to_index = {item_id: idx for idx, item_id in enumerate(items)}
data['user_idx'] = data['user_id'].map(user_to_index)
data['item_idx'] = data['item_id'].map(item_to_index)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
train_data.head()

Unnamed: 0,user_id,item_id,rating,user_idx,item_idx
75220,807,1411,1,804,901
48955,474,659,5,467,488
44966,463,268,4,465,139
13568,139,286,4,321,289
92727,621,751,4,618,261


In [6]:
import time
from LFM import LFM

# Size the model from the number of distinct users/items found in the data.
n_users = len(users)
n_items = len(items)
model = LFM(n_users=n_users, n_items=n_items, n_factors=20, lr=0.01, reg_coef=0.02, n_epochs=20)

# NOTE(review): the recorded output of this cell is
# "AttributeError: 'LFM' object has no attribute 'reg'". The constructor is
# given `reg_coef`, so presumably the LFM class reads `self.reg` somewhere
# while storing the value under another name -- confirm and fix in LFM.py.

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start1 = time.perf_counter()
model.fit(train_data)
end1 = time.perf_counter()

# Evaluate on the held-out split and report metrics plus training time.
rmse, mae = model.evaluate(test_data)
print(f"RMSE кастомного алгоритма: {rmse:.3f}")
print(f"MAE кастомного алгоритма: {mae:.3f}")
print(f"Время обучения кастомного алгоритма: {end1 - start1:.3f} с")

AttributeError: 'LFM' object has no attribute 'reg'

In [15]:
# `time` is imported here as well so this cell does not rely on an earlier
# cell having imported it.
import time

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from surprise import Dataset, Reader, SVD
# Imported under an alias: a plain `train_test_split` import here would rebind
# the sklearn function of the same name used by the data-preparation cell,
# breaking that cell if it is re-run after this one.
from surprise.model_selection import train_test_split as surprise_train_test_split


# NOTE(review): `reader` is not passed to anything in this cell
# (load_builtin is called without it); kept in case other cells use it.
reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_builtin('ml-100k', prompt=False)
# NOTE(review): this split is produced by Surprise on its own copy of the
# dataset, while the custom model is evaluated on the sklearn split of `data`.
# The two test sets are presumably different, so the metrics are only
# approximately comparable -- confirm whether a shared split is wanted.
trainset, testset = surprise_train_test_split(surprise_data, test_size=0.2, random_state=42)

model_surprise = SVD(n_factors=20, lr_all=0.01, reg_all=0.02, n_epochs=20)
# perf_counter() is a monotonic clock meant for measuring elapsed intervals.
start2 = time.perf_counter()
model_surprise.fit(trainset)
end2 = time.perf_counter()

predictions = model_surprise.test(testset)
# Extract true ratings and estimates once and reuse them for both metrics.
true_ratings = [p.r_ui for p in predictions]
estimated_ratings = [p.est for p in predictions]
rmse_surprise = np.sqrt(mean_squared_error(true_ratings, estimated_ratings))
mae_surprise = mean_absolute_error(true_ratings, estimated_ratings)
print(f"RMSE библиотечного алгоритма: {rmse_surprise:.3f}")
print(f"MAE библиотечного алгоритма: {mae_surprise:.3f}")
print(f"Время обучения библиотечного алгоритма: {end2 - start2:.3f} с")

RMSE библиотечного алгоритма: 0.940
MAE библиотечного алгоритма: 0.737
Время обучения библиотечного алгоритма: 0.771 с
