In [8]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.decomposition import NMF
from LMF import LatentFactorModel


import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [9]:
df = pd.read_csv(
    "./ml-100k/u.data",
    sep="\t",
    names=["user", "item", "rating", "timestamp"],
)

print(f"Number of users: {df['user'].nunique()}")
print(f"Number of items: {df['item'].nunique()}")
print(f"Number of ratings: {len(df)}")

Number of users: 943
Number of items: 1682
Number of ratings: 100000


In [10]:
user_map = {id_: idx for idx, id_ in enumerate(df['user'].unique())}
item_map = {id_: idx for idx, id_ in enumerate(df['item'].unique())}

df['user_idx'] = df['user'].map(user_map)
df['item_idx'] = df['item'].map(item_map)

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

train_ratings = list(zip(train_data['user_idx'], train_data['item_idx'], train_data['rating']))
test_ratings = list(zip(test_data['user_idx'], test_data['item_idx'], test_data['rating']))

In [11]:
import time

n_users = df['user_idx'].nunique()
n_items = df['item_idx'].nunique()

lfm = LatentFactorModel(n_users=n_users, n_items=n_items)

start = time.time()
lfm.fit(train_ratings, learning_rate=0.01, reg=0.05, n_epochs=15)
elapsed = time.time() - start
print(f"Время обучения (custom): {elapsed:.2f} секунд")

Epoch 1/15 | Train RMSE: 0.9943
Epoch 2/15 | Train RMSE: 0.9310
Epoch 3/15 | Train RMSE: 0.9113
Epoch 4/15 | Train RMSE: 0.8995
Epoch 5/15 | Train RMSE: 0.8903
Epoch 6/15 | Train RMSE: 0.8822
Epoch 7/15 | Train RMSE: 0.8738
Epoch 8/15 | Train RMSE: 0.8648
Epoch 9/15 | Train RMSE: 0.8545
Epoch 10/15 | Train RMSE: 0.8429
Epoch 11/15 | Train RMSE: 0.8299
Epoch 12/15 | Train RMSE: 0.8158
Epoch 13/15 | Train RMSE: 0.8012
Epoch 14/15 | Train RMSE: 0.7863
Epoch 15/15 | Train RMSE: 0.7710
Время обучения (custom): 10.64 секунд


In [13]:
from sklearn.metrics import mean_squared_error

y_true = []
y_pred = []

for user, item, rating in test_ratings:
    y_true.append(rating)
    y_pred.append(lfm.predict(user, item))

rmse = mean_squared_error(y_true, y_pred)
print(f"RMSE (custom): {rmse:.4f}")

RMSE (custom): 0.8505


In [15]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# Создаём sparse матрицу
train_matrix = csr_matrix((train_data['rating'], (train_data['user_idx'], train_data['item_idx'])),
                          shape=(n_users, n_items))

svd = TruncatedSVD(n_components=40, random_state=71)

start = time.time()
U = svd.fit_transform(train_matrix)
V = svd.components_.T
elapsed = time.time() - start
print(f"Время обучения (scikit-learn): {elapsed:.2f} секунд")

# Предсказания
y_pred_sklearn = []
for user, item, rating in test_ratings:
    y_pred_sklearn.append(U[user] @ V[item])

rmse_sklearn = mean_squared_error(y_true, y_pred_sklearn)
print(f"RMSE (scikit-learn): {rmse_sklearn:.4f}")


Время обучения (scikit-learn): 0.14 секунд
RMSE (scikit-learn): 7.8529
