In [None]:
pip install scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m6.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp312-cp312-linux_x86_64.whl size=2555148 sha256=0bf3aa0096be1eaeb0ea274e736c28ef09159cc3545f816478d889377dcf51c4
  Stored in directory: /root/.cache/pip/wheels/75/fa/bc/739bc2cb1

In [None]:
!pip install numpy==1.26.4

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m96.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pytensor 2.35.1 requires numpy>=2.0, but you hav

In [None]:
import pandas as pd
from surprise import Dataset, Reader, SVD, SVDpp, NMF
from surprise.model_selection import GridSearchCV, cross_validate


In [3]:
ratings = pd.read_csv("ml-32m/ratings.csv")

In [4]:
reader = Reader(rating_scale=(0.5, 5.0))

data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [5]:
ratings['movieId'] = ratings['movieId'].astype(int)

In [6]:
print(ratings.dtypes)

userId         int64
movieId        int64
rating       float64
timestamp    float64
dtype: object


Підбір найкращих параметрів для SVD

In [7]:
param_grid = {
    'n_factors': [20, 50],
    'reg_all': [0.02, 0.1],
    'lr_all': [0.002]
}

gs = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse', 'mae'],
    cv=3,
    n_jobs=-1
)

print("Пошук найкращих параметрів SVD..")
gs.fit(data)

print("\nНайкращі параметри для SVD (за RMSE):")
print(gs.best_params['rmse'])
print("Найкращий RMSE:", gs.best_score['rmse'])

Пошук найкращих параметрів SVD..

Найкращі параметри для SVD (за RMSE):
{'n_factors': 20, 'reg_all': 0.02, 'lr_all': 0.002}
Найкращий RMSE: 0.8753756223227974


Оцінка моделей: SVD, SVD++, NMF

In [8]:
algorithms = {
    "SVD (best params)": SVD(**gs.best_params['rmse']),
    "SVD++": SVDpp(),
    "NMF": NMF()
}

results = {}

print("\nКрос-валідація моделей..")
for name, algo in algorithms.items():
    cv_results = cross_validate(
        algo,
        data,
        measures=['RMSE', 'MAE'],
        cv=3,
        n_jobs=-1,
        verbose=False
    )

    results[name] = {
        'rmse': cv_results['test_rmse'].mean(),
        'mae': cv_results['test_mae'].mean()
    }


Крос-валідація моделей..


Результати:

In [11]:
print("\nПорівнюємо моделі:")
for model, metrics in results.items():
    print(f"{model}: RMSE={metrics['rmse']:.4f}, MAE={metrics['mae']:.4f}")

best_model = min(results, key=lambda x: results[x]['rmse'])
print("\nНайкраща модель(за RMSE):", best_model)


Порівнюємо моделі:
SVD (best params): RMSE=0.8755, MAE=0.6671
SVD++: RMSE=0.8395, MAE=0.6336
NMF: RMSE=0.8980, MAE=0.6808

Найкраща модель(за RMSE): SVD++
