In [19]:
import os
import sys
from pathlib import Path
import pandas as pd
from rectools.columns import Columns
from hydra import compose, initialize

In [22]:
# Show the parent of the current working directory as a plain string.
str(Path.cwd().parent)

'/Users/alexander/Developer/RecSys'

In [20]:
# Make the parent of the working directory importable.
# The membership check keeps this cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
parent_dir = str(Path(os.getcwd()).parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [13]:
# Register the directory two levels above the working directory as an import
# location. The relative "../../" is resolved to an absolute path up front so
# the entry keeps pointing at the same directory even if the working directory
# changes later; the membership check avoids duplicates on re-run.
# NOTE(review): this cell was executed as In [13], before the In [20] cell that
# adds the parent directory — confirm this entry is still needed.
grandparent_dir = str(Path("../../").resolve())
if grandparent_dir not in sys.path:
    sys.path.append(grandparent_dir)

In [9]:
from ml_project.data import (
    InteractionsTransformer,
    read_data
)

In [44]:
# Compose the "train_config" Hydra config from ../configs; the hydra node is
# kept in the result because return_hydra_config=True.
with initialize(version_base="1.1", config_path="../configs"):
    conf = compose(
        config_name="train_config",
        return_hydra_config=True,
    )

In [11]:
# Show the configured (relative) location of the interactions file.
interactions_cfg = conf.data.input.interactions
interactions_cfg.path

'data/raw/BX-Book-Ratings.csv'

In [27]:
# Load the raw interactions.
# The file location is built from the configured relative path, anchored at
# the parent of the working directory, instead of a machine-specific absolute
# path — so the notebook runs on any checkout of the project.
interactions_path = Path(os.getcwd()).parent / conf.data.input.interactions.path
interactions_df = read_data(
    path=str(interactions_path),
    read_params=conf.data.input.interactions.read_params
)

In [29]:
# Build the interactions transformer from the column settings in the config.
column_params = conf.data.input.interactions.column_params
transformer = InteractionsTransformer(
    interactions_column_params=column_params,
)

In [32]:
from ml_project.data.make_dataset import split_data_for_train_test
from ml_project.models.model_fit_predict import evaluate_model


# Fit the transformer on the loaded interactions and replace the frame with
# its transformed version, then print a schema summary.
interactions_df = transformer.fit_transform(interactions_df)
interactions_df.info()

# Split into train / test according to the splitter settings in the config.
train_df, test_df = split_data_for_train_test(
    splitter_params=conf.splitter_params,
    interactions_df=interactions_df,
)

# Keep only test rows whose user also appears in the train split.
train_users = train_df[Columns.User]
user_in_train = test_df[Columns.User].isin(train_users)
test_df = test_df[user_in_train]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 433671 entries, 0 to 433670
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   user_id   433671 non-null  int64  
 1   item_id   433671 non-null  int64  
 2   weight    433671 non-null  float64
 3   datetime  433671 non-null  object 
dtypes: float64(1), int64(2), object(1)
memory usage: 13.2+ MB


In [33]:
# Display the transformed interactions frame (the notebook repr truncates
# the middle rows, showing only the first and last few).
interactions_df

Unnamed: 0,user_id,item_id,weight,datetime
0,0,126,5.0,2024-10-09
1,0,97785,5.0,2024-10-09
2,0,124868,5.0,2024-10-09
3,0,134660,5.0,2024-10-09
4,0,139277,6.0,2024-10-09
...,...,...,...,...
433666,77804,39186,7.0,2024-10-09
433667,77804,51237,7.0,2024-10-09
433668,77804,51292,7.0,2024-10-09
433669,77804,73289,6.0,2024-10-09


In [37]:
# Inspect the training parameters from the composed config; model_type is
# what selects the model in the fitting cell.
conf.train_params

{'model_type': 'SVD'}

In [45]:
from rectools.dataset import Dataset
from rectools.metrics import calc_metrics
from rectools.models import PureSVDModel
import typing as tp


from ml_project.data.make_dataset import prepare_metrics_dict
# Build the RecTools dataset from the train split only.
dataset = Dataset.construct(interactions_df=train_df)

# Select the model by the configured type. Unknown types fail loudly here
# rather than leaving `model` undefined (or silently reusing a stale `model`
# from an earlier kernel state).
model_type = conf.train_params.model_type
if model_type == "SVD":
    model = PureSVDModel(factors=20)
else:
    raise ValueError(f"Unsupported model_type: {model_type!r}")

model.fit(dataset)

# Recommend for every distinct user present in the test split.
users_to_predict = test_df[Columns.User].unique()

recs_df = model.recommend(
    users=users_to_predict,
    dataset=dataset,
    **conf.predict_params
)

# Turn the metric settings from the config into the metrics mapping
# passed to calc_metrics.
metrics_dict = prepare_metrics_dict(conf.metric_params)

# Score the recommendations against the test split; the train split is
# passed as the previous interactions.
metrics: tp.Dict[str, float] = calc_metrics(
    metrics=metrics_dict,
    reco=recs_df,
    interactions=test_df,
    prev_interactions=train_df,
)

In [46]:
# Show the metric values returned by calc_metrics for the test split.
metrics

{'Recall': 0.012048323733016586, 'Precision': 0.000247161948555827}