In [1]:
import yaml
from src.data.data_loader import load_data
from src.data.prepare_data import prepare_data
import pandas as pd
import mlflow

from surprise import SVD
from src.models.models import model_testing

In [2]:
# Parse the evaluation settings from YAML into `config`.
# safe_load is PyYAML's shorthand for load(..., Loader=yaml.SafeLoader).
with open('main/config_evaluate.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

In [3]:
# Load the raw frames described by the `data_loader` config section and run
# them through the project's preparation step.
dataframes = prepare_data(dataframes=load_data(config=config['data_loader']))

# Time-based split: ratings dated strictly before `test_date` go to the train
# set, ratings on or after it go to the test set.
# Both comparisons are written out (rather than negating one mask) so rows
# with a missing date stay excluded from both sets.
ratings = dataframes['ratings']
test_date = config['model']['test_date']

dataframes['train_set'] = ratings[ratings['date'] < test_date]
dataframes['test_set'] = ratings[ratings['date'] >= test_date]

In [4]:
# MLflow tracking: select the configured server and experiment, and disable
# autologging so only the explicit calls below are recorded.
tracking_cfg = config['tracking']
model_cfg = config['model']

mlflow.set_tracking_uri(uri=tracking_cfg['uri'])
mlflow.set_experiment(experiment_name=tracking_cfg['experiment_name'])
mlflow.autolog(disable=True)

# The outer run is named from the tracking config; the nested child run is
# named after the algorithm being evaluated.
with mlflow.start_run(run_name=tracking_cfg['run_name']):
    with mlflow.start_run(run_name=model_cfg['algo_class'], nested=True):

        # Record what is being evaluated: algorithm tag, its hyper-parameters,
        # and the full config as an artifact.
        mlflow.set_tag(key='algorithm', value=model_cfg['algo_class'])
        mlflow.log_params(params=model_cfg['params'])
        mlflow.log_dict(dictionary=config, artifact_file="config_evaluate.yml")

        # Fit on the train split, then evaluate on the held-out split.
        # NOTE(review): no metric is logged to MLflow in this cell; presumably
        # model_testing.evaluate reports/logs the configured metrics - confirm.
        model__testing = model_testing(
            algo_class=model_cfg['algo_class'],
            params=model_cfg['params'],
            metrics=model_cfg['metrics'],
        )
        model__testing.fit(train_set=dataframes['train_set'])
        model__testing.evaluate(test_set=dataframes['test_set'])

# NOTE(review): a commented-out per-split `mlflow.log_metric(...validation_rmse...)`
# call used to sit here. It referenced `i`, `accuracy` and `predictions`, none of
# which are defined in this cell, so it is kept only as this note.

RMSE: 1.0262


In [None]:
from surprise import KNNWithMeans, SVD, NMF, CoClustering, accuracy
from surprise import Dataset, Reader
from src.data.data_loader import Dataset_custom

# Baseline check: fit a default Surprise SVD on the train split and report
# RMSE on the test split.
#
# The Surprise-specific inputs are stored under their own names instead of
# overwriting dataframes['train_set'] / dataframes['test_set']. Overwriting
# made this cell non-idempotent: a second run would try to column-index a
# Trainset, and re-running the MLflow cell afterwards would pass the converted
# objects to model_testing instead of the original DataFrames.
rating_columns = ["userId", "itemId", "rating"]

# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))

# Wrap the train DataFrame as a Surprise dataset, then build one trainset
# from all of its rows.
surprise_trainset = Dataset.load_from_df(
    dataframes['train_set'][rating_columns], reader
).build_full_trainset()

# svd.test() is given a plain list of (userId, itemId, rating) tuples.
surprise_testset = list(
    dataframes['test_set'][rating_columns].itertuples(index=False, name=None)
)

# NOTE(review): SVD() is created without a seed, so the RMSE may differ
# between runs - consider passing random_state; confirm desired behaviour.
svd = SVD()
svd.fit(surprise_trainset)

predictions = svd.test(surprise_testset)
rmse = accuracy.rmse(predictions)