In [None]:
import yaml
from src.data.data_loader import load_data
from src.data.prepare_data import prepare_data
import pandas as pd
import mlflow

from surprise import SVD
from src.models.models import model_evaluation

In [None]:
# Parse the evaluation settings from the YAML file into `config`.
# The safe loader is used, so only plain YAML types are constructed.
with open('main/config_evaluate.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

In [None]:
# Load the dataframes described by the `data_loader` config section,
# then pass them through the preparation step.
raw_frames = load_data(config=config['data_loader'])
dataframes = prepare_data(dataframes=raw_frames)

# Date-based split of the ratings: rows dated strictly before the configured
# test date form the train set, rows dated on or after it form the test set.
ratings = dataframes['ratings']
cutoff = config['model']['test_date']
dataframes['train_set'] = ratings[ratings['date'] < cutoff]
dataframes['test_set'] = ratings[ratings['date'] >= cutoff]

In [None]:
# Configure MLflow: tracking server, experiment, and no automatic logging
# (everything below is logged explicitly).
tracking_cfg = config['tracking']
mlflow.set_tracking_uri(uri=tracking_cfg['uri'])
mlflow.set_experiment(experiment_name=tracking_cfg['experiment_name'])
mlflow.autolog(disable=True)

model_cfg = config['model']
algo_name = model_cfg['algo_class']

# Open a parent run named after the configured run name and, inside it,
# a nested child run named after the algorithm.
with mlflow.start_run(run_name=tracking_cfg['run_name']), \
        mlflow.start_run(run_name=algo_name, nested=True):
    # Record the inputs of this run: algorithm tag, hyper-parameters,
    # and the full config as an artifact.
    mlflow.set_tag(key='algorithm', value=algo_name)
    mlflow.log_params(params=model_cfg['params'])
    mlflow.log_dict(dictionary=config, artifact_file="config_evaluate.yml")

    # Only the head of each split is registered as a dataset input.
    for split_key, dataset_name, context in (
        ('train_set', "Ratings_Trainset", "Train"),
        ('test_set', "Ratings_Testset", "Test"),
    ):
        sample = dataframes[split_key].head()
        mlflow.log_input(
            dataset=mlflow.data.from_pandas(df=sample, name=dataset_name),
            context=context,
        )

    # Build the evaluation wrapper from the model config, fit it on the
    # train set, then evaluate against the test set.
    model__evaluation = model_evaluation(
        algo_class=algo_name,
        params=model_cfg['params'],
        metrics=model_cfg['metrics'],
    )
    model__evaluation.fit(train_set=dataframes['train_set'])
    model__evaluation.evaluate(
        test_set=dataframes['test_set'],
        train_set=dataframes['train_set'],
    )

    # Log the resulting metrics and store the wrapper's `algo_class`
    # attribute as the model artifact.
    mlflow.log_metrics(metrics=model__evaluation.metrics)
    mlflow.sklearn.log_model(
        sk_model=model__evaluation.algo_class,
        artifact_path=algo_name + '_model',
    )