In [None]:
import sys

import mlflow
import pandas as pd
import yaml

# Make the project code importable from the notebook's working directory.
#   '../src' -> original entry, kept first so anything that already resolved
#               through it keeps resolving the same way.
#   '..'     -> parent directory of `src`, required for the `src.`-prefixed
#               imports below (without it: "No module named 'src'").
# The membership test keeps re-running this cell from growing sys.path.
for project_path in ('../src', '..'):
    if project_path not in sys.path:
        sys.path.append(project_path)

# Local imports: all resolved through the same package root (`src`).
from src.data.data_loader import load_data
from src.data.prepare_data import prepare_data
from src.models.model import grid_search

# start mlflow ui
#import subprocess, time
#ui_process = subprocess.Popen(["python", "-m", "mlflow", "ui"])
#time.sleep(5)
#ui_process.terminate()

ModuleNotFoundError: No module named 'src'

In [None]:
# read config
# NOTE: the path is relative to the kernel's current working directory.
with open('main/config_train.yml', 'r') as config_file:
    # safe_load is PyYAML's shorthand for load(..., Loader=yaml.SafeLoader)
    config = yaml.safe_load(config_file)

In [None]:
# load and prepare data: the loader output is fed straight into prepare_data
dataframes = prepare_data(dataframes=load_data(config=config['data_loader']))

# filter out test set based on test date:
# the training set keeps only the ratings dated strictly before the configured test date
ratings = dataframes['ratings']
is_before_test_date = ratings['date'] < config['model']['test_date']
dataframes['train_set'] = ratings[is_before_test_date]

In [None]:
# mlflow tracking
tracking_cfg = config['tracking']
mlflow.set_tracking_uri(uri=tracking_cfg['uri'])
mlflow.set_experiment(experiment_name=tracking_cfg['experiment_name'])
# autologging is switched off; everything recorded below is logged explicitly
mlflow.autolog(disable=True)

# One parent run stores the configuration; each algorithm gets its own nested run.
with mlflow.start_run(run_name=tracking_cfg['run_name']):
    # NOTE(review): the config was read from 'main/config_train.yml' but is stored
    # under the artifact name "config_evaluate.yml" - confirm the name is intended.
    mlflow.log_dict(dictionary=config, artifact_file="config_evaluate.yml")

    for algo in config['model']['algo_class']:
        with mlflow.start_run(run_name=algo, nested=True):

            # perform grid search
            cv_cfg = config['cross_validation']
            gsearch = grid_search(
                algo_class=algo,
                measures=cv_cfg['metrics'],
                cv=cv_cfg['iterator'],
                return_train_measures=True,
            )
            cv_results = gsearch.fit(train_set=dataframes['train_set'])

            # log tested algorithm, best metric, params and model
            mlflow.set_tag(key='algorithm', value=algo)
            # the first entry of gsearch.measures is used as the metric key
            mlflow.log_metric(key=gsearch.measures[0], value=gsearch.best_score)
            mlflow.log_params(params=gsearch.best_params)
            mlflow.sklearn.log_model(sk_model=gsearch.best_estimator, artifact_path=algo + '_model')

            # attach the grid-search results to the run as an input dataset
            # (from_pandas implies cv_results is a pandas DataFrame - verify against grid_search.fit)
            grid_search_dataset = mlflow.data.from_pandas(cv_results, name="GS_" + algo)
            mlflow.log_input(grid_search_dataset, context="Grid Search on " + algo)