In [None]:
import os
import mlflow
from mlflow import MlflowClient
import torch
import torchmetrics
import random
import numpy as np
from tqdm import tqdm
import sys

# Filesystem roots used by this notebook. PROJECT_PATH is added to sys.path below;
# SHARED_PROJECT_PATH is handed to the dataset-reading helpers in the training cell.
PROJECT_PATH = '...'
SHARED_PROJECT_PATH = '...'

# Must run before the `src.*` imports that follow — presumably the `src` package
# lives under PROJECT_PATH and is not installed as a package (TODO confirm).
sys.path.append(PROJECT_PATH)

from src.utils.data_utils import read_mlflow_dataset, read_mlflow_dataset_config
from src.trainers.AETrainer import AEFCNTrainer

# Seed applied to the Python, NumPy and PyTorch random number generators in the training cell.
RANDOM_SEED = 42
# MLflow run ID of the previously trained autoencoder that the training cell loads.
AE_RUN_ID = '...'

# Direct all subsequent MLflow calls in this kernel to the given tracking URI.
mlflow.set_tracking_uri('...')

In [None]:
# Make '02-02_fully_connected_network' the active MLflow experiment; runs started
# afterwards in this kernel are recorded under it. The returned experiment object is kept
# in `experiment`.
experiment = mlflow.set_experiment('02-02_fully_connected_network')

In [None]:
with mlflow.start_run(log_system_metrics=True) as run:
    # Seed every random number generator in use so that repeated runs are comparable
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed(RANDOM_SEED)
    # Ask cuDNN for deterministic kernels and switch off its autotuner,
    # which may otherwise pick different algorithms between runs
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Define PyTorch device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Read the train and test datasets and the dataset config file.
    # NOTE(review): only the 'train' and 'test' splits are read; the test split is also
    # passed to trainer.train() below — confirm it is not used for model selection there.
    data_date = '03-08-2024'
    X_train, y_train, _ = read_mlflow_dataset(SHARED_PROJECT_PATH, data_date, 'train', '-resmpl', targets='acc_no_23-24', device=device)
    X_test, y_test, non_accident_dim = read_mlflow_dataset(SHARED_PROJECT_PATH, data_date, 'test', '-resmpl', targets='acc_no_23-24', device=device)
    df_conf = read_mlflow_dataset_config(SHARED_PROJECT_PATH, data_date)

    # Min/max normalisation bounds taken from the dataset config; built once and
    # passed unchanged to every train/evaluate call below
    min_max_norms = (df_conf['acc_no_norm_min'][0], df_conf['acc_no_norm_max'][0])

    # Load trained autoencoder from MLflow.
    # Fail with a descriptive message instead of a bare IndexError when the run has no model artifact.
    ae_artifacts = MlflowClient().list_artifacts(AE_RUN_ID, "models")
    if not ae_artifacts:
        raise RuntimeError(f"No artifacts found under 'models' for autoencoder run '{AE_RUN_ID}'")
    # map_location is forwarded to torch.load: it lets a checkpoint saved on GPU load on a
    # CPU-only host and places the autoencoder on the same device as the data tensors
    autoencoder = mlflow.pytorch.load_model(f'runs:/{AE_RUN_ID}/{ae_artifacts[0].path}', map_location=device)

    # Specify and log training parameters
    params = {
        'autoencoder': autoencoder,
        'inp_dim': X_train.shape[1] - non_accident_dim + autoencoder.enc_dim,
        'learning_rate': 1e-2
    }
    # Logged copy carries extra provenance that is not a trainer argument:
    # the dataset date and the run ID of the autoencoder weights that were loaded
    params_log = params.copy()
    params_log['data_date'] = data_date
    params_log['ae_run_id'] = AE_RUN_ID
    mlflow.log_params(params_log)

    # Define, train and evaluate model
    trainer = AEFCNTrainer(**params)
    trainer.train(X_train, y_train, X_test, y_test, epochs=200, min_max_norms=min_max_norms)

    # Evaluation with the trainer's default loss and metric name (test split only)
    trainer.evaluate(X_test, y_test, 'test', min_max_norms=min_max_norms)

    # Additional metrics on both splits; a fresh loss/metric object is created for every
    # call so that no metric state carries over from one evaluation to the next
    splits = (('train', X_train, y_train), ('test', X_test, y_test))
    for split_name, X_split, y_split in splits:
        trainer.evaluate(X_split, y_split, split_name, torch.nn.L1Loss(), 'mae', min_max_norms=min_max_norms)
    for split_name, X_split, y_split in splits:
        # proc_func adds 1 to the values it receives — presumably to keep zero targets
        # out of the MAPE denominator (TODO confirm against AEFCNTrainer.evaluate)
        trainer.evaluate(X_split, y_split, split_name, torchmetrics.regression.MeanAbsolutePercentageError(), 'mape',
                         min_max_norms=min_max_norms, proc_func=lambda x: x+1)