In [None]:
import os
import mlflow
import torch
import random
import numpy as np
from tqdm import tqdm
import sys

# Filesystem roots: PROJECT_PATH is put on the import path just below;
# SHARED_PROJECT_PATH is passed to read_mlflow_dataset in the training cell.
PROJECT_PATH = '...'
SHARED_PROJECT_PATH = '...'

# Put the project root on the import path ahead of the `src` imports below.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from src.utils.data_utils import read_mlflow_dataset
from src.trainers.AETrainer import AETrainer

# Single seed used for every random number generator seeded in the training cell
RANDOM_SEED = 42

# Point the MLflow client at the tracking server
mlflow.set_tracking_uri('...')

In [None]:
# Select the MLflow experiment by name and keep the returned handle in `experiment`
EXPERIMENT_NAME = '02-01_denoising_autoencoder'
experiment = mlflow.set_experiment(EXPERIMENT_NAME)

In [None]:
with mlflow.start_run(log_system_metrics=True) as run:
    # Seed the Python, NumPy and PyTorch (CPU + CUDA) generators, and switch cuDNN
    # to deterministic mode with benchmarking off, so repeated runs are comparable.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed(RANDOM_SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Use the GPU when one is available, otherwise fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Read the train and test splits (no separate validation split is loaded here).
    # NOTE(review): dataset logging presumably happens inside read_mlflow_dataset -- confirm.
    # The third return value is only kept for the test split, where it supplies `inp_dim`.
    data_date = '03-08-2024'
    X_train, y_train, _ = read_mlflow_dataset(SHARED_PROJECT_PATH, data_date, 'train', '-resmpl', targets='acc_no_23-24', device=device)
    X_test, y_test, non_accident_dim = read_mlflow_dataset(SHARED_PROJECT_PATH, data_date, 'test', '-resmpl', targets='acc_no_23-24', device=device)

    # Hyperparameters forwarded verbatim to AETrainer as keyword arguments
    params = {
        'inp_dim': non_accident_dim,
        'noise_factor': 0.5,
        'enc_dim': 4,
        'learning_rate': 1e-2,
        'weight_decay': 1e-8
    }

    # Log the hyperparameters plus run-level settings that are not AETrainer
    # arguments. The seed is recorded so the run can be reproduced from its
    # MLflow metadata alone; it goes into the copy only, never into `params`.
    params_log = params.copy()
    params_log['data_date'] = data_date
    params_log['random_seed'] = RANDOM_SEED
    mlflow.log_params(params_log)

    # Define, train and evaluate model.
    # NOTE(review): the test split is passed to train() as well as evaluate(); if
    # train() uses it for validation / model selection, the reported test metrics
    # will be optimistic -- confirm against AETrainer.
    # NOTE(review): 1500 is presumably the number of training epochs -- confirm
    # against AETrainer.train, then consider naming it and logging it with the run.
    trainer = AETrainer(**params)
    trainer.train(X_train, y_train, X_test, y_test, 1500)
    trainer.evaluate(X_test, y_test, 'test')