In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from main.tooling.Logger import logging_setup
from copy import deepcopy

import os
# Remember the directory the kernel started in, once per kernel session.
# A bare os.chdir('..') is relative to the *current* directory, so re-running
# this cell would climb one level further each time. Anchoring the move to the
# recorded start directory makes the cell idempotent.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
print(NOTEBOOK_START_DIR)  # Prints the directory the notebook was started in

# Switch to the parent of the start directory (same target as '..' on the
# first execution, and unchanged on every later execution).
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))
print(os.getcwd())

# Notebook-wide logger used by every cell below. It is obtained from the
# project's logging_setup helper; "setupEvaluation" is presumably the logger
# name — confirm in main.tooling.Logger.
logger = logging_setup("setupEvaluation")

In [None]:
from main.structure.Factories.ConfigurationFactory import ConfigurationFactory

# Build the base configuration. Every run below works on a deepcopy of `conf`,
# so this object is the common starting point for all experiments.
configurationFactory = ConfigurationFactory()
# Configuration names listed by the author as selectable here
# (their meaning is defined in ConfigurationFactory and is not visible in this notebook):
# TrainingAllDatasets / TrainingOnlyP2GoldenDataset / TrainingOnlyKomootDataset / TrainingKomootTestP2GoldenDatasets / TrainingP2GoldenTestKomootDatasets
conf = configurationFactory.__create__("TrainingAllDatasets")
# Log the resulting configuration so the chosen settings appear in the notebook output.
logger.info("Configuration %s", conf)

## Setup Experiment

In [None]:
import mlflow
# All runs started from this notebook are grouped under this MLflow experiment name.
conf.project.experiment_name = 'RC_TrainingAllDatasets'

# Look the experiment up first and create it only when the lookup finds nothing,
# so re-running this cell does not try to create the same experiment twice.
existing_experiment = mlflow.get_experiment_by_name(conf.project.experiment_name)
if existing_experiment is None:
    mlflow.create_experiment(conf.project.experiment_name)

# Hyperparameter Analysis

## First Run

In [None]:
from main.behavior.RelevanceClassifier import RelevanceClassifier
from main.tooling.MLflowHandler import config_mlflow

# Baseline run: an unmodified deep copy of `conf`, labelled with the run name 'default'.
# deepcopy keeps the shared `conf` untouched so every later cell starts from the same base.
defaultConf = deepcopy(conf)
defaultConf.project.run_name = 'default'

# config_mlflow wraps the whole training call as a context manager; presumably it
# opens and closes the MLflow run for this configuration — confirm in MLflowHandler.
with config_mlflow(defaultConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # A fresh classifier is created for every run, so no state carries over between runs.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(defaultConf)

## Increasing Batch Size

In [None]:
# Variant of the base configuration that differs only in the batch size:
# both the evaluation and the training per-device batch size are set to 32.
increasedBatchsizeConf = deepcopy(conf)
increasedBatchsizeConf.project.run_name = 'batchsize_32'
increasedBatchsizeConf.modelArgs.per_device_eval_batch_size = 32
increasedBatchsizeConf.modelArgs.per_device_train_batch_size = 32


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(increasedBatchsizeConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(increasedBatchsizeConf)

## Increasing Epoch Number

In [None]:
# Variant of the base configuration that differs only in the epoch count (20).
increasedEpochNumberConf1 = deepcopy(conf)
increasedEpochNumberConf1.project.run_name = 'epochs_20'
increasedEpochNumberConf1.modelArgs.num_train_epochs = 20


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(increasedEpochNumberConf1) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(increasedEpochNumberConf1)

In [None]:
# Variant of the base configuration that differs only in the epoch count (25).
increasedEpochNumberConf2 = deepcopy(conf)
increasedEpochNumberConf2.project.run_name = 'epochs_25'
increasedEpochNumberConf2.modelArgs.num_train_epochs = 25


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(increasedEpochNumberConf2) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(increasedEpochNumberConf2)

## Decreasing Epoch Number

In [None]:
# Variant of the base configuration that differs only in the epoch count (10).
decreasedEpochNumberConf1 = deepcopy(conf)
decreasedEpochNumberConf1.project.run_name = 'epochs_10'
decreasedEpochNumberConf1.modelArgs.num_train_epochs = 10


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(decreasedEpochNumberConf1) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(decreasedEpochNumberConf1)

In [None]:
# Variant of the base configuration that differs only in the epoch count (5).
decreasedEpochNumberConf2 = deepcopy(conf)
decreasedEpochNumberConf2.project.run_name = 'epochs_5'
decreasedEpochNumberConf2.modelArgs.num_train_epochs = 5


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(decreasedEpochNumberConf2) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(decreasedEpochNumberConf2)

In [None]:
# Variant of the base configuration that differs only in the epoch count (3).
decreasedEpochNumberConf3 = deepcopy(conf)
decreasedEpochNumberConf3.project.run_name = 'epochs_3'
decreasedEpochNumberConf3.modelArgs.num_train_epochs = 3


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(decreasedEpochNumberConf3) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(decreasedEpochNumberConf3)

## Increasing Fold Number

In [None]:
from main.behavior.RelevanceClassifier import RelevanceClassifier
from main.tooling.MLflowHandler import config_mlflow

# Variant of the base configuration that differs only in the fold number (10).
# Unlike the other variants, this value is set directly on the configuration
# object rather than on its modelArgs.
increasedFoldNumberConf = deepcopy(conf)
increasedFoldNumberConf.project.run_name = 'Fold_10'
increasedFoldNumberConf.fold_number = 10


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(increasedFoldNumberConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(increasedFoldNumberConf)

## Decreasing Learning Rate

In [None]:
# Variant of the base configuration that differs only in the learning rate (2e-4).
# NOTE(review): the section is titled "Decreasing Learning Rate", but the default
# learning rate in `conf` is not visible here — confirm 2e-4 is actually lower.
decreasedLRConf = deepcopy(conf)
decreasedLRConf.project.run_name = 'LR_2e-4'
decreasedLRConf.modelArgs.learning_rate = 2e-4


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(decreasedLRConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(decreasedLRConf)

## Increasing Weight Decay

In [None]:
# Variant of the base configuration that differs only in the weight decay (0.999).
# NOTE(review): 0.999 looks unusually high for a weight decay — confirm that this
# value, and this parameter, are what was intended.
increaseWeightDecayConf = deepcopy(conf)
increaseWeightDecayConf.project.run_name = 'WeightDecay_0.999'
increaseWeightDecayConf.modelArgs.weight_decay = 0.999


# Run the training pipeline for this variant inside the config_mlflow context.
with config_mlflow(increaseWeightDecayConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # New classifier instance per run; the previous run's instance is replaced.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(increaseWeightDecayConf)

# Combined Hyperparameter Analysis

## (The standard hyperparameter changes that achieved better results than the default configuration are combined here)