In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from main.tooling.Logger import logging_setup
from copy import deepcopy

# Notebook-wide logger obtained from the project's logging helper.
# NOTE(review): "setupEvaluation" is presumably used as the logger name — confirm in main.tooling.Logger.
logger = logging_setup("setupEvaluation")

In [None]:
from main.structure.Factories.ConfigurationFactory import ConfigurationFactory

configurationFactory = ConfigurationFactory()

# Build one base configuration per evaluation scenario. The factory is called once per
# scenario name, in the order listed, and the results are unpacked positionally into the
# matching variables — keep both sequences aligned when adding or removing a scenario.
(
    trainingAllConf,
    trainingOnlyP2GoldenConf,
    trainingOnlyKomootConf,
    trainingKomootTestP2GoldenConf,
    trainingP2GoldenTestKomootConf,
) = (
    configurationFactory.__create__(scenario)
    for scenario in (
        "TrainingAllDatasets",
        "TrainingOnlyP2GoldenDataset",
        "TrainingOnlyKomootDataset",
        "TrainingKomootTestP2GoldenDatasets",
        "TrainingP2GoldenTestKomootDatasets",
    )
)

# Only the all-datasets configuration is logged here.
logger.info("Configuration %s", trainingAllConf)

## Set up the MLflow experiment if it does not exist yet

In [None]:
import mlflow

# Create the MLflow experiment named in the all-datasets configuration, but only when
# a lookup by that name finds nothing; an existing experiment is left untouched.
experiment_name = trainingAllConf.project.experiment_name
if mlflow.get_experiment_by_name(experiment_name) is None:
    mlflow.create_experiment(experiment_name)

## Training with all datasets (P2-Golden + Komoot)

In [None]:
from main.behavior.RelevanceClassifier import RelevanceClassifier
from main.tooling.MLflowHandler import config_mlflow

# Work on a deep copy so the run-specific overrides below do not modify
# trainingAllConf itself.
allConf = deepcopy(trainingAllConf)
allConf.project.run_name = 'AllDatasets'
allConf.project.model_name = 'model'
# Same epoch count as the other training cells in this notebook.
allConf.modelArgs.num_train_epochs = 7

# The whole training pipeline runs inside the config_mlflow context manager.
# NOTE(review): presumably this opens an MLflow run configured from allConf — confirm in
# main.tooling.MLflowHandler. current_run is bound but not used within this cell.
with config_mlflow(allConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # A fresh classifier instance is created for this run.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(allConf)

## Training on the P2-Golden dataset only

In [None]:
# Work on a deep copy so the run-specific overrides below do not modify
# trainingOnlyP2GoldenConf itself.
p2GoldenConf = deepcopy(trainingOnlyP2GoldenConf)
p2GoldenConf.project.run_name = 'OnlyP2Golden'
p2GoldenConf.project.model_name = 'model'
# Same epoch count as the other training cells in this notebook.
p2GoldenConf.modelArgs.num_train_epochs = 7

# The whole training pipeline runs inside the config_mlflow context manager.
# NOTE(review): presumably this opens an MLflow run configured from p2GoldenConf — confirm in
# main.tooling.MLflowHandler. current_run is bound but not used within this cell.
# config_mlflow and RelevanceClassifier are imported in the all-datasets training cell,
# so that cell must have been executed first.
with config_mlflow(p2GoldenConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # A fresh classifier instance is created for this run.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(p2GoldenConf)

## Training on the Komoot dataset only

In [None]:
# Work on a deep copy so the run-specific overrides below do not modify
# trainingOnlyKomootConf itself.
komootConf = deepcopy(trainingOnlyKomootConf)
komootConf.project.run_name = 'OnlyKomoot'
komootConf.project.model_name = 'model'
# Same epoch count as the other training cells in this notebook.
komootConf.modelArgs.num_train_epochs = 7

# The whole training pipeline runs inside the config_mlflow context manager.
# NOTE(review): presumably this opens an MLflow run configured from komootConf — confirm in
# main.tooling.MLflowHandler. current_run is bound but not used within this cell.
# config_mlflow and RelevanceClassifier are imported in the all-datasets training cell,
# so that cell must have been executed first.
with config_mlflow(komootConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # A fresh classifier instance is created for this run.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(komootConf)

## Training on the Komoot dataset and testing on the P2-Golden dataset

In [None]:
# Work on a deep copy so the run-specific overrides below do not modify
# trainingKomootTestP2GoldenConf itself.
komootP2GoldenConf = deepcopy(trainingKomootTestP2GoldenConf)
komootP2GoldenConf.project.run_name = 'TrainingKomootTestP2Golden'
komootP2GoldenConf.project.model_name = 'model'
# Same epoch count as the other training cells in this notebook.
komootP2GoldenConf.modelArgs.num_train_epochs = 7

# The whole training pipeline runs inside the config_mlflow context manager.
# NOTE(review): presumably this opens an MLflow run configured from komootP2GoldenConf —
# confirm in main.tooling.MLflowHandler. current_run is bound but not used within this cell.
# config_mlflow and RelevanceClassifier are imported in the all-datasets training cell,
# so that cell must have been executed first.
with config_mlflow(komootP2GoldenConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # A fresh classifier instance is created for this run.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(komootP2GoldenConf)

## Training on the P2-Golden dataset and testing on the Komoot dataset

In [None]:
# Work on a deep copy so the run-specific overrides below do not modify
# trainingP2GoldenTestKomootConf itself.
p2GoldenKomootConf = deepcopy(trainingP2GoldenTestKomootConf)
p2GoldenKomootConf.project.run_name = 'TrainingP2GoldenTestKomoot'
p2GoldenKomootConf.project.model_name = 'model'
# Same epoch count as the other training cells in this notebook.
p2GoldenKomootConf.modelArgs.num_train_epochs = 7

# The whole training pipeline runs inside the config_mlflow context manager.
# NOTE(review): presumably this opens an MLflow run configured from p2GoldenKomootConf —
# confirm in main.tooling.MLflowHandler. current_run is bound but not used within this cell.
# config_mlflow and RelevanceClassifier are imported in the all-datasets training cell,
# so that cell must have been executed first.
with config_mlflow(p2GoldenKomootConf) as current_run:
    logger.info("Starting TrainingPipeline")
    # A fresh classifier instance is created for this run.
    relevanceClassifier = RelevanceClassifier()
    relevanceClassifier.startTrainingPipeline(p2GoldenKomootConf)