In [None]:
import mlflow
from sklearn.model_selection import ParameterGrid
from mlflow import MlflowClient
from pathlib import Path
import pandas as pd
import tempfile
from glob import glob

In [None]:
# Project layout, derived from the current working directory.
# PROJECT_ROOT is the parent directory of the current working directory.
PROJECT_ROOT = Path.cwd().parents[0]
NOTEBOOKS_ROOT = PROJECT_ROOT.joinpath("notebooks")
# Path of the papermill executable inside the project's .venv directory.
PAPERMILL_PATH = PROJECT_ROOT.joinpath(".venv", "bin", "papermill")

In [None]:
# Create the 'des-base' experiment only if it does not exist yet.
# mlflow.create_experiment raises when the name is already taken, which would
# make this cell fail on every re-run (Restart Kernel -> Run All).
des_base_experiment = mlflow.get_experiment_by_name('des-base')
if des_base_experiment is None:
    des_base_experiment_id = mlflow.create_experiment('des-base')
else:
    des_base_experiment_id = des_base_experiment.experiment_id
# Last expression of the cell: display the experiment id, as the bare
# create_experiment call did.
des_base_experiment_id

In [None]:
# NOTE(review): DATASETS_DIR is not defined in any cell visible above; it must
# be set (e.g. in the configuration cell) before this cell can run.
# glob returns matches in arbitrary order, so sort for reproducible results.
all_processed_data_files = sorted(glob(f"{DATASETS_DIR}/*"))

# Dataset name = the part of the file name before 'train', with trailing '-'
# characters removed. Path(...).name is used so that only the file name (not a
# parent directory that happens to contain 'train') decides whether a file is
# treated as a training file, and so the basename is extracted portably.
all_file_names = [
    Path(file).name.split('train')[0].rstrip('-')
    for file in all_processed_data_files
    if 'train' in Path(file).name
]
print(all_file_names[:5])

In [None]:
def _test_path_for(train_path):
    """Return ``train_path`` with 'train' replaced by 'test' in the file name only.

    The directory prefix is kept byte-for-byte, so a parent directory whose
    name contains 'train' is not rewritten.
    """
    file_name = Path(train_path).name
    directory_prefix = train_path[:len(train_path) - len(file_name)]
    return directory_prefix + file_name.replace('train', 'test')


# One {'train': ..., 'test': ...} mapping per file whose *name* contains 'train'.
train_and_test_paths = [
    {'train': path, 'test': _test_path_for(path)}
    for path in all_processed_data_files
    if 'train' in Path(path).name
]
print(train_and_test_paths[:2])

In [None]:
# Grid over every training file combined with every bagging size.
base_params = ParameterGrid({
    # Select training files by file name only, so that a parent directory
    # containing 'train' does not cause every file to be selected.
    "train_path": [
        path for path in all_processed_data_files if 'train' in Path(path).name
    ],
    "bagging_size": [50, 100, 200, 500],
})

In [None]:
# Full grid: every combination of the values listed for each key below.
params = ParameterGrid({
    "ensemble_size": [5, 10, 20],
    # Select training files by file name only, so that a parent directory
    # containing 'train' does not cause every file to be selected.
    "train_path": [
        path for path in all_processed_data_files if 'train' in Path(path).name
    ],
    "bagging_size": [50, 100, 200, 500],
    "pop_size": [100],
    "n_gen": [100],
    "scoring_method": ['normal', 'diversity']
})

In [None]:
# Tracking client used by the cells below to create runs and log parameters.
client = mlflow.MlflowClient()

In [None]:
# Resolve the target experiment by name instead of hard-coding its id: ids are
# assigned by the tracking store and differ between stores, so a literal id
# only works on the machine where it happened to be assigned.
# NOTE(review): assumes the hard-coded id '2' referred to the 'des-base'
# experiment created earlier in this notebook — confirm.
base_experiment = client.get_experiment_by_name('des-base')
if base_experiment is None:
    raise RuntimeError(
        "MLflow experiment 'des-base' was not found; create it before logging runs."
    )

# Create one run per parameter combination and log each parameter on it.
for param_set in base_params:
    run = client.create_run(base_experiment.experiment_id)
    for param, value in param_set.items():
        client.log_param(run_id=run.info.run_id, key=param, value=value)