In [1]:
import mlflow
from sklearn.model_selection import ParameterGrid
from mlflow import MlflowClient
from pathlib import Path
import pandas as pd
import tempfile
from glob import glob
import subprocess

In [2]:
# mlflow.create_experiment("diversity-functions")

In [2]:
PROJECT_ROOT = Path.cwd().parents[0]
NOTEBOOKS_ROOT = PROJECT_ROOT / "notebooks"
PAPERMILL_PATH = PROJECT_ROOT/ ".venv" / "bin" / "papermill"

In [3]:
EXPERIMENT_ID = '1'
DATASETS_DIR = '../../datasets/processed/'


In [126]:
experiments_for_analysis_df\
 .assign(is_method_better=lambda row: row['metrics.selected_ensemble_accuracy'] > row['metrics.accuracy_selection_accuracy'])\
    .assign(is_method_equal=lambda row: row['metrics.selected_ensemble_accuracy'] == row['metrics.accuracy_selection_accuracy'])\
    .assign(difference=lambda row: row['metrics.selected_ensemble_accuracy'] - row['metrics.accuracy_selection_accuracy'])\
    ['is_method_better'].value_counts()

False    8763
True     2017
Name: is_method_better, dtype: int64

In [127]:
experiments_for_analysis_df['metrics.accuracy_ensemble_selected'].value_counts()

0.0    6706
1.0    4074
Name: metrics.accuracy_ensemble_selected, dtype: int64

## Running

In [35]:
experiments_tmp_dir = tempfile.mkdtemp(prefix="bogul-exp-ijcnn")

In [36]:
TMP_DIR = Path(experiments_tmp_dir)

In [37]:
TMP_DIR

PosixPath('/home/bogul/tmp/bogul-exp-ijcnnsz_5lsl_')

In [39]:
def run_experiments_in_slurm(run_ids, notebook_path, output_dir_path):
    futures = []
    
    for run_id in run_ids:
        # print(f"Running file {file_name}")
        papermill_command = f"{str(PAPERMILL_PATH)} {str(notebook_path)} {str(output_dir_path)}/{run_id}.ipynb -p EXPERIMENT_INSTANCE_ID {run_id}"
        std_out_path = output_dir_path / f"{run_id}.out"
        std_err_path = output_dir_path / f"{run_id}.err"
        
        slurm_command = f"sbatch -o {str(std_out_path)} -e {str(std_err_path)} slurm-script.sh \"{papermill_command}\"" 

        # print(slurm_command)
        subprocess.run(slurm_command, shell=True)

        
    return futures

In [21]:
how_many_jobs = lambda: int(subprocess.run("squeue | grep bogul | wc -l", stdout=subprocess.PIPE, shell=True).stdout.decode('utf-8').strip())

In [22]:
import time

def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
        
        
def run_in_batches(run_ids, notebook_path, output_dir_path, batch_size=250, sleep_interval=25):
    chunked_run_ids = chunks(run_ids, batch_size)
    
    while(True):
        jobs_currently = how_many_jobs()
        if jobs_currently <= batch_size:
            print(f"There are {jobs_currently} runnig jobs, scheduling next batch of {batch_size}")
            try:
                next_batch = next(chunked_run_ids)
                run_experiments_in_slurm(next_batch, notebook_path, output_dir_path)
            except StopIteration:
                print("End of batches!")
                break
        else:
            print(f"There are {jobs_currently}, cant schedule yet!")
        print(f"Waiting {sleep_interval}") 
        time.sleep(sleep_interval)
    
    

In [40]:
run_experiments_in_slurm(base_experiments_run_ids[:5], Path(BASE_EXPERIMENT_NOTEBOOK_PATH), TMP_DIR)

Submitted batch job 899142
Submitted batch job 899143
Submitted batch job 899144
Submitted batch job 899145
Submitted batch job 899146


[]

In [49]:
NOTEBOOK_PATH = NOTEBOOKS_ROOT / '3.0-single-experiment.ipynb'

## Base

In [16]:
BASE_EXPERIMENT_NOTEBOOK_PATH = NOTEBOOKS_ROOT / "base_results.ipynb"

In [17]:
base_experiments_df = mlflow.search_runs('2', output_format='pandas')

In [38]:
base_experiments_run_ids = base_experiments_df.run_id

In [None]:
run_in_batches(base_experiments_run_ids, Path(BASE_EXPERIMENT_NOTEBOOK_PATH), TMP_DIR, sleep_interval=100)

There are 0 runnig jobs, scheduling next batch of 250
Submitted batch job 899150
Submitted batch job 899151
Submitted batch job 899152
Submitted batch job 899153
Submitted batch job 899154
Submitted batch job 899155
Submitted batch job 899156
Submitted batch job 899157
Submitted batch job 899158
Submitted batch job 899159
Submitted batch job 899160
Submitted batch job 899161
Submitted batch job 899162
Submitted batch job 899163
Submitted batch job 899164
Submitted batch job 899165
Submitted batch job 899166
Submitted batch job 899167
Submitted batch job 899168
Submitted batch job 899169
Submitted batch job 899170
Submitted batch job 899171
Submitted batch job 899172
Submitted batch job 899173
Submitted batch job 899174
Submitted batch job 899175
Submitted batch job 899176
Submitted batch job 899177
Submitted batch job 899178
Submitted batch job 899179
Submitted batch job 899180
Submitted batch job 899181
Submitted batch job 899182
Submitted batch job 899183
Submitted batch job 899184
S

## Method

In [42]:
experiments = mlflow.search_runs('1', output_format='pandas')

In [45]:
viable_experiments = experiments[(experiments['metrics.accuracy_selection_accuracy'].isna() == True)]

In [47]:
viable_experiments

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.method_selection_accuracy,metrics.accuracy_ensemble_selected,metrics.selected_ensemble_accuracy,metrics.accuracy_selection_accuracy,params.bagging_size,params.ensemble_size,params.dataset,params.n_gen,params.train_path,params.scoring_method,params.pop_size,params.train_and_test_paths,tags.mlflow.runName
0,4f1f1228893d4ea982d4a9c13fd7de45,1,RUNNING,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 22:34:39.930000+00:00,NaT,,,,,500,20,,,,,,,lyrical-bird-283
5,fa1926761b4f47dbab4152a405a6df91,1,FINISHED,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 22:34:38.497000+00:00,2022-12-03 21:12:19.549000+00:00,,,,,500,20,nursery-3-s1.csv,100,../../datasets/processed/nursery-train-3-s1.csv,normal,100,,spiffy-owl-732
26,7ff0138d299c45d4a5357672f7032472,1,FAILED,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 22:34:29.683000+00:00,2022-12-03 21:12:13.633000+00:00,,,,,500,20,ecoli-0-s1.csv,100,../../datasets/processed/ecoli-train-0-s1.csv,normal,100,,resilient-sloth-77
33,5d2ffab4007544d0b567ad37bc4d05e1,1,FINISHED,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 22:34:26.684000+00:00,2022-12-03 21:12:16.151000+00:00,,,,,500,20,ecoli-1-s2.csv,100,../../datasets/processed/ecoli-train-1-s2.csv,normal,100,,judicious-asp-522
39,06350954c6cc4af883cf4871d82d40df,1,FINISHED,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 22:34:24.475000+00:00,2022-12-03 21:12:16.313000+00:00,,,,,500,20,ecoli-0-s2.csv,100,../../datasets/processed/ecoli-train-0-s2.csv,normal,100,,kindly-fox-514
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14693,263ca9d9fc0541a0a8d587dd4d464302,1,FINISHED,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 21:44:47.278000+00:00,2022-12-03 22:29:53.643000+00:00,,,,,50,5,abalone-2-s2.csv,100,../../datasets/processed/abalone-train-2-s2.csv,normal,100,,judicious-worm-877
14707,b0535df47a894d17bdd2725dd665b241,1,RUNNING,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 20:55:25.705000+00:00,NaT,,,,,50,5,,100,,normal,100,{'train': '../../datasets/processed/penbased-t...,caring-sponge-567
14708,3493811201af4737b7f559258a685a02,1,RUNNING,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 20:55:25.466000+00:00,NaT,,,,,50,5,,100,,normal,100,{'train': '../../datasets/processed/tae-train-...,likeable-pig-678
14709,5e348e53a3274a2ea6cfc87089870064,1,RUNNING,file:///home/bogul/scalarizing/notebooks/mlrun...,2022-11-23 20:54:49.275000+00:00,NaT,,,,,,,,,,,,,trusting-trout-782


In [None]:
run_in_batches(viable_experiments.run_id, Path(NOTEBOOK_PATH), TMP_DIR, sleep_interval=300)

There are 0 runnig jobs, scheduling next batch of 250
Submitted batch job 902059
Submitted batch job 902060
Submitted batch job 902061
Submitted batch job 902062
Submitted batch job 902063
Submitted batch job 902064
Submitted batch job 902065
Submitted batch job 902066
Submitted batch job 902067
Submitted batch job 902068
Submitted batch job 902069
Submitted batch job 902070
Submitted batch job 902071
Submitted batch job 902072
Submitted batch job 902073
Submitted batch job 902074
Submitted batch job 902075
Submitted batch job 902076
Submitted batch job 902077
Submitted batch job 902078
Submitted batch job 902079
Submitted batch job 902080
Submitted batch job 902081
Submitted batch job 902082
Submitted batch job 902083
Submitted batch job 902084
Submitted batch job 902085
Submitted batch job 902086
Submitted batch job 902087
Submitted batch job 902088
Submitted batch job 902089
Submitted batch job 902090
Submitted batch job 902091
Submitted batch job 902092
Submitted batch job 902093
S