In [5]:
import os
import yaml
import tqdm
import time
import torch
import random
import joblib
import logging
import subprocess
import pandas as pd
import multiprocessing as mp
from train_SL import trainer, mlp_trainer
from functools import partial

In [6]:
root = logging.getLogger()
root.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')

# Stream INFO-and-above records to the console. StreamHandler() without an
# argument writes to sys.stderr (not stdout).
# The handler is looked up by name first so that re-running this cell reuses it
# instead of stacking a second handler, which would print every record twice.
ch = next((h for h in root.handlers if h.get_name() == "notebook_stream"), None)
if ch is None:
    ch = logging.StreamHandler()
    ch.set_name("notebook_stream")
    root.addHandler(ch)
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)

In [7]:
# Pick the current CUDA device when one is available, otherwise fall back to the CPU.
# NOTE(review): torch.cuda.current_device() initialises CUDA in this (parent) process;
# the RuntimeError recorded further down ("Cannot re-initialize CUDA in forked
# subprocess") is presumably related -- confirm before running the pools on a GPU node.
if torch.cuda.is_available():
    is_cuda = True
    device = torch.device(torch.cuda.current_device())
else:
    is_cuda = False
    device = torch.device("cpu")

In [19]:
# Path of the YAML configuration file that is parsed into `conf` in the next cell.
config = "config/SL.yml"

In [20]:
# Parse the YAML configuration file into `conf`.
# NOTE(review): yaml.FullLoader accepts more than plain data types; if this file
# could ever come from an untrusted source, prefer yaml.safe_load -- confirm.
with open(config) as cf:
    conf = yaml.load(cf, Loader=yaml.FullLoader)

In [21]:
# Directory under which this notebook writes seeds.pkl and the ensemble result CSVs.
save_loc = conf["save_loc"]

In [23]:
# One entry per data split; each value is handed to the trainer as conf["data_seed"].
data_seeds = list(range(100)) # refers to which split to take from GroupShuffleSplit(n_ensembles = 100)

In [24]:
# 100 distinct model seeds drawn without replacement from 1..4999.
# NOTE: the draw is not seeded, so it differs between kernel sessions; the seeds
# actually used are the ones reloaded from seeds.pkl in a later cell.
model_seeds = random.sample(range(1, 5000), 100)

In [26]:
# Persist the seeds only once, so that later runs of the notebook reuse the same
# ensemble instead of overwriting it with a fresh random draw.
# The existence check must look at the same file that is written (and reloaded in
# the next cell); previously it tested a hard-coded, unrelated path.
seeds_path = f"{save_loc}/seeds.pkl"
if not os.path.isfile(seeds_path):
    # Make sure the target directory exists before the first write.
    os.makedirs(save_loc, exist_ok=True)
    with open(seeds_path, "wb") as fid:
        joblib.dump([data_seeds, model_seeds], fid)

In [27]:
# Reload both seed lists from disk; this replaces the in-memory lists created above
# with the persisted ones. joblib.load unpickles the file, so it must be a trusted file.
with open(f"{save_loc}/seeds.pkl", "rb") as fid:
    data_seeds, model_seeds = joblib.load(fid)

In [28]:
def launch(config, model_seed, data_seed, model_type, save_path_extend):
    """Configure and run a single training job for one (data_seed, model_seed) pair.

    The YAML file at ``config`` is loaded, the two seeds and a run-specific
    ``save_loc`` are written into it, and the resulting configuration is saved
    as ``model.yml`` inside the run directory before training starts.

    Args:
        config: Path to the YAML configuration file.
        model_seed: Value stored under ``conf["model_seed"]``.
        data_seed: Value stored under ``conf["data_seed"]``.
        model_type: ``"mlp"`` selects ``mlp_trainer``; any other value selects ``trainer``.
        save_path_extend: Sub-directory of the configured ``save_loc`` that
            holds the per-run directories.

    Returns:
        The object returned by the trainer, with ``"data_seed"`` and
        ``"model_seed"`` entries added.
    """
    with open(config) as cf:
        conf = yaml.load(cf, Loader=yaml.FullLoader)

    # Each run gets its own directory named after the seed pair.
    run_dir = os.path.join(conf["save_loc"], f"{save_path_extend}/{data_seed}_{model_seed}")
    os.makedirs(run_dir, exist_ok = True)
    conf.update(data_seed=data_seed, model_seed=model_seed, save_loc=run_dir)

    # Keep a copy of the exact configuration used for this run next to its outputs.
    with open(os.path.join(run_dir, "model.yml"), "w") as fid:
        yaml.dump(conf, fid)

    # Both trainers take the same arguments; only the entry point differs.
    # `device` is the notebook-level torch device.
    train_fn = mlp_trainer if model_type == "mlp" else trainer
    result = train_fn(conf, evaluate = True, verbose = False, device = device)

    result["data_seed"] = data_seed
    result["model_seed"] = model_seed
    return result

def model_launch(model_seed, config, data_seed, model_type):
    """Train one member of the model-seed ensemble and return its results.

    Args:
        model_seed: Seed of this ensemble member; must be an element of the
            notebook-level ``model_seeds`` list (``list.index`` raises
            ``ValueError`` otherwise).
        config: Path to the YAML configuration file, forwarded to ``launch``.
        data_seed: Data split shared by all members, forwarded to ``launch``.
        model_type: Forwarded to ``launch`` to choose the trainer.

    Returns:
        The result returned by ``launch`` for this seed pair, so that the
        caller can collect it (previously the result was dropped and ``None``
        was returned).
    """
    seed_id = model_seeds.index(model_seed)
    worker_id = seed_id % 8
    # The stagger delay announced in this message is currently disabled; only
    # the message itself is kept.
    logging.info(f"Starting {seed_id} / {len(model_seeds)}, sleeping for {0.1*worker_id:.1f} s")
    t0 = time.time()
    result = launch(config, model_seed, data_seed, model_type, "model")
    logging.info(f"Finished {seed_id} in {time.time()-t0}")
    return result

def data_launch(data_seed, config, model_seed, model_type):
    """Train one member of the data-split ensemble and return its results.

    Args:
        data_seed: Data split of this ensemble member; must be an element of
            the notebook-level ``data_seeds`` list (``list.index`` raises
            ``ValueError`` otherwise).
        config: Path to the YAML configuration file, forwarded to ``launch``.
        model_seed: Model seed shared by all members, forwarded to ``launch``.
        model_type: Forwarded to ``launch`` to choose the trainer.

    Returns:
        The result returned by ``launch`` for this seed pair, so that the
        caller can collect it (previously the result was dropped and ``None``
        was returned).
    """
    seed_id = data_seeds.index(data_seed)
    worker_id = seed_id % 8
    # The stagger delay announced in this message is currently disabled; only
    # the message itself is kept.
    logging.info(f"Starting {seed_id} / {len(data_seeds)}, sleeping for {0.1*worker_id:.1f} s")
    t0 = time.time()
    result = launch(config, model_seed, data_seed, model_type, "data")
    logging.info(f"Finished {seed_id} in {time.time()-t0}")
    return result

In [29]:
# Reference seeds: the data split held fixed while model seeds vary, and the
# model seed held fixed while data splits vary.
data_seed = data_seeds[0]
model_seed = model_seeds[0]

### Create an ensemble over model seeds, with a fixed train/valid/test data split

In [30]:
# Worker for the MLP model-seed ensemble: everything except the model seed is fixed.
model_worker = partial(
    model_launch,
    config="config/SL-mlp.yml",
    data_seed=data_seed,
    model_type="mlp",
)

In [31]:
#%%time
# Run one training job per model seed on a pool of 8 worker processes; imap yields
# the results in the order of `model_seeds`.
# NOTE(review): the RuntimeError recorded below shows forked workers failing to
# initialise CUDA. PyTorch asks for the 'spawn' start method in that case, which
# requires the worker function to be importable from a module rather than defined
# in the notebook -- confirm the intended setup before running on a GPU node.
model_results = []
with mp.Pool(8) as pool:
    seed_runs = pool.imap(model_worker, model_seeds)
    model_results.extend(tqdm.tqdm(seed_runs, total=len(model_seeds)))

INFO:root:Starting 1 / 100, sleeping for 0.1 s
INFO:root:Starting 0 / 100, sleeping for 0.0 s
INFO:root:Starting 2 / 100, sleeping for 0.2 s
INFO:root:Starting 3 / 100, sleeping for 0.3 s
INFO:root:Starting 4 / 100, sleeping for 0.4 s
INFO:root:Starting 5 / 100, sleeping for 0.5 s
  0%|          | 0/100 [00:00<?, ?it/s]INFO:root:Starting 7 / 100, sleeping for 0.7 s
INFO:root:Starting 1 / 100, sleeping for 0.1 s
INFO:root:Starting 6 / 100, sleeping for 0.6 s
INFO:root:Starting 5 / 100, sleeping for 0.5 s
INFO:root:Starting 2 / 100, sleeping for 0.2 s
INFO:root:Starting 3 / 100, sleeping for 0.3 s
INFO:root:Starting 7 / 100, sleeping for 0.7 s
INFO:root:Starting 0 / 100, sleeping for 0.0 s
INFO:root:Starting 4 / 100, sleeping for 0.4 s
INFO:root:Starting 6 / 100, sleeping for 0.6 s
  0%|          | 0/100 [00:01<?, ?it/s]INFO:root:Starting 8 / 100, sleeping for 0.0 s
INFO:root:Starting 9 / 100, sleeping for 0.1 s
INFO:root:Starting 10 / 100, sleeping for 0.2 s
INFO:root:Starting 13 / 100,

RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method

In [15]:
# Tabulate the per-seed results; reset_index() turns the row position into an
# explicit "index" column, which is why the CSV is written without the index.
df_model = (
    pd.DataFrame.from_dict(model_results)
    .reset_index()
)
df_model.to_csv(path_or_buf=f"{save_loc}/mlp_model_ensemble_results.csv", index=False)

### Create ensemble of data splits, with fixed model seed

In [16]:
# Worker for the MLP data-split ensemble: everything except the data seed is fixed.
data_worker = partial(
    data_launch,
    config="config/SL-mlp.yml",
    model_seed=model_seed,
    model_type="mlp",
)

In [17]:
# Run one training job per data split on a pool of 8 worker processes; imap yields
# the results in the order of `data_seeds`.
data_results = []
with mp.Pool(8) as pool:
    split_runs = pool.imap(data_worker, data_seeds)
    data_results.extend(tqdm.tqdm(split_runs, total=len(data_seeds)))

INFO:root:Starting 1 / 100, sleeping for 0.1 s
INFO:root:Starting 3 / 100, sleeping for 0.3 s
  0%|          | 0/100 [00:00<?, ?it/s]INFO:root:Starting 0 / 100, sleeping for 0.0 s
INFO:root:Starting 5 / 100, sleeping for 0.5 s
INFO:root:Starting 2 / 100, sleeping for 0.2 s
INFO:root:Starting 4 / 100, sleeping for 0.4 s
INFO:root:Starting 6 / 100, sleeping for 0.6 s
INFO:root:Starting 7 / 100, sleeping for 0.7 s
INFO:root:Finished 1 in 48.39467453956604
INFO:root:Starting 8 / 100, sleeping for 0.0 s
INFO:root:Finished 6 in 55.00976276397705
INFO:root:Starting 9 / 100, sleeping for 0.1 s
INFO:root:Finished 7 in 56.426615476608276
INFO:root:Starting 10 / 100, sleeping for 0.2 s
INFO:root:Finished 4 in 64.45585227012634
INFO:root:Starting 11 / 100, sleeping for 0.3 s
INFO:root:Finished 5 in 64.3886992931366
INFO:root:Starting 12 / 100, sleeping for 0.4 s
INFO:root:Finished 2 in 68.2267472743988
INFO:root:Starting 13 / 100, sleeping for 0.5 s
INFO:root:Finished 0 in 71.57834911346436
INFO:r

In [18]:
# Tabulate the per-split results; reset_index() turns the row position into an
# explicit "index" column, which is why the CSV is written without the index.
df_data = (
    pd.DataFrame.from_dict(data_results)
    .reset_index()
)
df_data.to_csv(path_or_buf=f"{save_loc}/mlp_data_ensemble_results.csv", index=False)

### Now with the evidential model

In [19]:
# Worker for the evidential model-seed ensemble: everything except the model seed is fixed.
model_worker = partial(
    model_launch,
    config="config/SL.yml",
    data_seed=data_seed,
    model_type="evidential",
)

In [20]:
#%%time
# Run one evidential training job per model seed on a pool of 8 worker processes;
# imap yields the results in the order of `model_seeds`.
model_results = []
with mp.Pool(8) as pool:
    seed_runs = pool.imap(model_worker, model_seeds)
    model_results.extend(tqdm.tqdm(seed_runs, total=len(model_seeds)))

INFO:root:Starting 0 / 100, sleeping for 0.0 s
INFO:root:Starting 2 / 100, sleeping for 0.2 s
INFO:root:Starting 1 / 100, sleeping for 0.1 s
  0%|          | 0/100 [00:00<?, ?it/s]INFO:root:Starting 4 / 100, sleeping for 0.4 s
INFO:root:Starting 3 / 100, sleeping for 0.3 s
INFO:root:Starting 6 / 100, sleeping for 0.6 s
INFO:root:Starting 5 / 100, sleeping for 0.5 s
INFO:root:Starting 7 / 100, sleeping for 0.7 s
INFO:numexpr.utils:Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
INFO:numexpr.utils:Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:root:Finished 5 in 118.57985734939575
INFO:root:Starting 8 / 100, sleeping for 0.0 s
INFO:numexpr.utils:Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
INFO:numexpr.utils:Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit

In [21]:
# Tabulate the per-seed results; reset_index() turns the row position into an
# explicit "index" column, which is why the CSV is written without the index.
df_model = (
    pd.DataFrame.from_dict(model_results)
    .reset_index()
)
df_model.to_csv(path_or_buf=f"{save_loc}/evidential_model_ensemble_results.csv", index=False)

In [22]:
# Worker for the evidential data-split ensemble: everything except the data seed is fixed.
data_worker = partial(
    data_launch,
    config="config/SL.yml",
    model_seed=model_seed,
    model_type="evidential",
)

In [23]:
# Run one evidential training job per data split on a pool of 8 worker processes;
# imap yields the results in the order of `data_seeds`.
data_results = []
with mp.Pool(8) as pool:
    split_runs = pool.imap(data_worker, data_seeds)
    data_results.extend(tqdm.tqdm(split_runs, total=len(data_seeds)))

INFO:root:Starting 0 / 100, sleeping for 0.0 s
INFO:root:Starting 3 / 100, sleeping for 0.3 s
INFO:root:Starting 1 / 100, sleeping for 0.1 s
  0%|          | 0/100 [00:00<?, ?it/s]INFO:root:Starting 6 / 100, sleeping for 0.6 s
INFO:root:Starting 4 / 100, sleeping for 0.4 s
INFO:root:Starting 5 / 100, sleeping for 0.5 s
INFO:root:Starting 7 / 100, sleeping for 0.7 s
INFO:root:Starting 2 / 100, sleeping for 0.2 s
INFO:numexpr.utils:Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
INFO:numexpr.utils:Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:root:Finished 4 in 92.56176471710205
INFO:root:Starting 8 / 100, sleeping for 0.0 s
INFO:numexpr.utils:Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
INFO:numexpr.utils:Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit 

In [24]:
# Tabulate the per-split results; reset_index() turns the row position into an
# explicit "index" column, which is why the CSV is written without the index.
df_data = (
    pd.DataFrame.from_dict(data_results)
    .reset_index()
)
df_data.to_csv(path_or_buf=f"{save_loc}/evidential_data_ensemble_results.csv", index=False)