In [1]:
import subprocess
import time

In [2]:
# Experiment selection: which event log to use, how traces are bucketed,
# and which sequence encoding is passed on to the classifier.
dataset, bucket_method, cls_encoding = "sepsis_cases", "single", "agg"

In [23]:
# Settings forwarded to the parameter-optimisation script on its command line.
bucket_encoding, cls_method, n_iter = "agg", "xgboost", 16

# Classifier-specific script and output directory.
experiments_filename, results_dir = (
    ("experiments_param_optim_rf.py", "results/validation_rf")
    if cls_method == "rf"
    else ("experiments_param_optim_xgboost.py", "results/validation")
)

# NOTE(review): the two assignments below unconditionally replace the values
# chosen above, so the cross-validation script and directory are used whatever
# cls_method is -- confirm this override is intended.
experiments_filename = "experiments_param_optim_xgboost_cv.py"
results_dir = "results/validation_xgboost_cv"

In [24]:
# Pick the amount of memory to request for the job, based on the dataset name.
# The value is later interpolated into the "#SBATCH --mem=" line of run.sh.
#
# The previous condition ended in a bare `or "traffic_fines"`; a non-empty
# string is always truthy, so every dataset that missed the first branch was
# given 30000 and the 10000 fallback could never be reached. Each name is now
# tested for membership in `dataset`.
if dataset == "minit" or "sample30000" in dataset:
    memory = 20000
elif any(name in dataset for name in ("bpic2017", "hospital_billing", "traffic_fines")):
    memory = 30000
else:
    # Default for all remaining datasets.
    memory = 10000

In [25]:
# Write the Slurm batch script for the parameter-optimisation run, then submit it.
with open("run.sh", "w") as fout:
    # "#SBATCH" directives: job output file, memory request and time limit.
    sbatch_header = "".join([
        "#!/bin/bash\n",
        "#SBATCH --output=results/output_files/cv_%s_%s_%s_%s.csv\n" % (
            dataset, bucket_method, cls_encoding, cls_method),
        "#SBATCH --mem=%s\n" % memory,
        "#SBATCH --time=3-00\n",
    ])

    # Positional arguments, in the order they are passed to the experiment script.
    script_args = [experiments_filename, dataset, bucket_encoding, bucket_method,
                   cls_encoding, cls_method, results_dir, n_iter]
    python_command = "python " + " ".join(str(arg) for arg in script_args)

    fout.write(sbatch_header + python_command)

# Pause after the file has been closed and before submitting.
# NOTE(review): presumably this lets the filesystem settle before sbatch reads
# run.sh -- confirm whether the delay is still needed.
time.sleep(5)
# Fire-and-forget submission; the Popen handle is the cell's displayed value.
subprocess.Popen(["sbatch", "run.sh"])

<subprocess.Popen at 0x7f6b34502128>

# Final results

In [19]:
# Experiment selection for the final-results run: event log, bucketing
# method and the sequence encoding handed to the classifier.
(dataset,
 bucket_method,
 cls_encoding) = ("sepsis_cases",
                  "single",
                  "agg")

In [20]:
# Script that produces the final results, and where it writes them.
experiments_filename, results_dir = "experiments_final_xgboost_cv.py", "results/final_xgb_cv"

# File name passed to the script as its optimal-parameters argument.
optimal_params_filename = "optimal_params.pickle"

# Encoding and classifier identifiers forwarded on the command line.
bucket_encoding, cls_method = "agg", "xgboost_calibrated_sigmoid"

In [21]:
# Pick the amount of memory to request for the job, based on the dataset name.
# The value is later interpolated into the "#SBATCH --mem=" line of run.sh.
#
# The previous condition ended in a bare `or "traffic_fines"`; a non-empty
# string is always truthy, so every dataset that missed the first branch was
# given 30000 and the 10000 fallback could never be reached. Each name is now
# tested for membership in `dataset`.
if dataset == "minit" or "sample30000" in dataset:
    memory = 20000
elif any(name in dataset for name in ("bpic2017", "hospital_billing", "traffic_fines")):
    memory = 30000
else:
    # Default for all remaining datasets.
    memory = 10000

In [22]:
# Write the Slurm batch script for the final-results run, then submit it.
with open("run.sh", "w") as fout:
    # "#SBATCH" directives: job output file, memory request and time limit.
    sbatch_header = "".join([
        "#!/bin/bash\n",
        "#SBATCH --output=results/output_files/final_cv_%s_%s_%s_%s.csv\n" % (
            dataset, bucket_method, cls_encoding, cls_method),
        "#SBATCH --mem=%s\n" % memory,
        "#SBATCH --time=3-00\n",
    ])

    # Positional arguments, in the order they are passed to the experiment script.
    script_args = [experiments_filename, dataset, bucket_encoding, bucket_method,
                   cls_encoding, cls_method, optimal_params_filename, results_dir]
    python_command = "python " + " ".join(str(arg) for arg in script_args)

    fout.write(sbatch_header + python_command)

# Pause after the file has been closed and before submitting.
# NOTE(review): presumably this lets the filesystem settle before sbatch reads
# run.sh -- confirm whether the delay is still needed.
time.sleep(5)
# Fire-and-forget submission; the Popen handle is the cell's displayed value.
subprocess.Popen(["sbatch", "run.sh"])

<subprocess.Popen at 0x7eff0577f0f0>

# RF validation with stability

In [5]:
import numpy as np

In [6]:
def loguniform(low=0, high=1):
    """Draw one random number between ``low`` and ``high``.

    A value ``u`` is sampled with ``np.random.uniform(0, 1)``, ``exp(u)`` is
    computed (so it lies between ``exp(0)`` and ``exp(1)``), and that value is
    mapped linearly onto the interval from ``low`` to ``high``.

    NOTE(review): this is a linear rescaling of ``exp(u)``; it is not the same
    as exponentiating a uniform draw between ``log(low)`` and ``log(high)``.
    Confirm this is the distribution the experiments are meant to use.

    Parameters
    ----------
    low : number, default 0
        Value returned when the underlying uniform draw is 0.
    high : number, default 1
        Upper end of the target interval.

    Returns
    -------
    The rescaled sample (a NumPy float scalar for plain numeric inputs).
    """
    exp_zero, exp_one = np.exp(0), np.exp(1)
    draw = np.exp(np.random.uniform(low=0, high=1, size=None))
    # Same operation order as the textbook min-max formula:
    # (x - x_min) * (new_max - new_min) / (x_max - x_min) + new_min
    numerator = (draw - exp_zero) * (high - low)
    return numerator / (exp_one - exp_zero) + low

In [10]:
# Sample one random hyperparameter configuration. No seed is set here, so the
# values differ on every run; the three draws are kept in this order so the
# global NumPy random stream is consumed exactly as before.
n_estimators = np.random.randint(low=10, high=500)   # integer, upper bound exclusive
max_features = loguniform(low=0.01, high=0.9)         # float between 0.01 and 0.9
max_depth = np.random.randint(low=1, high=20)         # integer, upper bound exclusive

In [11]:
# Show the sampled (n_estimators, max_features, max_depth) values as the cell output.
n_estimators, max_features, max_depth

(427, 0.63395630086199883, 7)

In [8]:
# Experiment selection for the RF stability run: event log, bucketing
# method and the sequence encoding handed to the classifier.
dataset, bucket_method, cls_encoding = "production", "single", "agg"

In [12]:
# Script for the RF stability experiments, and where it writes its results.
experiments_filename, results_dir = (
    "experiments_param_optim_rf_stability.py",
    "results/validation_rf_stability",
)

# Encoding, classifier identifier and iteration count forwarded on the command line.
bucket_encoding, cls_method, n_iter = "agg", "rf", 10

In [13]:
# Pick the amount of memory to request for the job, based on the dataset name.
# The value is later interpolated into the "#SBATCH --mem=" line of run.sh.
#
# The previous condition ended in a bare `or "traffic_fines"`; a non-empty
# string is always truthy, so every dataset that missed the first branch was
# given 30000 and the 10000 fallback could never be reached. Each name is now
# tested for membership in `dataset`.
if dataset == "minit" or "sample30000" in dataset:
    memory = 20000
elif any(name in dataset for name in ("bpic2017", "hospital_billing", "traffic_fines")):
    memory = 30000
else:
    # Default for all remaining datasets.
    memory = 10000

In [16]:
# Write the Slurm batch script for one RF stability run, then submit it.
with open("run.sh", "w") as fout:
    # The sampled hyperparameters become the last component of the output file name.
    param_tag = "_".join(str(value) for value in (n_estimators, max_features, max_depth))

    # "#SBATCH" directives: job output file, memory request and time limit.
    sbatch_header = "".join([
        "#!/bin/bash\n",
        "#SBATCH --output=results/output_files/stability_%s_%s_%s_%s_%s.csv\n" % (
            dataset, bucket_method, cls_encoding, cls_method, param_tag),
        "#SBATCH --mem=%s\n" % memory,
        "#SBATCH --time=3-00\n",
    ])

    # Positional arguments, in the order they are passed to the experiment script.
    script_args = [experiments_filename, dataset, bucket_encoding, bucket_method,
                   cls_encoding, cls_method, results_dir, n_iter,
                   n_estimators, max_features, max_depth]
    python_command = "python " + " ".join(str(arg) for arg in script_args)

    fout.write(sbatch_header + python_command)

# Pause after the file has been closed and before submitting.
# NOTE(review): presumably this lets the filesystem settle before sbatch reads
# run.sh -- confirm whether the delay is still needed.
time.sleep(5)
# Fire-and-forget submission; the Popen handle is the cell's displayed value.
subprocess.Popen(["sbatch", "run.sh"])

<subprocess.Popen at 0x7fd9a2ce0400>