In [1]:
%load_ext autoreload
from __future__ import print_function, division

In [2]:
%autoreload

import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score

import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim
from torch.autograd import Variable
from torch.nn.parameter import Parameter

In [3]:
# Input HDF5 files: grouped features (plus statics) and raw features.
DATA_FILEPATH = '../data/grouped/all_hourly_data.h5'
RAW_DATA_FILEPATH = '../data/data/all_hourly_data.h5'

# Windowing parameters, both expressed in hours.
GAP_TIME = 6
WINDOW_SIZE = 24

# Index levels that identify one ICU stay.
ID_COLS = ['subject_id', 'hadm_id', 'icustay_id']

# Seed shared by numpy and torch.
SEED = 1

# Value exported as CUDA_VISIBLE_DEVICES.
GPU = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU

np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f87993a6f90>

In [4]:
class DictDist():
    """Sample dictionaries from a mapping of named distributions.

    Every value of ``dict_of_rvs`` must provide ``rvs(n)`` returning an
    indexable sequence of ``n`` draws.
    """

    def __init__(self, dict_of_rvs):
        self.dict_of_rvs = dict_of_rvs

    def rvs(self, n):
        """Return a list of ``n`` dicts, each with one draw per named distribution."""
        # Draw all n values per key first (one rvs call per distribution) ...
        draws = {}
        for name, dist in self.dict_of_rvs.items():
            draws[name] = dist.rvs(n)
        # ... then regroup them so that sample i collects the i-th draw of every key.
        return [{name: column[i] for name, column in draws.items()} for i in range(n)]
    
class Choice():
    """Sampler that picks uniformly at random from a fixed sequence of options."""

    def __init__(self, options):
        self.options = options

    def rvs(self, n):
        """Return a list of ``n`` options, each selected by a random integer index."""
        picked_positions = ss.randint(0, len(self.options)).rvs(n)
        chosen = []
        for position in picked_positions:
            chosen.append(self.options[position])
        return chosen

In [5]:
%%time
# Hourly measurement tables: both are stored under the 'vitals_labs' key of
# their respective HDF5 files.
data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, key='vitals_labs')
data_full_raw = pd.read_hdf(RAW_DATA_FILEPATH, key='vitals_labs')

# Static table stored under the 'patients' key of the grouped file.
statics = pd.read_hdf(DATA_FILEPATH, key='patients')

CPU times: user 3min 29s, sys: 13min 40s, total: 17min 9s
Wall time: 26min 41s


In [6]:
# Preview the first five rows of the grouped hourly table.
data_full_lvl2.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,LEVEL2,alanine aminotransferase,alanine aminotransferase,alanine aminotransferase,albumin,albumin,albumin,albumin ascites,albumin ascites,albumin ascites,albumin pleural,...,white blood cell count,white blood cell count urine,white blood cell count urine,white blood cell count urine,ph,ph,ph,ph urine,ph urine,ph urine
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Aggregation Function,count,mean,std,count,mean,std,count,mean,std,count,...,std,count,mean,std,count,mean,std,count,mean,std
subject_id,hadm_id,icustay_id,hours_in,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
3,145834,211552,0,2.0,25.0,0.0,2.0,1.8,0.0,0.0,,,0.0,...,4.012837,0.0,,,9.0,7.4,0.147733,1.0,5.0,
3,145834,211552,1,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,0.0,,,0.0,,
3,145834,211552,2,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,3.0,7.26,0.0,0.0,,
3,145834,211552,3,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,0.0,,,0.0,,
3,145834,211552,4,0.0,,,0.0,,,0.0,,,0.0,...,,0.0,,,0.0,,,0.0,,


In [7]:
#data_full_raw.head()

In [8]:
# Preview the first five rows of the static table.
statics.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,ethnicity,age,insurance,admittime,diagnosis_at_admission,dischtime,discharge_location,fullcode_first,dnr_first,...,outtime,los_icu,admission_type,first_careunit,mort_icu,mort_hosp,hospital_expire_flag,hospstay_seq,readmission_30,max_hours
subject_id,hadm_id,icustay_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
3,145834,211552,M,WHITE,76.526792,Medicare,2101-10-20 19:08:00,HYPOTENSION,2101-10-31 13:58:00,SNF,1.0,0.0,...,2101-10-26 20:43:09,6.06456,EMERGENCY,MICU,0,0,0,1,0,145
4,185777,294638,F,WHITE,47.845047,Private,2191-03-16 00:28:00,"FEVER,DEHYDRATION,FAILURE TO THRIVE",2191-03-23 18:41:00,HOME WITH HOME IV PROVIDR,1.0,0.0,...,2191-03-17 16:46:31,1.678472,EMERGENCY,MICU,0,0,0,1,0,40
6,107064,228232,F,WHITE,65.942297,Medicare,2175-05-30 07:15:00,CHRONIC RENAL FAILURE/SDA,2175-06-15 16:00:00,HOME HEALTH CARE,1.0,0.0,...,2175-06-03 13:39:54,3.672917,ELECTIVE,SICU,0,0,0,1,0,88
9,150750,220597,M,UNKNOWN/NOT SPECIFIED,41.790228,Medicaid,2149-11-09 13:06:00,HEMORRHAGIC CVA,2149-11-14 10:15:00,DEAD/EXPIRED,1.0,0.0,...,2149-11-14 20:52:14,5.323056,EMERGENCY,MICU,1,1,1,1,0,127
11,194540,229441,F,WHITE,50.148295,Private,2178-04-16 06:18:00,BRAIN MASS,2178-05-11 19:00:00,HOME HEALTH CARE,1.0,0.0,...,2178-04-17 20:21:05,1.58441,EMERGENCY,SICU,0,0,0,1,0,38


In [9]:
def simple_imputer(df):
    """Impute hourly values and derive 'mask' and 'time_since_measured' columns.

    The input is copied, so the caller's frame is not modified. Grouping uses
    the module-level ``ID_COLS``, so ``df`` must expose those names as index
    levels. Columns must carry an 'Aggregation Function' level containing
    'mean' and 'count'.

    For every variable the returned frame holds three columns:
      * 'mean': the imputed value (forward fill, then ``icustay_means``, then 0),
      * 'mask': 1.0 where the original 'count' was > 0, otherwise 0.0,
      * 'time_since_measured': number of rows without a measurement since the
        last measured row; 100 where no earlier measured row exists.

    Returns:
        A new DataFrame with the columns sorted.
    """
    idx = pd.IndexSlice
    df = df.copy()
    # Frames with more than two column levels are reduced by dropping the
    # 'label', 'LEVEL1' and 'LEVEL2' levels.
    if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))
    
    # NOTE(review): df_out is a .loc selection of df; the assignments and the
    # in-place rename below are what emit pandas' "set on a copy of a slice" warning.
    df_out = df.loc[:, idx[:, ['mean', 'count']]]
    # Mean of each 'mean' column per ID_COLS group, computed before any imputation.
    icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()
    
    # Fill order: forward-fill within each ID_COLS group, then icustay_means, then 0.
    # NOTE(review): fillna(method=...) and groupby(...).fillna are deprecated in
    # recent pandas releases; confirm how groupby.fillna aligns a DataFrame value
    # before porting this chain.
    df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(
        method='ffill'
    ).groupby(ID_COLS).fillna(icustay_means).fillna(0)
    
    # Turn the per-hour observation count into a 0/1 float indicator and call it 'mask'.
    df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)
    df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)
    
    # is_absent is 1 where the row has no measurement. Its running total minus the
    # running total at the last measured row gives the rows elapsed since then.
    # NOTE(review): cumsum and the forward fill below are NOT grouped by ID_COLS,
    # so the first rows of one stay can inherit state from the preceding stay in
    # the frame - confirm whether that is intended.
    is_absent = (1 - df_out.loc[:, idx[:, 'mask']])
    hours_of_absence = is_absent.cumsum()
    time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')
    time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)

    df_out = pd.concat((df_out, time_since_measured), axis=1)
    # Rows with no earlier measured row are still NaN here; they get the sentinel 100.
    df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)
    
    df_out.sort_index(axis=1, inplace=True)
    return df_out

In [10]:
# ---- Labels ---------------------------------------------------------------
# Keep only stays with more than WINDOW_SIZE + GAP_TIME recorded hours. The
# explicit .copy() makes Ys an independent frame, so adding columns below does
# not trigger pandas' "set on a copy of a slice" warning.
Ys = statics.loc[
    statics.max_hours > WINDOW_SIZE + GAP_TIME, ['mort_hosp', 'mort_icu', 'los_icu']
].copy()
Ys['los_3'] = Ys['los_icu'] > 3
Ys['los_7'] = Ys['los_icu'] > 7
# The original called Ys.astype(float) and discarded the result (a no-op);
# the converted frame is now actually kept.
Ys = Ys.drop(columns=['los_icu']).astype(float)

# ---- Features: labelled stays only, first WINDOW_SIZE hours ----------------
# The set of labelled stays does not depend on the frame, so build it once.
kept_icustays = set(Ys.index.get_level_values('icustay_id'))
lvl2, raw = [df[
    (df.index.get_level_values('icustay_id').isin(kept_icustays)) &
    (df.index.get_level_values('hours_in') < WINDOW_SIZE)
] for df in (data_full_lvl2, data_full_raw)]

#raw.columns = raw.columns.droplevel(level=['label', 'LEVEL1', 'LEVEL2'])

# ---- Subject-level train / dev / test split --------------------------------
train_frac, dev_frac, test_frac = 0.7, 0.1, 0.2
lvl2_subj_idx, raw_subj_idx, Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, raw, Ys)]
lvl2_subjects = set(lvl2_subj_idx)
assert lvl2_subjects == set(Ys_subj_idx), "Subject ID pools differ!"
#assert lvl2_subjects == set(raw_subj_idx), "Subject ID pools differ!"

# Re-seed so the permutation does not depend on earlier random draws.
np.random.seed(SEED)
subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)
N_train, N_dev, N_test = int(train_frac * N), int(dev_frac * N), int(test_frac * N)
train_subj = subjects[:N_train]
dev_subj   = subjects[N_train:N_train + N_dev]
# The test set takes every remaining subject (N_test is not used for slicing).
test_subj  = subjects[N_train+N_dev:]

# Each split is copied so the in-place standardisation below writes to an
# independent frame rather than to a flagged slice of lvl2 / raw / Ys.
[(lvl2_train, lvl2_dev, lvl2_test), (raw_train, raw_dev, raw_test), (Ys_train, Ys_dev, Ys_test)] = [
    [df[df.index.get_level_values('subject_id').isin(s)].copy() for s in (train_subj, dev_subj, test_subj)]
    for df in (lvl2, raw, Ys)
]

# ---- Standardise the 'mean' columns with training-set statistics ------------
idx = pd.IndexSlice
mean_cols = idx[:, 'mean']
lvl2_means, lvl2_stds = lvl2_train.loc[:, mean_cols].mean(axis=0), lvl2_train.loc[:, mean_cols].std(axis=0)
#raw_means, raw_stds = raw_train.loc[:, idx[:,'mean']].mean(axis=0), raw_train.loc[:, idx[:,'mean']].std(axis=0)

# Dev and test reuse the training mean/std.
for lvl2_split in (lvl2_train, lvl2_dev, lvl2_test):
    lvl2_split.loc[:, mean_cols] = (lvl2_split.loc[:, mean_cols] - lvl2_means) / lvl2_stds

#raw_train.loc[:, idx[:,'mean']] = (raw_train.loc[:, idx[:,'mean']] - raw_means)/raw_stds
#raw_dev.loc[:, idx[:,'mean']] = (raw_dev.loc[:, idx[:,'mean']] - raw_means)/raw_stds
#raw_test.loc[:, idx[:,'mean']] = (raw_test.loc[:, idx[:,'mean']] - raw_means)/raw_stds

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, val, pi)


In [11]:
# Impute each split separately with simple_imputer.
lvl2_train = simple_imputer(lvl2_train)
lvl2_dev = simple_imputer(lvl2_dev)
lvl2_test = simple_imputer(lvl2_test)

# Flatten to one row per stay by pivoting 'hours_in' into the columns.
stay_key = ['subject_id', 'hadm_id', 'icustay_id']
lvl2_flat_train = lvl2_train.pivot_table(index=stay_key, columns=['hours_in'])
lvl2_flat_dev = lvl2_dev.pivot_table(index=stay_key, columns=['hours_in'])
lvl2_flat_test = lvl2_test.pivot_table(index=stay_key, columns=['hours_in'])

# The imputed (non-flattened) frames must not contain any missing value.
for imputed in (lvl2_train, lvl2_dev, lvl2_test):
    assert not imputed.isnull().any().any()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [12]:

#raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test = [
#    simple_imputer(df) for df in (raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test)
#]
#raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [
#    df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], columns=['hours_in']) for df in (
#        raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test
#    )
#]

#for df in lvl2_train, lvl2_dev, lvl2_test, raw_train, raw_dev, raw_test: assert not df.isnull().any().any()



### Task Prediction

#### Hyperparams

In [13]:
# Number of random hyperparameter configurations drawn per model.
N = 15

# --- Logistic regression search space ---
lr_space = {
    'C': Choice(np.geomspace(1e-3, 1e3, 10000)),
    'penalty': Choice(['l1', 'l2']),
    'solver': Choice(['liblinear', 'lbfgs']),
    'max_iter': Choice([100, 500])
}
LR_dist = DictDist(lr_space)
np.random.seed(SEED)
LR_hyperparams_list = LR_dist.rvs(N)

# Any draw that selected the 'lbfgs' solver is forced to the 'l2' penalty.
for i, lr_params in enumerate(LR_hyperparams_list):
    if lr_params['solver'] == 'lbfgs':
        lr_params['penalty'] = 'l2'

# --- Random forest search space ---
rf_space = {
    'n_estimators': ss.randint(50, 500),
    'max_depth': ss.randint(2, 10),
    'min_samples_split': ss.randint(2, 75),
    'min_samples_leaf': ss.randint(1, 50),
}
RF_dist = DictDist(rf_space)
# Re-seeded so the RF draws do not depend on the LR sampling above.
np.random.seed(SEED)
RF_hyperparams_list = RF_dist.rvs(N)

In [14]:
def run_basic(model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, target):
    """Random-search hyperparameters on the dev set, then fit and score the winner.

    Every candidate is fit on ``X_flat_train`` against the module-level
    ``Ys_train[target]`` and scored by ROC AUC on ``X_flat_dev`` against
    ``Ys_dev[target]``. The best candidate is handed to ``run_only_final``.

    Args:
        model: Estimator class, instantiated as ``model(**hyperparams)``; it
            must provide ``fit`` and ``predict_proba``.
        hyperparams_list: Non-empty sequence of keyword-argument dicts.
        X_flat_train, X_flat_dev, X_flat_test: Feature matrices per split.
        target: Name of the label column to predict.

    Returns:
        Whatever ``run_only_final`` returns for the best hyperparameters.

    Raises:
        ValueError: If ``hyperparams_list`` is empty. Previously this surfaced
            later as an opaque ``TypeError`` from ``model(**None)``.
    """
    if len(hyperparams_list) == 0:
        raise ValueError("hyperparams_list must contain at least one candidate")

    # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
    best_s, best_hyperparams = -np.inf, None
    for i, hyperparams in enumerate(hyperparams_list):
        print("On sample %d / %d (hyperparams = %s)" % (i+1, len(hyperparams_list), repr((hyperparams))))
        M = model(**hyperparams)
        M.fit(X_flat_train, Ys_train[target])
        # Column 1 of predict_proba is used as the positive-class score.
        s = roc_auc_score(Ys_dev[target], M.predict_proba(X_flat_dev)[:, 1])
        if s > best_s:
            best_s, best_hyperparams = s, hyperparams
            print("New Best Score: %.2f @ hyperparams = %s" % (100*best_s, repr((best_hyperparams))))

    return run_only_final(model, best_hyperparams, X_flat_train, X_flat_dev, X_flat_test, target)

def run_only_final(model, best_hyperparams, X_flat_train, X_flat_dev, X_flat_test, target):
    """Fit ``model(**best_hyperparams)`` on train+dev and score it on the test split.

    Labels come from the module-level ``Ys_train``, ``Ys_dev`` and ``Ys_test``
    frames, using column ``target``.

    Returns:
        Tuple ``(fitted_model, best_hyperparams, auc, auprc, acc, F1)``.
    """
    best_M = model(**best_hyperparams)

    # The final model sees both the training and the dev rows.
    fit_features = pd.concat((X_flat_train, X_flat_dev))
    fit_labels = pd.concat((Ys_train, Ys_dev))[target]
    best_M.fit(fit_features, fit_labels)

    y_true = Ys_test[target]
    # Column 1 of predict_proba feeds the ranking metrics; predict() feeds the others.
    y_score = best_M.predict_proba(X_flat_test)[:, 1]
    y_pred = best_M.predict(X_flat_test)

    test_metrics = (
        roc_auc_score(y_true, y_score),
        average_precision_score(y_true, y_score),
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    )
    return (best_M, best_hyperparams) + test_metrics

### Sklearn

In [15]:
# Directory that receives the pickled results. Set the MIMIC_RESULTS_DIR
# environment variable to redirect it on another machine; the default keeps
# the location this notebook was originally run with.
RESULTS_DIR = os.environ.get(
    'MIMIC_RESULTS_DIR',
    '/Users/mengxuan/Desktop/UIUC_MCS/CS598DLH/MIMIC_Extract/scratch/mmd',
)
RESULTS_PATH = os.path.join(RESULTS_DIR, 'baselines-sklearn_RF.pkl')
# Uncomment to resume from earlier results. Only unpickle files you produced
# yourself: pickle.load can execute arbitrary code.
#with open(RESULTS_PATH, mode='rb') as f: results = pickle.load(f)

# When True, an entry already present in `results` is refit with its stored
# hyperparameters instead of being skipped.
RERUN = True

In [16]:
%%time
# Start from an empty container: results[model_name][target][representation].
results = dict()
for model_name, model, hyperparams_list in [
    ('RF', RandomForestClassifier, RF_hyperparams_list),
    # ('LR', LogisticRegression, LR_hyperparams_list),
]:
    model_results = results.setdefault(model_name, {})
    for t in ['mort_icu']:  # 'los_3' is commented out in this cell
        target_results = model_results.setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in (
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),
        ):
            run_id = (model_name, t, n)
            if n not in target_results:
                # Nothing stored yet: hyperparameter search followed by the final fit.
                print("Running model %s on target %s with representation %s" % run_id)
                target_results[n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )
            else:
                print("Finished model %s on target %s with representation %s" % run_id)
                if not RERUN:
                    continue
                # Reuse the stored hyperparameters (slot 1 of the tuple) and refit only.
                h = target_results[n][1]
                target_results[n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)

            print("Final results for model %s on target %s with representation %s" % run_id)
            print(target_results[n][2:])
            # Persist after every finished run.
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model RF on target mort_icu with representation lvl2
On sample 1 / 15 (hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8})
New Best Score: 84.14 @ hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8}
On sample 2 / 15 (hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4})
New Best Score: 84.89 @ hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4}
On sample 3 / 15 (hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7})
New Best Score: 86.69 @ hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7}
On sample 4 / 15 (hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split': 65, 'min_samples_leaf': 22})
New Best Score: 87.87 @ hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split

In [17]:
%%time
# Adds to the existing `results` container: results[model_name][target][representation].
for model_name, model, hyperparams_list in [
    ('RF', RandomForestClassifier, RF_hyperparams_list),
    # ('LR', LogisticRegression, LR_hyperparams_list),
]:
    model_results = results.setdefault(model_name, {})
    for t in ['los_3']:  # 'mort_icu' is commented out in this cell
        target_results = model_results.setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in (
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),
        ):
            run_id = (model_name, t, n)
            if n not in target_results:
                # Nothing stored yet: hyperparameter search followed by the final fit.
                print("Running model %s on target %s with representation %s" % run_id)
                target_results[n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )
            else:
                print("Finished model %s on target %s with representation %s" % run_id)
                if not RERUN:
                    continue
                # Reuse the stored hyperparameters (slot 1 of the tuple) and refit only.
                h = target_results[n][1]
                target_results[n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)

            print("Final results for model %s on target %s with representation %s" % run_id)
            print(target_results[n][2:])
            # Persist after every finished run.
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model RF on target los_3 with representation lvl2
On sample 1 / 15 (hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8})
New Best Score: 70.61 @ hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8}
On sample 2 / 15 (hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4})
New Best Score: 71.38 @ hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4}
On sample 3 / 15 (hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7})
New Best Score: 72.25 @ hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7}
On sample 4 / 15 (hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split': 65, 'min_samples_leaf': 22})
New Best Score: 72.65 @ hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split': 

In [18]:
%%time
# Adds to the existing `results` container: results[model_name][target][representation].
for model_name, model, hyperparams_list in [
    ('RF', RandomForestClassifier, RF_hyperparams_list),
    # ('LR', LogisticRegression, LR_hyperparams_list),
]:
    model_results = results.setdefault(model_name, {})
    for t in ['mort_hosp']:  # 'mort_icu', 'los_3' and 'los_7' are commented out in this cell
        target_results = model_results.setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in [
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),
        ]:
            run_id = (model_name, t, n)
            if n not in target_results:
                # Nothing stored yet: hyperparameter search followed by the final fit.
                print("Running model %s on target %s with representation %s" % run_id)
                target_results[n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )
            else:
                print("Finished model %s on target %s with representation %s" % run_id)
                if not RERUN:
                    continue
                # Reuse the stored hyperparameters (slot 1 of the tuple) and refit only.
                h = target_results[n][1]
                target_results[n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)

            print("Final results for model %s on target %s with representation %s" % run_id)
            print(target_results[n][2:])
            # Persist after every finished run.
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model RF on target mort_hosp with representation lvl2
On sample 1 / 15 (hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8})
New Best Score: 80.72 @ hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8}
On sample 2 / 15 (hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4})
New Best Score: 82.03 @ hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4}
On sample 3 / 15 (hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7})
New Best Score: 84.00 @ hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7}
On sample 4 / 15 (hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split': 65, 'min_samples_leaf': 22})
New Best Score: 85.02 @ hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_spli

In [19]:
%%time
# Adds to the existing `results` container: results[model_name][target][representation].
for model_name, model, hyperparams_list in [
    ('RF', RandomForestClassifier, RF_hyperparams_list),
    # ('LR', LogisticRegression, LR_hyperparams_list),
]:
    model_results = results.setdefault(model_name, {})
    for t in ['los_7']:  # 'mort_icu', 'los_3' and 'mort_hosp' are commented out in this cell
        target_results = model_results.setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in [
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),
        ]:
            run_id = (model_name, t, n)
            if n not in target_results:
                # Nothing stored yet: hyperparameter search followed by the final fit.
                print("Running model %s on target %s with representation %s" % run_id)
                target_results[n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )
            else:
                print("Finished model %s on target %s with representation %s" % run_id)
                if not RERUN:
                    continue
                # Reuse the stored hyperparameters (slot 1 of the tuple) and refit only.
                h = target_results[n][1]
                target_results[n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)

            print("Final results for model %s on target %s with representation %s" % run_id)
            print(target_results[n][2:])
            # Persist after every finished run.
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model RF on target los_7 with representation lvl2
On sample 1 / 15 (hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8})
New Best Score: 76.57 @ hyperparams = {'n_estimators': 87, 'max_depth': 3, 'min_samples_split': 15, 'min_samples_leaf': 8}
On sample 2 / 15 (hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4})
New Best Score: 77.18 @ hyperparams = {'n_estimators': 285, 'max_depth': 4, 'min_samples_split': 11, 'min_samples_leaf': 4}
On sample 3 / 15 (hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7})
New Best Score: 77.35 @ hyperparams = {'n_estimators': 446, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 7}
On sample 4 / 15 (hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split': 65, 'min_samples_leaf': 22})
New Best Score: 77.43 @ hyperparams = {'n_estimators': 122, 'max_depth': 8, 'min_samples_split': 

In [20]:
# Directory that receives the pickled results. Set the MIMIC_RESULTS_DIR
# environment variable to redirect it on another machine; the default keeps
# the location this notebook was originally run with.
RESULTS_DIR = os.environ.get(
    'MIMIC_RESULTS_DIR',
    '/Users/mengxuan/Desktop/UIUC_MCS/CS598DLH/MIMIC_Extract/scratch/mmd',
)
RESULTS_PATH = os.path.join(RESULTS_DIR, 'baselines-sklearn_LR.pkl')
# Uncomment to resume from earlier results. Only unpickle files you produced
# yourself: pickle.load can execute arbitrary code.
#with open(RESULTS_PATH, mode='rb') as f: results = pickle.load(f)

# When True, an entry already present in `results` is refit with its stored
# hyperparameters instead of being skipped.
RERUN = True

In [21]:
%%time
# Start from an empty container: results[model_name][target][representation].
results = dict()
for model_name, model, hyperparams_list in [
    # ('RF', RandomForestClassifier, RF_hyperparams_list),
    ('LR', LogisticRegression, LR_hyperparams_list),
]:
    model_results = results.setdefault(model_name, {})
    for t in ['mort_icu']:  # 'los_3' is commented out in this cell
        target_results = model_results.setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in (
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),
        ):
            run_id = (model_name, t, n)
            if n not in target_results:
                # Nothing stored yet: hyperparameter search followed by the final fit.
                print("Running model %s on target %s with representation %s" % run_id)
                target_results[n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )
            else:
                print("Finished model %s on target %s with representation %s" % run_id)
                if not RERUN:
                    continue
                # Reuse the stored hyperparameters (slot 1 of the tuple) and refit only.
                h = target_results[n][1]
                target_results[n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)

            print("Final results for model %s on target %s with representation %s" % run_id)
            print(target_results[n][2:])
            # Persist after every finished run.
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model LR on target mort_icu with representation lvl2
On sample 1 / 15 (hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 76.60 @ hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500}
On sample 2 / 15 (hyperparams = {'C': 1.3047026700306064, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 3 / 15 (hyperparams = {'C': 0.003491839757169992, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 4 / 15 (hyperparams = {'C': 48.783036208459954, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 5 / 15 (hyperparams = {'C': 0.05459762073728651, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 6 / 15 (hyperparams = {'C': 1.081193410945589, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 7 / 15 (hyperparams = {'C': 0.0012201371230349724, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 8 / 15 (hyperparams = {'C': 0.3429679403524682, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 80.13 @ hyperparams = {'C': 0.3429679403524682, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500}
On sample 9 / 15 (hyperparams = {'C': 44.77804273778909, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 10 / 15 (hyperparams = {'C': 0.11951096159304532, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 11 / 15 (hyperparams = {'C': 433.47464177487655, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
On sample 12 / 15 (hyperparams = {'C': 1.7295128471299193, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})




On sample 13 / 15 (hyperparams = {'C': 1.6777315525707752, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 14 / 15 (hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 82.69 @ hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500}
On sample 15 / 15 (hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100})
New Best Score: 83.53 @ hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100}
Final results for model LR on target mort_icu with representation lvl2
(0.8603509583608725, 0.4302418463584417, 0.9323590814196242, 0.37931034482758624)
CPU times: user 1h 31min 49s, sys: 15.2 s, total: 1h 32min 4s
Wall time: 4h 54min 56s


In [22]:
%%time
# Adds to the existing `results` container: results[model_name][target][representation].
for model_name, model, hyperparams_list in [
    # ('RF', RandomForestClassifier, RF_hyperparams_list),
    ('LR', LogisticRegression, LR_hyperparams_list),
]:
    model_results = results.setdefault(model_name, {})
    for t in ['los_3']:  # 'mort_icu' is commented out in this cell
        target_results = model_results.setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in (
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),
        ):
            run_id = (model_name, t, n)
            if n not in target_results:
                # Nothing stored yet: hyperparameter search followed by the final fit.
                print("Running model %s on target %s with representation %s" % run_id)
                target_results[n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )
            else:
                print("Finished model %s on target %s with representation %s" % run_id)
                if not RERUN:
                    continue
                # Reuse the stored hyperparameters (slot 1 of the tuple) and refit only.
                h = target_results[n][1]
                target_results[n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)

            print("Final results for model %s on target %s with representation %s" % run_id)
            print(target_results[n][2:])
            # Persist after every finished run.
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model LR on target los_3 with representation lvl2
On sample 1 / 15 (hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 67.23 @ hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500}
On sample 2 / 15 (hyperparams = {'C': 1.3047026700306064, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 3 / 15 (hyperparams = {'C': 0.003491839757169992, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 4 / 15 (hyperparams = {'C': 48.783036208459954, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 5 / 15 (hyperparams = {'C': 0.05459762073728651, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 6 / 15 (hyperparams = {'C': 1.081193410945589, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 7 / 15 (hyperparams = {'C': 0.0012201371230349724, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 8 / 15 (hyperparams = {'C': 0.3429679403524682, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
On sample 9 / 15 (hyperparams = {'C': 44.77804273778909, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 10 / 15 (hyperparams = {'C': 0.11951096159304532, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 11 / 15 (hyperparams = {'C': 433.47464177487655, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
On sample 12 / 15 (hyperparams = {'C': 1.7295128471299193, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 67.30 @ hyperparams = {'C': 1.7295128471299193, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500}
On sample 13 / 15 (hyperparams = {'C': 1.6777315525707752, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 14 / 15 (hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 68.88 @ hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500}
On sample 15 / 15 (hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100})
New Best Score: 69.25 @ hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100}
Final results for model LR on target los_3 with representation lvl2
(0.6999527741745761, 0.6382746863137819, 0.6682672233820459, 0.5560212349818385)
CPU times: user 3h 7min 16s, sys: 37.6 s, total: 3h 7min 54s
Wall time: 3h 28min 6s


In [23]:
%%time
# Sweep over every enabled (model, target, representation) combination.
# A combination already present in `results` is skipped unless RERUN is set, in
# which case only the final fit is repeated with the stored settings. After each
# completed combination the whole `results` dict is re-pickled to RESULTS_PATH.
for model_name, model, hyperparams_list in [
    # ('RF', RandomForestClassifier, RF_hyperparams_list),  # disabled for this run
    ('LR', LogisticRegression, LR_hyperparams_list),
]:
    results.setdefault(model_name, {})
    for t in [
        # 'mort_icu',   # disabled for this run
        # 'los_3',      # disabled for this run
        # 'mort_hosp',  # disabled for this run
        'los_7',
    ]:
        results[model_name].setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in [
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),  # disabled for this run
        ]:
            if n in results[model_name][t]:
                print("Finished model %s on target %s with representation %s" % (model_name, t, n))
                # Cached entry and no rerun requested: leave it untouched.
                if not RERUN:
                    continue
                # Element 1 of the cached entry is handed to run_only_final in the
                # hyperparameter position (presumably the best hyperparams found
                # earlier -- confirm against run_basic's return value).
                h = results[model_name][t][n][1]
                results[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)
            else:
                print("Running model %s on target %s with representation %s" % (model_name, t, n))
                results[model_name][t][n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )

            # Shared tail for both the rerun and the fresh-run paths: report
            # everything from index 2 onward of the entry, then checkpoint.
            print("Final results for model %s on target %s with representation %s" % (model_name, t, n))
            print(results[model_name][t][n][2:])
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model LR on target los_7 with representation lvl2
On sample 1 / 15 (hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 68.19 @ hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500}
On sample 2 / 15 (hyperparams = {'C': 1.3047026700306064, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 3 / 15 (hyperparams = {'C': 0.003491839757169992, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 4 / 15 (hyperparams = {'C': 48.783036208459954, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 5 / 15 (hyperparams = {'C': 0.05459762073728651, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 6 / 15 (hyperparams = {'C': 1.081193410945589, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 7 / 15 (hyperparams = {'C': 0.0012201371230349724, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 8 / 15 (hyperparams = {'C': 0.3429679403524682, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
On sample 9 / 15 (hyperparams = {'C': 44.77804273778909, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 10 / 15 (hyperparams = {'C': 0.11951096159304532, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 11 / 15 (hyperparams = {'C': 433.47464177487655, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 68.26 @ hyperparams = {'C': 433.47464177487655, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500}
On sample 12 / 15 (hyperparams = {'C': 1.7295128471299193, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
On sample 13 / 15 (hyperparams = {'C': 1.6777315525707752, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 14 / 15 (hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
On sample 15 / 15 (hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100})
Final results for model LR on target los_7 with representation lvl2
(0.6718047412553034, 0.1381086172945656, 0.9156576200417537, 0.028846153846153844)
CPU times: user 1h 27min 55s, sys: 11.5 s, total: 1h 28min 6s
Wall time: 1h 28min 7s


In [24]:
%%time
# Sweep over every enabled (model, target, representation) combination.
# A combination already present in `results` is skipped unless RERUN is set, in
# which case only the final fit is repeated with the stored settings. After each
# completed combination the whole `results` dict is re-pickled to RESULTS_PATH.
for model_name, model, hyperparams_list in [
    # ('RF', RandomForestClassifier, RF_hyperparams_list),  # disabled for this run
    ('LR', LogisticRegression, LR_hyperparams_list),
]:
    results.setdefault(model_name, {})
    for t in [
        # 'mort_icu',  # disabled for this run
        # 'los_3',     # disabled for this run
        'mort_hosp',
        # 'los_7',     # disabled for this run
    ]:
        results[model_name].setdefault(t, {})
        for n, X_flat_train, X_flat_dev, X_flat_test in [
            ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
            # ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),  # disabled for this run
        ]:
            if n in results[model_name][t]:
                print("Finished model %s on target %s with representation %s" % (model_name, t, n))
                # Cached entry and no rerun requested: leave it untouched.
                if not RERUN:
                    continue
                # Element 1 of the cached entry is handed to run_only_final in the
                # hyperparameter position (presumably the best hyperparams found
                # earlier -- confirm against run_basic's return value).
                h = results[model_name][t][n][1]
                results[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)
            else:
                print("Running model %s on target %s with representation %s" % (model_name, t, n))
                results[model_name][t][n] = run_basic(
                    model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t
                )

            # Shared tail for both the rerun and the fresh-run paths: report
            # everything from index 2 onward of the entry, then checkpoint.
            print("Final results for model %s on target %s with representation %s" % (model_name, t, n))
            print(results[model_name][t][n][2:])
            with open(RESULTS_PATH, mode='wb') as f:
                pickle.dump(results, f)

Running model LR on target mort_hosp with representation lvl2
On sample 1 / 15 (hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 73.46 @ hyperparams = {'C': 0.001383611303681924, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500}
On sample 2 / 15 (hyperparams = {'C': 1.3047026700306064, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 3 / 15 (hyperparams = {'C': 0.003491839757169992, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 4 / 15 (hyperparams = {'C': 48.783036208459954, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 5 / 15 (hyperparams = {'C': 0.05459762073728651, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 6 / 15 (hyperparams = {'C': 1.081193410945589, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 7 / 15 (hyperparams = {'C': 0.0012201371230349724, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 8 / 15 (hyperparams = {'C': 0.3429679403524682, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 78.47 @ hyperparams = {'C': 0.3429679403524682, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500}
On sample 9 / 15 (hyperparams = {'C': 44.77804273778909, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 10 / 15 (hyperparams = {'C': 0.11951096159304532, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 100})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 11 / 15 (hyperparams = {'C': 433.47464177487655, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
On sample 12 / 15 (hyperparams = {'C': 1.7295128471299193, 'penalty': 'l2', 'solver': 'liblinear', 'max_iter': 500})
On sample 13 / 15 (hyperparams = {'C': 1.6777315525707752, 'penalty': 'l2', 'solver': 'lbfgs', 'max_iter': 500})


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


On sample 14 / 15 (hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500})
New Best Score: 82.57 @ hyperparams = {'C': 0.059893230394471704, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 500}
On sample 15 / 15 (hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100})
New Best Score: 82.90 @ hyperparams = {'C': 0.032340816118081595, 'penalty': 'l1', 'solver': 'liblinear', 'max_iter': 100}
Final results for model LR on target mort_hosp with representation lvl2
(0.8482830925331123, 0.47076738024277803, 0.9073068893528183, 0.40322580645161293)
CPU times: user 1h 13min 57s, sys: 19.3 s, total: 1h 14min 17s
Wall time: 1h 14min 18s
