In [1]:
# All as One: Adaboost
# Treat All Participants as One
# Try with 10% ... 90% for Training (several iterations for each)

In [2]:
use_key_features = False
shuffle_times = 100
iterations = 10
if use_key_features:
    output_file = 'all_kf_adaboost_results.csv'
else:
    output_file = 'all_af_adaboost_results.csv'

In [3]:
from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn import metrics
from itertools import combinations
import itertools
import json
pd.options.mode.chained_assignment = None

# 1 -- Load Data

In [4]:
# Windowed Data: 1 Second Overlapping Windows, Feature Median + Variance in Window 
p5_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p5_master_window.csv'
p7_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p7_master_window.csv'
p9_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p9_master_window.csv'
p11_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p11_master_window.csv'
p12_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p12_master_window.csv'
p17_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p17_master_window.csv'
p18_file = '../../../../../Google Drive File Stream/My Drive/USC Expeditions Year 5/Analysis/Engagement/Data/Master Window/p18_master_window.csv'

data5 = pd.read_csv(p5_file)
data7 = pd.read_csv(p7_file)
data9 = pd.read_csv(p9_file)
data11 = pd.read_csv(p11_file)
data12 = pd.read_csv(p12_file)
data17 = pd.read_csv(p17_file)
data18 = pd.read_csv(p18_file)

In [5]:
data = [data5, data7, data9, data11, data12, data17, data18]
all_data = pd.concat(data, ignore_index=True, sort=True)

In [6]:
remove = [
'engagement_change',
'ros_GAME_STATE',
'ros_PARTICIPANT_STATE',
'ros_ROBOT_STATE',
'ros_activity',
'ros_diff_1_change',
'ros_diff_2_change',
'ros_diff_3_change',
'ros_diff_4_change',
'ros_diff_5_change',
'ros_difficulty',
'ros_skill_EM_change',
'ros_skill_NC_change',
'ros_skill_OS_change',
'ros_ts_attempt_var',
'ros_games_session_change',
'ros_in_game_change',
'ros_mistakes_session_change',
'ros_mistakes_game_change',
'ros_skill',
'ros_ts_game_start_var',
'ros_ts_robot_talked_var',
'ros_game_correct',
'ros_game_incorrect',
'ros_game_start',
'ros_mistake_made',
]

all_data = all_data.drop(columns=remove)

In [7]:
# Remove rows where engagment NaN
all_data = all_data[np.isfinite(all_data['engagement'])]

# Remove rows where engagment is -1
all_data = all_data[all_data['engagement']>=0]

# 2 -- Choose Feature Set

Feature Dictionary: https://docs.google.com/spreadsheets/d/1ewoVPHwW68Ins0AOVZf-0lsl_wW0_ZzuByuDiNJETBY/edit?usp=sharing

### Data Overview

In [8]:
# Main Columns
basic_cols = []
for i in all_data.columns:
    if 'of_' not in i and 'op_' not in i and 'ros_' not in i and 'a_' not in i:
        basic_cols.append(i)
        
basic_cols = sorted(basic_cols)
for i in basic_cols:
    print(i)

engagement
participant
session_num
timestamp


In [9]:
# Open Face Columns

of_cols = []
for i in all_data.columns:
    if ('of_' in i or 'op_' in i) and '_change' not in i and '_var' not in i:
        of_cols.append(i)
        
of_cols = sorted(of_cols)
for i in of_cols:
    print(i)

of_AU01_c
of_AU02_c
of_AU04_c
of_AU05_c
of_AU06_c
of_AU07_c
of_AU09_c
of_AU10_c
of_AU12_c
of_AU14_c
of_AU15_c
of_AU17_c
of_AU20_c
of_AU23_c
of_AU25_c
of_AU26_c
of_AU28_c
of_AU45_c
of_confidence
of_gaze_0_x
of_gaze_0_y
of_gaze_0_z
of_gaze_1_x
of_gaze_1_y
of_gaze_1_z
of_gaze_angle_x
of_gaze_angle_y
of_gaze_distance
of_gaze_distance_x
of_gaze_distance_y
of_pose_Rx
of_pose_Ry
of_pose_Rz
of_pose_Tx
of_pose_Ty
of_pose_Tz
of_pose_distance
of_success
of_ts_success
op_num_people


In [10]:
# Audio Columns

a_cols = []
for i in all_data.columns:
    if 'a_' in i and '_change' not in i and '_var' not in i:
        a_cols.append(i)
        
a_cols = sorted(a_cols)
for i in a_cols:
    print(i)

a_harmonicity
a_intensity
a_mfcc_0
a_mfcc_1
a_pitch_frequency
a_pitch_strength


In [11]:
# ROS Columns

ros_cols = []
for i in all_data.columns:
    if 'ros_' in i and '_change' not in i and '_var' not in i:
        ros_cols.append(i)
        
ros_cols = sorted(ros_cols)
for i in ros_cols:
    print(i)

ros_aptitude
ros_diff_1
ros_diff_2
ros_diff_3
ros_diff_4
ros_diff_5
ros_games_session
ros_in_game
ros_mistakes_game
ros_mistakes_session
ros_skill_EM
ros_skill_NC
ros_skill_OS
ros_ts_attempt
ros_ts_game_start
ros_ts_robot_talked


In [12]:
# For Window Only:
non_window_features = []
window_features = []
for i in all_data.columns:
    if i not in basic_cols:
        if 'change' in i or 'var' in i:
            window_features.append(i)
        else:
            non_window_features.append(i)
        
window_features = sorted(window_features)
for i in window_features:
    print(i)

a_harmonicity_var
a_intensity_var
a_mfcc_0_var
a_mfcc_1_var
a_pitch_frequency_var
a_pitch_strength_var
of_AU01_c_change
of_AU02_c_change
of_AU04_c_change
of_AU05_c_change
of_AU06_c_change
of_AU07_c_change
of_AU09_c_change
of_AU10_c_change
of_AU12_c_change
of_AU14_c_change
of_AU15_c_change
of_AU17_c_change
of_AU20_c_change
of_AU23_c_change
of_AU25_c_change
of_AU26_c_change
of_AU28_c_change
of_AU45_c_change
of_confidence_var
of_gaze_0_x_var
of_gaze_0_y_var
of_gaze_0_z_var
of_gaze_1_x_var
of_gaze_1_y_var
of_gaze_1_z_var
of_gaze_angle_x_var
of_gaze_angle_y_var
of_gaze_distance_var
of_gaze_distance_x_var
of_gaze_distance_y_var
of_pose_Rx_var
of_pose_Ry_var
of_pose_Rz_var
of_pose_Tx_var
of_pose_Ty_var
of_pose_Tz_var
of_pose_distance_var
of_success_change
of_ts_success_var
op_num_people_change
ros_aptitude_var


In [13]:
# Columns where NaNs filled with max value
nan_max_cols = ['of_gaze_0_x',
'of_gaze_0_y',
'of_gaze_0_z',
'of_gaze_1_x',
'of_gaze_1_y',
'of_gaze_1_z',
'of_gaze_angle_x',
'of_gaze_angle_y',
'of_gaze_distance',
'of_gaze_distance_x',
'of_gaze_distance_y',
'of_pose_Rxv',
'of_pose_Ry',
'of_pose_Rz',
'of_pose_Tx',
'of_pose_Ty',
'of_pose_Tz',
'of_pose_distance']

In [14]:
# Key Features (from feature analysis) 
# Note: Timestamp automatically included

key_features = [
'op_num_people',
'of_pose_distance',
'of_gaze_distance',
'of_confidence',
'ros_mistakes_session',
'ros_ts_robot_talked']

### Filter Feature Set (Optional)

In [15]:
# always include basic_cols, add desired group of features 
# features_to_keep = all_data.columns

if use_key_features:
    features_to_keep = basic_cols + key_features
else:
    features_to_keep = basic_cols + of_cols + ros_cols + a_cols + window_features

all_data = all_data[features_to_keep]

In [16]:
# All as One Participant: Shuffle Data! 

for i in range(shuffle_times):
    all_data.reindex(np.random.permutation(all_data.index))
all_data = all_data.reset_index(drop=True)

# 3 -- Scenario Based Modeling

- Open Face Success / Failure
- Robot Talking / Not Talking
- First 10 Minutes / After 10 Minutes

In [17]:
# Create Separate Models for Different Scenarios

# for i,d in enumerate(data):
#     # Open Face Success/Failure
#     data[i] = d.loc[d['of_success']==1]
#     data[i] = d.loc[d['of_success']==0]
    
#     # Robot Talking/Not Talking
#     data[i] = d.loc[d['ros_ts_robot_talked']==0]
#     data[i] = d.loc[d['ros_ts_robot_talked']>0]
    
#     # First 10 Min/After 10 Min
#     data[i] = d.loc[d['timestamp']<=(10*60)]
#     data[i] = d.loc[d['timestamp']>(10*60)]

# 4 -- Modeling

In [18]:
# Function: Formulate Train-Test Split 
# Includes Preprocessing 

# split_size: how much data for training
def split(split_size):
    y_data = all_data['engagement'].copy()
    X_data = all_data.drop(columns=['engagement', 'session_num', 'participant'], axis=1).copy()
        
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=1-split_size, shuffle=True)

    # Preprocessing: Standardization
    # x' = ( x - mean(x) ) / ( stdev(x) )
    # => x' is z-score, NaN's filled with min_val
    for c in X_train.columns:
        mean = np.nanmean(X_train[c])
        std = np.nanstd(X_train[c])

        if std == 0:
            X_train[c] = (X_train[c]-mean)
            X_test[c] = (X_test[c]-mean)
        else:
            X_train[c] = (X_train[c]-mean)/(std)
            X_test[c] = (X_test[c]-mean)/(std)

        if c not in nan_max_cols:
            min_val = np.nanmin(X_train[c])

            X_train[c] = X_train[c].fillna(min_val)
            X_test[c] = X_test[c].fillna(min_val)
        else:
            max_val = np.nanmax(X_train[c])

            X_train[c] = X_train[c].fillna(max_val)
            X_test[c] = X_test[c].fillna(max_val) 

    return X_train, y_train, X_test, y_test

In [19]:
# MODEL HERE

# Inputs: X_train, y_train, X_test, y_test, hp, isVerbose
# hp = Hyperparameter Dictionary, isVerbose = whether to return all accuracy metrics

# Output: Accuracy Metrics as Dictionary 

# IMPORTS HERE
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# Hyperparameter Combinations to Try:  
def model(X_train, y_train, X_test, y_test, hp, isVerbose):
    # Model Here: must create pred & scores arrays
    model = AdaBoostClassifier(base_estimator=RandomForestClassifier(n_estimators=100))
    model.fit(X_train.values, y_train.values)
    
    scores = model.predict(X_test.values)
    pred = [round(value) for value in scores] 
    
    # Evaluation
    results = {}
    accuracy = accuracy_score(y_test, pred)
    results['accuracy'] = accuracy
    
    if isVerbose:
        try:
            auc = roc_auc_score(y_test, scores)
        except:
            auc = np.nan
        results['auc'] = auc
    
        precision = metrics.precision_score(y_test, pred, average=None)
        recall = metrics.recall_score(y_test, pred, average=None)
        f1 = metrics.f1_score(y_test, pred, average=None)

        results['precision_0'], results['precision_1'] = precision[0], precision[1]
        results['recall_0'], results['recall_1'] = recall[0], recall[1]
        results['f1_0'], results['f1_1'] = f1[0], f1[1]
    
    return results

In [21]:
all_results = []
for perc in tqdm([0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]):
    print('Training Percentage:', perc)
    for n in range(iterations):
        print()
        new_results = {}
        
        # Shuffle Data For This Iteration
        for i in range(shuffle_times):
            all_data.reindex(np.random.permutation(all_data.index))        
        all_data = all_data.reset_index(drop=True)

        # Train - Test Split (and preprocessing)
        X_train, y_train, X_test, y_test = split(perc)
        X_train = X_train.reset_index(drop=True)
        y_train = y_train.reset_index(drop=True)
        '''
        # Set Up Cross Validation Groups
        div = int(len(X_train)/3)
        X_cv1 = X_train.loc[:div]
        X_cv2 = X_train.loc[div+1:div*2]
        X_cv3 = X_train.loc[(div*2)+1:]
        
        y_cv1 = y_train.loc[:div]
        y_cv2 = y_train.loc[div+1:div*2]
        y_cv3 = y_train.loc[(div*2)+1:]
        
        cv1_X_train = X_cv1.append(X_cv2)
        cv1_X_test = X_cv3
        cv1_y_train = y_cv1.append(y_cv2)
        cv1_y_test = y_cv3
        
        cv2_X_train = X_cv1.append(X_cv3)
        cv2_X_test = X_cv2
        cv2_y_train = y_cv1.append(y_cv3)
        cv2_y_test = y_cv2

        cv3_X_train = X_cv3.append(X_cv2)
        cv3_X_test = X_cv1
        cv3_y_train = y_cv3.append(y_cv2)
        cv3_y_test = y_cv1
        
        # Hyperparameter Combinations        
        hp_combos = list(itertools.product(*hp_list))
        best, best_index, best_acc = None, -1, 0
        for index,h in enumerate(hp_combos):
            hp = {}
            for i,v in enumerate(h):
                hp[hp_names[i]] = v
            
            # Cross-Validation
            res1 = model(cv1_X_train, cv1_y_train, cv1_X_test, cv1_y_test, hp, False)
            res2 = model(cv2_X_train, cv2_y_train, cv2_X_test, cv2_y_test, hp, False)
            res3 = model(cv3_X_train, cv3_y_train, cv3_X_test, cv3_y_test, hp, False)
            
            acc = np.mean([res1['accuracy'], res2['accuracy'], res3['accuracy']])
            print(hp, acc)
            if acc > best_acc:
                best = hp
                best_index = index
                best_acc = acc
        '''
        best = {}
        # Evaluate on Test Using Best Hyperaparemeters  
        print('Best HP', best)
        results = model(X_train, y_train, X_test, y_test, best, True)
        print(results)
        results['train_percentage'] = perc
        results['iteration'] = n
        results['hyperparameters'] = ""
        all_results.append(results)

  0%|          | 0/11 [00:00<?, ?it/s]

Training Percentage: 0.01

Best HP {}
{'f1_1': 0.8681256378274181, 'precision_1': 0.8433575677461996, 'precision_0': 0.7752303182579564, 'auc': 0.7905847010154062, 'recall_1': 0.8943925233644859, 'recall_0': 0.6867768786663266, 'f1_0': 0.7283278212867453, 'accuracy': 0.8224413213444864}

Best HP {}
{'f1_1': 0.8747424206365331, 'precision_1': 0.8282840496867099, 'precision_0': 0.8220376952716628, 'auc': 0.7823194118964911, 'recall_1': 0.9267221764423728, 'recall_0': 0.6379166473506096, 'f1_0': 0.7183670273162709, 'accuracy': 0.8266036689728322}

Best HP {}
{'f1_1': 0.8679093375339109, 'precision_1': 0.8257143491485536, 'precision_0': 0.7980329967701574, 'auc': 0.7752997315018633, 'recall_1': 0.9146489922900041, 'recall_0': 0.6359504707137226, 'f1_0': 0.7078316751190429, 'accuracy': 0.8180700527927103}

Best HP {}
{'f1_1': 0.8631222927668665, 'precision_1': 0.8340328425301592, 'precision_0': 0.7692307692307693, 'auc': 0.7793464396489354, 'recall_1': 0.8943142414479994, 'recall_0': 0.6643

  9%|▉         | 1/11 [00:26<04:20, 26.02s/it]

{'f1_1': 0.8679716587447688, 'precision_1': 0.8368870422792439, 'precision_0': 0.7824434733041992, 'auc': 0.7850115135034517, 'recall_1': 0.9014545141578449, 'recall_0': 0.6685685128490584, 'f1_0': 0.7210375576872569, 'accuracy': 0.8207699539570427}
Training Percentage: 0.05

Best HP {}
{'f1_1': 0.8909775358053756, 'precision_1': 0.8542825230733055, 'precision_0': 0.8432171332130595, 'auc': 0.8156959263228545, 'recall_1': 0.9309664441568758, 'recall_0': 0.7004254084888331, 'f1_0': 0.7652169320552431, 'accuracy': 0.8510982155567279}

Best HP {}
{'f1_1': 0.8894677988178769, 'precision_1': 0.8545321464831299, 'precision_0': 0.8367276393489846, 'auc': 0.8147629315394803, 'recall_1': 0.927381745502998, 'recall_0': 0.7021441175759626, 'f1_0': 0.7635508122601335, 'accuracy': 0.8493564783413302}

Best HP {}
{'f1_1': 0.8904799235650064, 'precision_1': 0.8530809580248072, 'precision_0': 0.8432347093907145, 'auc': 0.8143075379746688, 'recall_1': 0.9313083693103934, 'recall_0': 0.697306706638944, 

 18%|█▊        | 2/11 [11:02<31:23, 209.27s/it]

{'f1_1': 0.8878277819323044, 'precision_1': 0.8506806996111966, 'precision_0': 0.8370275314979001, 'auc': 0.8107015373505689, 'recall_1': 0.9283672396774731, 'recall_0': 0.6930358350236646, 'f1_0': 0.7582562747688242, 'accuracy': 0.8467606199914588}
Training Percentage: 0.1

Best HP {}
{'f1_1': 0.9043432996063376, 'precision_1': 0.8712200992929141, 'precision_0': 0.8672839506172839, 'auc': 0.8390720505206681, 'recall_1': 0.9400846850015557, 'recall_0': 0.7380594160397806, 'f1_0': 0.797470621461143, 'accuracy': 0.8700590439824636}

Best HP {}
{'f1_1': 0.9019436984243622, 'precision_1': 0.867818752893772, 'precision_0': 0.8640700719381151, 'auc': 0.8349391534403026, 'recall_1': 0.9388622640998565, 'recall_0': 0.7310160427807486, 'f1_0': 0.7919937097374918, 'accuracy': 0.8667179323999434}

Best HP {}
{'f1_1': 0.9033491827057544, 'precision_1': 0.870066407718331, 'precision_0': 0.8653003660805377, 'auc': 0.837392996863215, 'recall_1': 0.9392795790555803, 'recall_0': 0.7355064146708495, 'f1

 27%|██▋       | 3/11 [36:37<1:20:54, 606.83s/it]

{'f1_1': 0.9061760499284879, 'precision_1': 0.8726475928128717, 'precision_0': 0.8719305100538006, 'auc': 0.8413968873202105, 'recall_1': 0.9423838820904604, 'recall_0': 0.7404098925499605, 'f1_0': 0.8008060509026667, 'accuracy': 0.8724367133361618}
Training Percentage: 0.2

Best HP {}
{'f1_1': 0.9195628978255271, 'precision_1': 0.8912376795469782, 'precision_0': 0.8914570884496684, 'auc': 0.8652467848117593, 'recall_1': 0.9497476743479054, 'recall_0': 0.7807458952756132, 'f1_0': 0.8324365754579598, 'accuracy': 0.8913041316561428}

Best HP {}
{'f1_1': 0.9188844317656808, 'precision_1': 0.8876200731064577, 'precision_0': 0.8956408631557883, 'auc': 0.8622044296674476, 'recall_1': 0.9524316271150367, 'recall_0': 0.7719772322198586, 'f1_0': 0.8292238570921273, 'accuracy': 0.8900114353900462}

Best HP {}
{'f1_1': 0.9193484481620076, 'precision_1': 0.8883610788122855, 'precision_0': 0.8961821389341943, 'auc': 0.8631602918890746, 'recall_1': 0.9525757207152414, 'recall_0': 0.7737448630629077,

 36%|███▋      | 4/11 [1:20:59<2:22:43, 1223.39s/it]

{'f1_1': 0.9190871369294606, 'precision_1': 0.8890897445021707, 'precision_0': 0.8942263279445728, 'auc': 0.8639436659289903, 'recall_1': 0.9511793997167614, 'recall_0': 0.7767079321412196, 'f1_0': 0.8313345397662791, 'accuracy': 0.8906378958882315}
Training Percentage: 0.3

Best HP {}
{'f1_1': 0.9262535879931649, 'precision_1': 0.8967175174392072, 'precision_0': 0.9090055933030519, 'auc': 0.8751923918983828, 'recall_1': 0.9578016468499208, 'recall_0': 0.7925831369468447, 'f1_0': 0.8468115612596386, 'accuracy': 0.900437524859367}

Best HP {}
{'f1_1': 0.9275552858683925, 'precision_1': 0.898360976883897, 'precision_0': 0.9113654212947381, 'auc': 0.8776067489563836, 'recall_1': 0.9587108013937282, 'recall_0': 0.796502696519039, 'f1_0': 0.8500715107963861, 'accuracy': 0.9023126313995113}

Best HP {}
{'f1_1': 0.9272797879345569, 'precision_1': 0.8990456234550723, 'precision_0': 0.9090605024528021, 'auc': 0.8779965553202324, 'recall_1': 0.9573448149633065, 'recall_0': 0.7986482956771581, 'f

 45%|████▌     | 5/11 [2:22:01<3:15:29, 1954.98s/it]

{'f1_1': 0.9294224862456543, 'precision_1': 0.9021098152273621, 'precision_0': 0.911381600920325, 'auc': 0.8813654066011904, 'recall_1': 0.9584406543682562, 'recall_0': 0.8042901588341248, 'f1_0': 0.854493580599144, 'accuracy': 0.9049491448377749}
Training Percentage: 0.4

Best HP {}
{'f1_1': 0.9338099176808854, 'precision_1': 0.9077108618247253, 'precision_0': 0.9180108686276202, 'auc': 0.8883920225076198, 'recall_1': 0.9614542357759844, 'recall_0': 0.8153298092392554, 'f1_0': 0.8636289864481052, 'accuracy': 0.9108771743742045}

Best HP {}
{'f1_1': 0.9328558639212176, 'precision_1': 0.9052565253088424, 'precision_0': 0.9191792113140428, 'auc': 0.8862197897409195, 'recall_1': 0.962191013516256, 'recall_0': 0.8102485659655831, 'f1_0': 0.8612833072498527, 'accuracy': 0.9095115613067458}

Best HP {}
{'f1_1': 0.9338276731425182, 'precision_1': 0.909276444992106, 'precision_0': 0.9156518777143831, 'auc': 0.8900053955555842, 'recall_1': 0.9597415001930619, 'recall_0': 0.8202692909181065, 'f1

 55%|█████▍    | 6/11 [3:39:44<3:50:37, 2767.42s/it]

{'f1_1': 0.9345847947438621, 'precision_1': 0.9101878223385421, 'precision_0': 0.9167092924126172, 'auc': 0.8910060747284663, 'recall_1': 0.960325678666423, 'recall_0': 0.8216864707905093, 'f1_0': 0.8666008502407672, 'accuracy': 0.9122162706830717}
Training Percentage: 0.5

Best HP {}
{'f1_1': 0.9398774165357772, 'precision_1': 0.9155603616869521, 'precision_0': 0.9267148014440433, 'auc': 0.8979601208547263, 'recall_1': 0.9655214247585772, 'recall_0': 0.8303988169508757, 'f1_0': 0.8759170342928172, 'accuracy': 0.9190014796429765}

Best HP {}
{'f1_1': 0.9384364126961585, 'precision_1': 0.9151087107048885, 'precision_0': 0.9219542172376709, 'auc': 0.8966714654560763, 'recall_1': 0.962984552608569, 'recall_0': 0.8303583783035838, 'f1_0': 0.8737623762376238, 'accuracy': 0.9172354541549329}

Best HP {}
{'f1_1': 0.939221013418121, 'precision_1': 0.915658470008559, 'precision_0': 0.9247350183448838, 'auc': 0.8983634121378836, 'recall_1': 0.9640282513395032, 'recall_0': 0.8326985729362639, 'f1

 64%|██████▎   | 7/11 [5:13:43<4:01:55, 3628.86s/it]

{'f1_1': 0.9397821453942694, 'precision_1': 0.9148264257065142, 'precision_0': 0.9285604231934672, 'auc': 0.8982237902086418, 'recall_1': 0.9661375918983397, 'recall_0': 0.8303099885189438, 'f1_0': 0.8766910730737526, 'accuracy': 0.919081030340636}
Training Percentage: 0.6

Best HP {}
{'f1_1': 0.9443243403322771, 'precision_1': 0.9209241877256318, 'precision_0': 0.9347298505556265, 'auc': 0.9056764308740867, 'recall_1': 0.9689446655929989, 'recall_0': 0.8424081961551744, 'f1_0': 0.8861709857108259, 'accuracy': 0.9252232364815146}

Best HP {}
{'f1_1': 0.9434672994216224, 'precision_1': 0.9202661651960076, 'precision_0': 0.9328158798829368, 'auc': 0.9048207159016842, 'recall_1': 0.9678685531720675, 'recall_0': 0.8417728786313009, 'f1_0': 0.8849589570255916, 'accuracy': 0.9241890897520036}

Best HP {}
{'f1_1': 0.9434337734083788, 'precision_1': 0.9194430775465308, 'precision_0': 0.934799186578546, 'auc': 0.9048088821203556, 'recall_1': 0.9687099725526075, 'recall_0': 0.8409077916881038, '

 73%|███████▎  | 8/11 [7:05:47<3:47:51, 4557.28s/it]

{'f1_1': 0.9435776085188198, 'precision_1': 0.9207677875595324, 'precision_0': 0.9315769279959074, 'auc': 0.9044974382964575, 'recall_1': 0.967546254170458, 'recall_0': 0.8414486224224571, 'f1_0': 0.8842220266456253, 'accuracy': 0.9241294274406857}
Training Percentage: 0.7

Best HP {}
{'f1_1': 0.9456287235516498, 'precision_1': 0.9234575537343436, 'precision_0': 0.935241472475515, 'auc': 0.9086239472880766, 'recall_1': 0.9688906915432975, 'recall_0': 0.8483572030328559, 'f1_0': 0.8896831452552106, 'accuracy': 0.9271584641493423}

Best HP {}
{'f1_1': 0.9459251210115578, 'precision_1': 0.923608163895212, 'precision_0': 0.9358093784448401, 'auc': 0.908613397798608, 'recall_1': 0.9693472627146097, 'recall_0': 0.847879532882606, 'f1_0': 0.8896771332983998, 'accuracy': 0.9274236317352567}

Best HP {}
{'f1_1': 0.9449309899712044, 'precision_1': 0.9219531098624297, 'precision_0': 0.9362559838750315, 'auc': 0.9080336220638875, 'recall_1': 0.9690835030549898, 'recall_0': 0.8469837410727853, 'f1_

 82%|████████▏ | 9/11 [9:17:02<3:05:05, 5552.61s/it]

{'f1_1': 0.9467730810917332, 'precision_1': 0.922610131735607, 'precision_0': 0.9412419700214133, 'auc': 0.9086416953835004, 'recall_1': 0.9722357131293509, 'recall_0': 0.84504767763765, 'f1_0': 0.8905547226386807, 'accuracy': 0.9283782350445482}
Training Percentage: 0.8

Best HP {}
{'f1_1': 0.9489890116434888, 'precision_1': 0.9268803443662381, 'precision_0': 0.9426487234310149, 'auc': 0.9142769214853217, 'recall_1': 0.9721781574130567, 'recall_0': 0.8563756855575868, 'f1_0': 0.8974435730108362, 'accuracy': 0.9318669954657545}

Best HP {}
{'f1_1': 0.9482794269679515, 'precision_1': 0.927485988328422, 'precision_0': 0.9366943203573708, 'auc': 0.91199723694328, 'recall_1': 0.9700265893159294, 'recall_0': 0.8539678845706307, 'f1_0': 0.8934201716476962, 'accuracy': 0.930355580303874}

Best HP {}
{'f1_1': 0.9480496348631479, 'precision_1': 0.9256811594202898, 'precision_0': 0.9406994424733908, 'auc': 0.9121355759437703, 'recall_1': 0.9715259187150158, 'recall_0': 0.8527452331725247, 'f1_0'

 91%|█████████ | 10/11 [11:47:26<1:49:54, 6594.28s/it]

{'f1_1': 0.9488990196954799, 'precision_1': 0.9276975064087625, 'precision_0': 0.9405866131862622, 'auc': 0.9145913051561922, 'recall_1': 0.971092272976764, 'recall_0': 0.8580903373356203, 'f1_0': 0.8974466303892843, 'accuracy': 0.9317874472993397}
Training Percentage: 0.9

Best HP {}
{'f1_1': 0.9526574628192215, 'precision_1': 0.9327068105348648, 'precision_0': 0.944585020242915, 'auc': 0.9195015497481889, 'recall_1': 0.9734802615645435, 'recall_0': 0.865522837931834, 'f1_0': 0.9033272837265578, 'accuracy': 0.9364410150346034}

Best HP {}
{'f1_1': 0.9517470881863561, 'precision_1': 0.9318128927158482, 'precision_0': 0.9431732461654514, 'auc': 0.9187181948974735, 'recall_1': 0.9725528297303863, 'recall_0': 0.8648835600645608, 'f1_0': 0.902333413519365, 'accuracy': 0.9354068888712115}

Best HP {}
{'f1_1': 0.9505386584131897, 'precision_1': 0.9323914058851004, 'precision_0': 0.9371100573995508, 'auc': 0.917905752847168, 'recall_1': 0.9694063372587107, 'recall_0': 0.8664051684356253, 'f1_

100%|██████████| 11/11 [14:38:27<00:00, 7694.15s/it]  

{'f1_1': 0.9477728418777763, 'precision_1': 0.9238150965476887, 'precision_0': 0.9456035767511177, 'auc': 0.9134883382584389, 'recall_1': 0.9730062862073215, 'recall_0': 0.8539703903095559, 'f1_0': 0.8974540311173974, 'accuracy': 0.9307930952191552}





In [22]:
results = pd.DataFrame(columns=['train_percentage', 'iteration', 'accuracy', 'auc', 'precision_0', 'precision_1', 'recall_0', 'recall_1', 'f1_0', 'f1_1', 'hyperparameters'])
results = results.append(all_results, ignore_index=True, sort=True)

In [23]:
results.head()

Unnamed: 0,accuracy,auc,f1_0,f1_1,hyperparameters,iteration,precision_0,precision_1,recall_0,recall_1,train_percentage
0,0.822441,0.790585,0.728328,0.868126,,0,0.77523,0.843358,0.686777,0.894393,0.01
1,0.826604,0.782319,0.718367,0.874742,,1,0.822038,0.828284,0.637917,0.926722,0.01
2,0.81807,0.7753,0.707832,0.867909,,2,0.798033,0.825714,0.63595,0.914649,0.01
3,0.814639,0.779346,0.71297,0.863122,,3,0.769231,0.834033,0.664379,0.894314,0.01
4,0.816326,0.781128,0.71535,0.864421,,4,0.77162,0.835405,0.666729,0.895527,0.01


In [24]:
results.to_csv(output_file, index=False)