# FLAML - Team Goal

This notebook is used for the application of ML algorithms to the Principal Components from the Ethereum, Credit Card, and Insurance datasets.

In [1]:
import pandas as pd
import numpy as np
import flaml
from flaml import AutoML

## Ethereum

In [2]:
# Read in the Ethereum modeling table and preview its shape and first rows.
# Per the displayed header it holds numbered component columns plus `value`.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
data = pd.read_pickle('Data/principal_components.pkl')
print(data.shape)
data.head()

(71250, 15)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,value
0,-2.006815,-2.169782,0.1464,0.467598,-0.327913,0.751913,-0.521793,-0.761362,0.78803,0.315258,-0.054779,0.023103,-0.001547,1.587008e-17,-0.028777
1,-1.352242,-1.128665,-1.880299,0.636689,-0.067591,0.997769,-0.122053,-0.728717,0.883459,-0.04829,0.590403,0.036177,-0.002929,-1.502062e-16,-0.035048
2,-1.678475,-2.004593,-0.426661,0.696365,-0.14081,0.824183,-0.329805,-0.819126,0.71446,-0.011513,0.009796,0.033345,-0.001582,-2.3956130000000002e-17,-0.035048
3,-1.505144,-1.625462,-1.122661,0.62657,-0.059864,0.908407,-0.226611,-0.79167,0.777155,-0.030135,-0.182153,0.033123,-0.000876,5.0636980000000003e-17,-0.035048
4,-1.487477,-1.586732,-1.193951,0.619052,-0.051163,0.917079,-0.216019,-0.788997,0.783427,-0.032049,-0.205875,0.033085,-0.000792,5.93106e-17,-0.035048


In [3]:
# Build a single binary fraud label per transaction: 1 when the summed
# `to_scam` + `from_scam` flags are non-zero, 0 otherwise.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
to_scam = np.array(pd.read_pickle('Data/to_scam.pkl'))
from_scam = np.array(pd.read_pickle('Data/from_scam.pkl'))

temp = to_scam + from_scam
# Vectorised comparison replaces the per-element Python loop; the result is
# the same 0/1 integer labels, one per row of `temp`.
scam = (temp != 0).astype('int64')
target = pd.DataFrame(scam)
target

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
71245,1
71246,1
71247,1
71248,1


In [15]:
# Run the FLAML search for the Ethereum fraud classifier, minimising log-loss.
# max_iter caps the search at 10000 trials; time_budget is left as None.
automl = AutoML()
automl.fit(
    X_train=np.array(data),
    # `target` is a one-column DataFrame; flatten it to shape (n,) so the
    # labels match the 1-D form used with cross_validate in the evaluation.
    y_train=np.ravel(target),
    task="classification",
    metric='log_loss',
    max_iter=10000,
    time_budget=None,
    ensemble=True,
)

[flaml.automl: 04-24 13:51:37] {2055} INFO - task = classification
[flaml.automl: 04-24 13:51:37] {2057} INFO - Data split method: stratified
[flaml.automl: 04-24 13:51:37] {2061} INFO - Evaluation method: holdout
[flaml.automl: 04-24 13:51:37] {2142} INFO - Minimizing error metric: log_loss
[flaml.automl: 04-24 13:51:37] {2200} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 04-24 13:51:37] {2453} INFO - iteration 0, current learner lgbm
[flaml.automl: 04-24 13:51:38] {2569} INFO - Estimated sufficient time budget=1565s. Estimated necessary time budget=38s.
[flaml.automl: 04-24 13:51:38] {2621} INFO -  at 0.1s,	estimator lgbm's best error=0.3624,	best estimator lgbm's best error=0.3624
[flaml.automl: 04-24 13:51:38] {2453} INFO - iteration 1, current learner lgbm
[flaml.automl: 04-24 13:51:38] {2621} INFO -  at 0.1s,	estimator lgbm's best error=0.3624,	best estimator lgbm's best error=0.3624
[flaml.

In [17]:
# Best loss recorded per learner in the Ethereum search (fit used
# metric='log_loss'); the displayed result shows inf for 'lrl1'.
automl.best_loss_per_estimator

{'lgbm': 0.08681064203624037,
 'rf': 0.1446640871201777,
 'catboost': 0.09611644517775123,
 'xgboost': 0.09138301965114143,
 'extra_tree': 0.12657459425070108,
 'xgb_limitdepth': 0.09578331373294575,
 'lrl1': inf}

In [18]:
# Keep the best configuration FLAML found for the xgboost, lgbm and catboost
# learners, plus the full AutoML object, and persist all four to Models/.
# NOTE(review): in the losses displayed above, 'xgb_limitdepth' scored slightly
# lower than 'catboost' - confirm catboost is the intended third model.
import pickle

eth_xgboost_mod = automl.best_model_for_estimator('xgboost')
eth_lgbm_mod = automl.best_model_for_estimator('lgbm')
eth_catboost_mod = automl.best_model_for_estimator('catboost')
eth_automl = automl

# `with` closes (and therefore flushes) every file handle, even if
# pickling raises part-way through; the bare open() calls never closed them.
for model_obj, model_path in (
    (eth_xgboost_mod, 'Models/eth_xg.pkl'),
    (eth_lgbm_mod, 'Models/eth_lgbm.pkl'),
    (eth_catboost_mod, 'Models/eth_catboost.pkl'),
    (eth_automl, 'Models/eth_automl.pkl'),
):
    with open(model_path, 'wb') as model_file:
        pickle.dump(model_obj, model_file)

## Credit Card

In [19]:
# Load the credit-card table, report its raw shape, then split off the label
# column (`Class`) and discard `Time`, leaving only the model features.
data_credit = pd.read_csv('creditcard.csv')
print(data_credit.shape)
target_class = data_credit.pop('Class')
del data_credit['Time']
data_credit.head()

(284807, 31)


Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99


In [20]:
# Standardize the `Amount` column with StandardScaler and write it back.
# NOTE(review): the scaler is fit on the full table here, before the
# cross-validation below, so held-out rows contribute to the scaling statistics.
import sklearn
from sklearn.preprocessing import StandardScaler

# StandardScaler expects a 2-D (n_samples, 1) array; build it once and reuse it.
amount_2d = np.array(data_credit['Amount']).reshape(-1, 1)
transformer = StandardScaler().fit(amount_2d)
# ravel() flattens back to one value per row without hard-coding the row count.
transformed_value = transformer.transform(amount_2d).ravel()
data_credit['Amount'] = transformed_value
data_credit.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0.244964
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,-0.342475
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,1.160686
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0.140534
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,-0.073403


In [23]:
# FLAML search for the credit-card classifier, minimising log-loss;
# both max_iter and time_budget are passed as None.
automl_credit = AutoML()
automl_credit.fit(
    X_train=np.array(data_credit),
    y_train=np.array(target_class),
    task="classification",
    metric='log_loss',
    max_iter=None,
    time_budget=None,
    ensemble=True,
)

[flaml.automl: 04-24 13:56:47] {2055} INFO - task = classification
[flaml.automl: 04-24 13:56:47] {2057} INFO - Data split method: stratified
[flaml.automl: 04-24 13:56:47] {2061} INFO - Evaluation method: holdout
[flaml.automl: 04-24 13:56:47] {2142} INFO - Minimizing error metric: log_loss
[flaml.automl: 04-24 13:56:47] {2200} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 04-24 13:56:47] {2453} INFO - iteration 0, current learner lgbm
[flaml.automl: 04-24 13:56:47] {2569} INFO - Estimated sufficient time budget=11145s. Estimated necessary time budget=274s.
[flaml.automl: 04-24 13:56:47] {2621} INFO -  at 0.5s,	estimator lgbm's best error=0.0076,	best estimator lgbm's best error=0.0076
[flaml.automl: 04-24 13:56:47] {2453} INFO - iteration 1, current learner lgbm
[flaml.automl: 04-24 13:56:47] {2621} INFO -  at 0.5s,	estimator lgbm's best error=0.0076,	best estimator lgbm's best error=0.0076
[flam

In [24]:
# Best loss recorded per learner in the credit-card search (fit used
# metric='log_loss'); the displayed result shows inf for 'xgb_limitdepth' and 'lrl1'.
automl_credit.best_loss_per_estimator

{'lgbm': 0.0033707261253657535,
 'rf': 0.005266324805446211,
 'catboost': 0.0045144455852860425,
 'xgboost': 0.0032221147799141366,
 'extra_tree': 0.004830885924021796,
 'xgb_limitdepth': inf,
 'lrl1': inf}

In [25]:
# Keep the best configuration FLAML found for the xgboost, lgbm and extra_tree
# learners and persist them, together with the full AutoML object, to Models/.
# NOTE(review): in the losses displayed above, 'catboost' scored lower than
# 'extra_tree' - confirm extra_tree is the intended third model.
credit_xgboost_mod = automl_credit.best_model_for_estimator('xgboost')
credit_lgbm_mod = automl_credit.best_model_for_estimator('lgbm')
credit_extratree_mod = automl_credit.best_model_for_estimator('extra_tree')

# `with` closes (and therefore flushes) every file handle, even if
# pickling raises part-way through; the bare open() calls never closed them.
for model_obj, model_path in (
    (credit_xgboost_mod, 'Models/credit_xg.pkl'),
    (credit_lgbm_mod, 'Models/credit_lgbm.pkl'),
    (credit_extratree_mod, 'Models/credit_extratree.pkl'),
    (automl_credit, 'Models/credit_automl.pkl'),
):
    with open(model_path, 'wb') as model_file:
        pickle.dump(model_obj, model_file)

## Insurance

In [26]:
# Load the preprocessed insurance claims, discard the `Unnamed: 0` column,
# report the shape, then split the `target` column off from the features.
data_insurance = pd.read_csv('insurance_claims_preprocessed.csv')
del data_insurance['Unnamed: 0']
print(data_insurance.shape)
target_class_insur = data_insurance.pop('target')
data_insurance.head()

(1000, 54)


Unnamed: 0,months_as_customer,policy_deductable,umbrella_limit,capital-gains,capital-loss,incident_hour_of_the_day,number_of_vehicles_involved,bodily_injuries,witnesses,injury_claim,...,collision_type_Side Collision,incident_severity_Minor Damage,incident_severity_Total Loss,incident_severity_Trivial Damage,authorities_contacted_Fire,authorities_contacted_None,authorities_contacted_Other,authorities_contacted_Police,property_damage_YES,police_report_available_YES
0,1.07814,-0.222383,-0.479476,1.011331,0.953851,-0.956261,-0.823865,0.009759,0.461838,-0.189283,...,1,0,0,0,0,0,0,1,1,1
1,0.208995,1.412784,1.69798,-0.901927,0.953851,-0.524475,-0.823865,-1.210174,-1.3387,-1.363822,...,0,1,0,0,0,0,0,1,0,0
2,-0.608002,1.412784,1.69798,0.358023,0.953851,-0.668404,1.140056,1.229693,1.362107,0.054644,...,0,1,0,0,0,0,0,1,0,0
3,0.452355,1.412784,2.133471,0.853388,-1.267577,-0.956261,-0.823865,0.009759,0.461838,-0.22413,...,0,0,0,0,0,0,0,1,0,0
4,0.208995,-0.222383,2.133471,1.46721,-0.683741,1.202666,-0.823865,-1.210174,-0.438431,-1.257232,...,0,1,0,0,0,1,0,0,0,0


In [27]:
# FLAML search for the insurance-claims classifier, minimising log-loss and
# capped at 10000 trials. Labels are passed exactly as stored in `target`
# (the later evaluation section maps them from 'Y'/other to 1/0).
automl_insurance = AutoML()
automl_insurance.fit(
    X_train=np.array(data_insurance),
    y_train=np.array(target_class_insur),
    task="classification",
    metric='log_loss',
    max_iter=10000,
    time_budget=None,
    ensemble=True,
)

[flaml.automl: 04-24 13:59:18] {2055} INFO - task = classification
[flaml.automl: 04-24 13:59:18] {2057} INFO - Data split method: stratified
[flaml.automl: 04-24 13:59:18] {2061} INFO - Evaluation method: cv
[flaml.automl: 04-24 13:59:18] {2142} INFO - Minimizing error metric: log_loss
[flaml.automl: 04-24 13:59:18] {2200} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl: 04-24 13:59:18] {2453} INFO - iteration 0, current learner lgbm
[flaml.automl: 04-24 13:59:18] {2569} INFO - Estimated sufficient time budget=534s. Estimated necessary time budget=13s.
[flaml.automl: 04-24 13:59:18] {2621} INFO -  at 0.1s,	estimator lgbm's best error=0.4902,	best estimator lgbm's best error=0.4902
[flaml.automl: 04-24 13:59:18] {2453} INFO - iteration 1, current learner lgbm
[flaml.automl: 04-24 13:59:18] {2621} INFO -  at 0.1s,	estimator lgbm's best error=0.4902,	best estimator lgbm's best error=0.4902
[flaml.automl

In [28]:
# Best loss recorded per learner in the insurance search (fit used
# metric='log_loss'); the displayed result shows inf for 'xgb_limitdepth' and 'lrl1'.
automl_insurance.best_loss_per_estimator

{'lgbm': 0.44853571134451864,
 'rf': 0.46824352537533187,
 'catboost': 0.46395380052291646,
 'xgboost': 0.4460570729821921,
 'extra_tree': 0.4559659311407443,
 'xgb_limitdepth': inf,
 'lrl1': inf}

In [29]:
# Keep the best configuration FLAML found for the xgboost, lgbm and extra_tree
# learners and persist them, together with the full AutoML object, to Models/.
insurance_xgboost_mod = automl_insurance.best_model_for_estimator('xgboost')
insurance_lgbm_mod = automl_insurance.best_model_for_estimator('lgbm')
insurance_extratree_mod = automl_insurance.best_model_for_estimator('extra_tree')

# `with` closes (and therefore flushes) every file handle, even if
# pickling raises part-way through; the bare open() calls never closed them.
for model_obj, model_path in (
    (insurance_xgboost_mod, 'Models/insurance_xg.pkl'),
    (insurance_lgbm_mod, 'Models/insurance_lgbm.pkl'),
    (insurance_extratree_mod, 'Models/insurance_extra.pkl'),
    (automl_insurance, 'Models/insurance_automl.pkl'),
):
    with open(model_path, 'wb') as model_file:
        pickle.dump(model_obj, model_file)

# Evaluation
Evaluate the best three model configurations for each dataset using repeated cross-validation. Record four metrics: F1, recall, precision, and ROC AUC.

In [30]:
import pickle
import pandas as pd

# Reload the three pickled AutoML objects written by the training sections.
# NOTE(review): the evaluation cells below score the in-memory `*_mod`
# variables rather than these reloaded objects - confirm that is intended.
eth, credit, insurance = map(
    pd.read_pickle,
    (
        r'Models/eth_automl.pkl',
        r'Models/credit_automl.pkl',
        r'Models/insurance_automl.pkl',
    ),
)

## Ethereum

In [32]:
from sklearn.model_selection import cross_validate, RepeatedKFold


# Repeated K-fold splitter: 5 folds, repeated 25 times, fixed seed.
cv = RepeatedKFold(n_repeats=25, n_splits=5, random_state=1)

# Cross-validate each of the three Ethereum configurations on every
# split/repeat, collecting F1, recall, precision and ROC AUC per fold.
eth_mods = [eth_xgboost_mod, eth_lgbm_mod, eth_catboost_mod]
scores_per_estimator = [
    cross_validate(
        estimator,
        data,
        np.ravel(target),
        scoring=['f1', 'recall', 'precision', 'roc_auc'],
        cv=cv,
    )
    for estimator in eth_mods
]

In [33]:
# Summarise the repeated-CV scores of each Ethereum model.
# eth_results[str(k)] holds four rows - mean, variance, max, min - and each
# row lists that statistic for F1, recall, precision and ROC AUC, in that order.
metric_keys = ['test_f1', 'test_recall', 'test_precision', 'test_roc_auc']
summary_stats = [np.mean, np.var, np.max, np.min]

eth_results = {}
# enumerate() follows however many models were cross-validated instead of
# assuming exactly three entries.
for i, model_scores in enumerate(scores_per_estimator):
    eth_results[str(i)] = [[stat(model_scores[key]) for key in metric_keys]
                           for stat in summary_stats]

## Credit Card

In [35]:
# Re-fetch the best xgboost / lgbm / extra_tree configurations from the
# in-memory credit AutoML object for use in the evaluation below.
credit_xgboost_mod, credit_lgbm_mod, credit_tree_mod = (
    automl_credit.best_model_for_estimator(learner)
    for learner in ('xgboost', 'lgbm', 'extra_tree')
)

In [36]:
# Cross-validate each credit-card configuration with the shared `cv` splitter,
# collecting F1, recall, precision and ROC AUC per fold.
credit_mods = [credit_xgboost_mod, credit_lgbm_mod, credit_tree_mod]
scores_per_estimator_credit = [
    cross_validate(
        estimator,
        data_credit,
        np.ravel(target_class),
        scoring=['f1', 'recall', 'precision', 'roc_auc'],
        cv=cv,
    )
    for estimator in credit_mods
]


In [37]:
# Summarise the repeated-CV scores of each credit-card model.
# credit_results[str(k)] holds four rows - mean, variance, max, min - and each
# row lists that statistic for F1, recall, precision and ROC AUC, in that order.
metric_keys = ['test_f1', 'test_recall', 'test_precision', 'test_roc_auc']
summary_stats = [np.mean, np.var, np.max, np.min]

credit_results = {}
# enumerate() follows however many models were cross-validated instead of
# assuming exactly three entries; no Ethereum-named temporaries are reused.
for i, model_scores in enumerate(scores_per_estimator_credit):
    credit_results[str(i)] = [[stat(model_scores[key]) for key in metric_keys]
                              for stat in summary_stats]

## Insurance

In [39]:
# Encode the insurance label as integers: 'Y' becomes 1, anything else 0.
target_class_insurance = [1 if label == 'Y' else 0 for label in target_class_insur]

In [40]:
# Repeated K-fold splitter: 5 folds, repeated 25 times, fixed seed.
cv = RepeatedKFold(n_repeats=25, n_splits=5, random_state=1)

# Cross-validate each insurance configuration against the 1/0 labels,
# collecting F1, recall, precision and ROC AUC per fold.
insurance_mods = [insurance_xgboost_mod, insurance_lgbm_mod, insurance_extratree_mod]
scores_per_estimator_insurance = [
    cross_validate(
        estimator,
        data_insurance,
        np.ravel(target_class_insurance),
        scoring=['f1', 'recall', 'precision', 'roc_auc'],
        cv=cv,
    )
    for estimator in insurance_mods
]

  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
# Summarise the repeated-CV scores of each insurance model.
# insurance_results[str(k)] holds four rows - mean, variance, max, min - and
# each row lists that statistic for F1, recall, precision and ROC AUC, in that order.
metric_keys = ['test_f1', 'test_recall', 'test_precision', 'test_roc_auc']
summary_stats = [np.mean, np.var, np.max, np.min]

insurance_results = {}
# enumerate() follows however many models were cross-validated instead of
# assuming exactly three entries; no Ethereum-named temporaries are reused.
for i, model_scores in enumerate(scores_per_estimator_insurance):
    insurance_results[str(i)] = [[stat(model_scores[key]) for key in metric_keys]
                                 for stat in summary_stats]

# Analysis

In [34]:
# Turn each model's summary (rows: mean/variance/max/min; columns: the four
# metrics) into a labelled DataFrame and print them in turn.
# Model order: xgboost, lgbm, catboost
eth_results_tables = []

stat_names = ['Mean', 'Variance', 'Max', 'Min']
metric_names = ['F1', 'Recall', 'Precision', 'Roc']
for model_key in eth_results:
    # Build the table directly with its final labels. A dedicated name avoids
    # overwriting `eth`, which holds the AutoML object loaded in the evaluation section.
    eth_table = pd.DataFrame(eth_results[model_key], index=stat_names, columns=metric_names)
    eth_results_tables.append(eth_table)

print("              Ethereum Dataset Results")
print("")
for table in eth_results_tables:
    print(table)
    print("")

              Ethereum Dataset Results

                F1    Recall  Precision       Roc
Mean      0.924622  0.871307   0.984914  0.984653
Variance  0.000012  0.000034   0.000006  0.000002
Max       0.933854  0.887706   0.991189  0.987603
Min       0.914945  0.855674   0.977725  0.979434

                F1    Recall  Precision       Roc
Mean      0.926850  0.872918   0.987910  0.986184
Variance  0.000011  0.000031   0.000005  0.000002
Max       0.935601  0.888049   0.993587  0.989229
Min       0.917102  0.858956   0.979260  0.982005

                F1    Recall  Precision       Roc
Mean      0.891892  0.808379   0.994791  0.964442
Variance  0.000067  0.000168   0.000009  0.000047
Max       0.905851  0.833333   0.999136  0.974125
Min       0.846489  0.743554   0.975067  0.925657



In [38]:
# Turn each model's summary (rows: mean/variance/max/min; columns: the four
# metrics) into a labelled DataFrame and print them in turn.
# Model order: xgboost, lgbm, extra tree
credit_results_tables = []

stat_names = ['Mean', 'Variance', 'Max', 'Min']
metric_names = ['F1', 'Recall', 'Precision', 'Roc']
for model_key in credit_results:
    # Build the table directly with its final labels. A dataset-specific name
    # avoids overwriting `eth`, which holds the AutoML object loaded in the evaluation section.
    credit_table = pd.DataFrame(credit_results[model_key], index=stat_names, columns=metric_names)
    credit_results_tables.append(credit_table)

print("              Credit Card Dataset Results")
print("")
for table in credit_results_tables:
    print(table)
    print("")

              Credit Card Dataset Results

                F1    Recall  Precision       Roc
Mean      0.862059  0.793792   0.944961  0.980260
Variance  0.000671  0.001690   0.000579  0.000063
Max       0.936170  0.897959   0.988764  0.998594
Min       0.795455  0.686275   0.880952  0.954890

                F1    Recall  Precision       Roc
Mean      0.850000  0.776845   0.940211  0.981316
Variance  0.000704  0.001646   0.000690  0.000063
Max       0.918033  0.890110   0.988235  0.998851
Min       0.779661  0.674419   0.870588  0.958277

                F1    Recall  Precision       Roc
Mean      0.771283  0.658562   0.935706  0.934654
Variance  0.001952  0.003674   0.001061  0.000473
Max       0.861111  0.790909   1.000000  0.975743
Min       0.633094  0.483516   0.852459  0.867863



In [42]:
# Turn each model's summary (rows: mean/variance/max/min; columns: the four
# metrics) into a labelled DataFrame and print them in turn.
# Model order: xgboost, lgbm, extra tree
insurance_results_tables = []

stat_names = ['Mean', 'Variance', 'Max', 'Min']
metric_names = ['F1', 'Recall', 'Precision', 'Roc']
for model_key in insurance_results:
    # Build the table directly with its final labels. A dataset-specific name
    # avoids overwriting `eth`, which holds the AutoML object loaded in the evaluation section.
    insurance_table = pd.DataFrame(insurance_results[model_key], index=stat_names, columns=metric_names)
    insurance_results_tables.append(insurance_table)

print("              Insurance Dataset Results")
print("")
for table in insurance_results_tables:
    print(table)
    print("")

              Insurance Dataset Results

                F1    Recall  Precision       Roc
Mean      0.553093  0.527868   0.596202  0.773346
Variance  0.006228  0.010862   0.004363  0.001483
Max       0.700000  0.727273   0.750000  0.860251
Min       0.172414  0.111111   0.384615  0.601977

                F1    Recall  Precision       Roc
Mean      0.522044  0.482411   0.587353  0.760869
Variance  0.004551  0.008937   0.004530  0.001703
Max       0.660194  0.659091   0.818182  0.847856
Min       0.281690  0.196078   0.404255  0.606533

                F1    Recall  Precision       Roc
Mean      0.249582  0.185983   0.585752  0.767772
Variance  0.028441  0.024886   0.039427  0.001865
Max       0.666667  0.681818   1.000000  0.861468
Min       0.000000  0.000000   0.000000  0.606020

