In [1]:
#Ignore 'Future Warnings'
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Set Base for Algorithms

In [2]:
#Import Necessary Packages
from tqdm import tqdm
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import sklearn
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import plot_confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

In [3]:
# Set pseudorandom seed (passed as random_state to the CV splitter and the model below)
seed = 42

In [4]:
# Stratified k-fold splitter shared by the cross-validation loops in this notebook.
# Shuffling is seeded so the fold assignment is repeatable between runs.
splits = 5
kf = StratifiedKFold(shuffle=True, random_state=seed, n_splits=splits)

In [5]:
# Argument handed to PCA(...) in the PCA cells. As a float between 0 and 1,
# scikit-learn reads it as the share of variance the kept components must explain.
pca_target = 0.8

In [6]:
# Models to be used: (label, estimator) pairs iterated by every CV loop.
# NOTE(review): `min_impurity_split` and `max_features='auto'` are deprecated in
# newer scikit-learn releases -- confirm the pinned version before upgrading.
models = [
    (
        'RTREE2',
        RandomForestClassifier(
            n_estimators=64,
            criterion='gini',
            max_depth=4,
            max_features='auto',
            max_leaf_nodes=None,
            min_impurity_split=1e-07,
            min_samples_leaf=1,
            min_samples_split=0.04,
            min_weight_fraction_leaf=0.0,
            bootstrap=True,
            oob_score=False,
            class_weight=None,
            warm_start=False,
            n_jobs=1,
            verbose=0,
            random_state=seed,
        ),
    ),
]

# Labels only; they become the 'Algorithm' column of each results table.
methods = [label for label, _ in models]

# Alternative Specification Assessments

In [7]:
# Alternative-specification datasets as (label, workbook path) pairs.
# The workbooks themselves are read later, inside the CV loops.
files = [
    ('One Year Alt', '../data/1a_offset.xlsx'),
    ('Two Year Alt', '../data/2a_offset.xlsx'),
    ('Three Year Alt', '../data/3a_offset.xlsx'),
    ('Five Year Alt', '../data/5a_offset.xlsx'),
    ('Ten Year Alt', '../data/10a_offset.xlsx'),
]

## Alternative Specification

In [8]:
# Result tables for accuracy (_a), recall (_r) and F1 (_f): one row per algorithm.
# The CV loop adds a mean and a std column per dataset.
df_a_a, df_a_r, df_a_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [9]:
# Cross-validate every model on each alternative-specification dataset and store
# the mean / std of accuracy, recall and F1 across folds in df_a_a / df_a_r / df_a_f.
# NOTE(review): the scaler is fit on the full dataset before the CV split, so
# test-fold rows influence the standardization -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_a_a[name + ' mean'] = algo_a_mean
    df_a_a[name + ' std'] = algo_a_std
    df_a_r[name + ' mean'] = algo_r_mean
    df_a_r[name + ' std'] = algo_r_std
    df_a_f[name + ' mean'] = algo_f_mean
    df_a_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:29<00:00,  5.89s/it]


## Alternative Specification w/ PCA

In [10]:
# Result tables for the PCA variant: accuracy (_a), recall (_r) and F1 (_f),
# one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_ap_a, df_ap_r, df_ap_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [11]:
# Cross-validate every model on each alternative-specification dataset after a PCA
# projection, and store the mean / std of accuracy, recall and F1 across folds in
# df_ap_a / df_ap_r / df_ap_f.
# NOTE(review): the scaler and PCA are fit on the full dataset before the CV split,
# so test-fold rows influence both transforms -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    # Project the standardized features once; pca_target controls how many components are kept
    pca = PCA(pca_target)
    X = pca.fit_transform(X)

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_ap_a[name + ' mean'] = algo_a_mean
    df_ap_a[name + ' std'] = algo_a_std
    df_ap_r[name + ' mean'] = algo_r_mean
    df_ap_r[name + ' std'] = algo_r_std
    df_ap_f[name + ' mean'] = algo_f_mean
    df_ap_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:27<00:00,  5.44s/it]


## Alternative Specification w/ Y/R/C dummies

In [12]:
# Result tables for the dummy-variable variant: accuracy (_a), recall (_r) and F1 (_f),
# one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_ad_a, df_ad_r, df_ad_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [13]:
# Cross-validate every model on each alternative-specification dataset with
# year / region / country dummies appended, and store the mean / std of accuracy,
# recall and F1 across folds in df_ad_a / df_ad_r / df_ad_f.
# NOTE(review): the scaler is fit on the full dataset before the CV split, so
# test-fold rows influence the standardization -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Creation of Year/Region/Country dummies; .iloc[:, 1:] drops the first level of each
    year_dummies = pd.get_dummies(df.year, prefix='year').iloc[:, 1:]
    region_dummies = pd.get_dummies(df.region, prefix='region').iloc[:, 1:]
    country_dummies = pd.get_dummies(df.ccode, prefix='country: ').iloc[:, 1:]

    # Dummies are appended on the right, so they fall inside the feature slice below
    df = pd.concat([df, year_dummies, region_dummies, country_dummies], axis=1)

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_ad_a[name + ' mean'] = algo_a_mean
    df_ad_a[name + ' std'] = algo_a_std
    df_ad_r[name + ' mean'] = algo_r_mean
    df_ad_r[name + ' std'] = algo_r_std
    df_ad_f[name + ' mean'] = algo_f_mean
    df_ad_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:35<00:00,  7.05s/it]


## Alternative Specification w/ Y/R/C dummies and PCA

In [14]:
# Result tables for the dummies + PCA variant: accuracy (_a), recall (_r) and F1 (_f),
# one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_apd_a, df_apd_r, df_apd_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [15]:
# Cross-validate every model on each alternative-specification dataset with
# year / region / country dummies appended and a PCA projection applied, and store
# the mean / std of accuracy, recall and F1 across folds in df_apd_a / df_apd_r / df_apd_f.
# NOTE(review): the scaler and PCA are fit on the full dataset before the CV split,
# so test-fold rows influence both transforms -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Creation of Year/Region/Country dummies; .iloc[:, 1:] drops the first level of each
    year_dummies = pd.get_dummies(df.year, prefix='year').iloc[:, 1:]
    region_dummies = pd.get_dummies(df.region, prefix='region').iloc[:, 1:]
    country_dummies = pd.get_dummies(df.ccode, prefix='country: ').iloc[:, 1:]

    # Dummies are appended on the right, so they fall inside the feature slice below
    df = pd.concat([df, year_dummies, region_dummies, country_dummies], axis=1)

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    # Project the standardized features once; pca_target controls how many components are kept
    pca = PCA(pca_target)
    X = pca.fit_transform(X)

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_apd_a[name + ' mean'] = algo_a_mean
    df_apd_a[name + ' std'] = algo_a_std
    df_apd_r[name + ' mean'] = algo_r_mean
    df_apd_r[name + ' std'] = algo_r_std
    df_apd_f[name + ' mean'] = algo_f_mean
    df_apd_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:50<00:00, 10.05s/it]


# Assessment of Conflict Type on Alternative Specification

In [16]:
# Conflict-type datasets as (label, workbook path) pairs; replaces the earlier `files` list.
files = [
    ('State Based', '../data/con_1.xlsx'),
    ('Non State', '../data/con_2.xlsx'),
    ('One Sided', '../data/con_3.xlsx'),
]

## Con Type Specification

In [17]:
# Result tables for the conflict-type runs: accuracy (_a), recall (_r) and F1 (_f),
# one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_c_a, df_c_r, df_c_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [18]:
# Cross-validate every model on each conflict-type dataset and store the
# mean / std of accuracy, recall and F1 across folds in df_c_a / df_c_r / df_c_f.
# NOTE(review): the scaler is fit on the full dataset before the CV split, so
# test-fold rows influence the standardization -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_c_a[name + ' mean'] = algo_a_mean
    df_c_a[name + ' std'] = algo_a_std
    df_c_r[name + ' mean'] = algo_r_mean
    df_c_r[name + ' std'] = algo_r_std
    df_c_f[name + ' mean'] = algo_f_mean
    df_c_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:18<00:00,  6.23s/it]


## Con Type Specification w/ PCA

In [19]:
# Result tables for the conflict-type PCA variant: accuracy (_a), recall (_r) and F1 (_f),
# one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_cp_a, df_cp_r, df_cp_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [20]:
# Cross-validate every model on each conflict-type dataset after a PCA projection,
# and store the mean / std of accuracy, recall and F1 across folds in
# df_cp_a / df_cp_r / df_cp_f.
# NOTE(review): the scaler and PCA are fit on the full dataset before the CV split,
# so test-fold rows influence both transforms -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    # Project the standardized features once; pca_target controls how many components are kept
    pca = PCA(pca_target)
    X = pca.fit_transform(X)

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_cp_a[name + ' mean'] = algo_a_mean
    df_cp_a[name + ' std'] = algo_a_std
    df_cp_r[name + ' mean'] = algo_r_mean
    df_cp_r[name + ' std'] = algo_r_std
    df_cp_f[name + ' mean'] = algo_f_mean
    df_cp_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:19<00:00,  6.40s/it]


## Con Type Specification w/ Y/R/C dummies

In [21]:
# Result tables for the conflict-type dummy-variable variant: accuracy (_a), recall (_r)
# and F1 (_f), one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_cd_a, df_cd_r, df_cd_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [22]:
# Cross-validate every model on each conflict-type dataset with year / region /
# country dummies appended, and store the mean / std of accuracy, recall and F1
# across folds in df_cd_a / df_cd_r / df_cd_f.
# NOTE(review): the scaler is fit on the full dataset before the CV split, so
# test-fold rows influence the standardization -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Creation of Year/Region/Country dummies; .iloc[:, 1:] drops the first level of each
    year_dummies = pd.get_dummies(df.year, prefix='year').iloc[:, 1:]
    region_dummies = pd.get_dummies(df.region, prefix='region').iloc[:, 1:]
    country_dummies = pd.get_dummies(df.ccode, prefix='country: ').iloc[:, 1:]

    # Dummies are appended on the right, so they fall inside the feature slice below
    df = pd.concat([df, year_dummies, region_dummies, country_dummies], axis=1)

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_cd_a[name + ' mean'] = algo_a_mean
    df_cd_a[name + ' std'] = algo_a_std
    df_cd_r[name + ' mean'] = algo_r_mean
    df_cd_r[name + ' std'] = algo_r_std
    df_cd_f[name + ' mean'] = algo_f_mean
    df_cd_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:22<00:00,  7.56s/it]


## Con Type Specification w/ Y/R/C dummies and PCA

In [23]:
# Result tables for the conflict-type dummies + PCA variant: accuracy (_a), recall (_r)
# and F1 (_f), one row per algorithm. The CV loop adds a mean and a std column per dataset.
df_cpd_a, df_cpd_r, df_cpd_f = (
    pd.DataFrame(methods, columns=['Algorithm']) for _ in range(3)
)

In [24]:
# Cross-validate every model on each conflict-type dataset with year / region /
# country dummies appended and a PCA projection applied, and store the mean / std
# of accuracy, recall and F1 across folds in df_cpd_a / df_cpd_r / df_cpd_f.
# NOTE(review): the scaler and PCA are fit on the full dataset before the CV split,
# so test-fold rows influence both transforms -- confirm this is acceptable.
for name, file in tqdm(files):
    df = pd.read_excel(file, index_col=0, na_values=['..'])

    # Creation of Year/Region/Country dummies; .iloc[:, 1:] drops the first level of each
    year_dummies = pd.get_dummies(df.year, prefix='year').iloc[:, 1:]
    region_dummies = pd.get_dummies(df.region, prefix='region').iloc[:, 1:]
    country_dummies = pd.get_dummies(df.ccode, prefix='country: ').iloc[:, 1:]

    # Dummies are appended on the right, so they fall inside the feature slice below
    df = pd.concat([df, year_dummies, region_dummies, country_dummies], axis=1)

    # Per-dataset accumulators: one entry per model
    algo_a_mean, algo_a_std = [], []
    algo_r_mean, algo_r_std = [], []
    algo_f_mean, algo_f_std = [], []

    # Positional layout: column 3 is the label, columns 4 onward are the features
    array = df.values
    X = StandardScaler().fit_transform(array[:, 4:])
    Y = array[:, 3].astype('int')

    # Project the standardized features once; pca_target controls how many components are kept
    pca = PCA(pca_target)
    X = pca.fit_transform(X)

    for algo, model in models:
        accuracy, recall, f1 = [], [], []
        for train_index, test_index in kf.split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_test, Y_test = X[test_index], Y[test_index]

            # Oversample the training fold only; the held-out fold is left untouched.
            # Seeded so repeated runs generate the same synthetic samples.
            sm = SMOTE(random_state=seed)
            X_train_oversampled, Y_train_oversampled = sm.fit_resample(X_train, Y_train)

            # Fit on the oversampled training fold, score on the held-out fold
            model.fit(X_train_oversampled, Y_train_oversampled)
            Y_pred = model.predict(X_test)

            accuracy.append(accuracy_score(Y_test, Y_pred))
            recall.append(recall_score(Y_test, Y_pred))
            f1.append(f1_score(Y_test, Y_pred))

        # Each std is computed from its own metric's fold scores
        algo_a_mean.append(float(np.mean(accuracy)))
        algo_a_std.append(float(np.std(accuracy)))
        algo_r_mean.append(float(np.mean(recall)))
        algo_r_std.append(float(np.std(recall)))
        algo_f_mean.append(float(np.mean(f1)))
        algo_f_std.append(float(np.std(f1)))

    df_cpd_a[name + ' mean'] = algo_a_mean
    df_cpd_a[name + ' std'] = algo_a_std
    df_cpd_r[name + ' mean'] = algo_r_mean
    df_cpd_r[name + ' std'] = algo_r_std
    df_cpd_f[name + ' mean'] = algo_f_mean
    df_cpd_f[name + ' std'] = algo_f_std

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.66s/it]


In [25]:
# Write each results table to its own Excel workbook, in the order listed.
# Suffixes: _acv = accuracy, _rcv = recall, _fcv = F1.
outputs = [
    (df_a_a, '../data/a2_acv.xlsx'),
    (df_a_r, '../data/a2_rcv.xlsx'),
    (df_a_f, '../data/a2_fcv.xlsx'),

    (df_c_a, '../data/c2_acv.xlsx'),
    (df_c_r, '../data/c2_rcv.xlsx'),
    (df_c_f, '../data/c2_fcv.xlsx'),

    (df_ap_a, '../data/ap2_acv.xlsx'),
    (df_ap_r, '../data/ap2_rcv.xlsx'),
    (df_ap_f, '../data/ap2_fcv.xlsx'),

    (df_cp_a, '../data/cp2_acv.xlsx'),
    (df_cp_r, '../data/cp2_rcv.xlsx'),
    (df_cp_f, '../data/cp2_fcv.xlsx'),

    (df_ad_a, '../data/ad2_acv.xlsx'),
    (df_ad_r, '../data/ad2_rcv.xlsx'),
    (df_ad_f, '../data/ad2_fcv.xlsx'),

    (df_cd_a, '../data/cd2_acv.xlsx'),
    (df_cd_r, '../data/cd2_rcv.xlsx'),
    (df_cd_f, '../data/cd2_fcv.xlsx'),

    (df_apd_a, '../data/apd2_acv.xlsx'),
    (df_apd_r, '../data/apd2_rcv.xlsx'),
    (df_apd_f, '../data/apd2_fcv.xlsx'),

    (df_cpd_a, '../data/cpd2_acv.xlsx'),
    (df_cpd_r, '../data/cpd2_rcv.xlsx'),
    (df_cpd_f, '../data/cpd2_fcv.xlsx'),
]
for table, path in outputs:
    table.to_excel(path)