In [1]:
import warnings
from copy import deepcopy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import imblearn.over_sampling

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
# from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.naive_bayes import GaussianNB

from sklearn import metrics
from sklearn.metrics import average_precision_score
# from sklearn.metrics import roc_curve
# from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score
# from sklearn.metrics import precision_recall_curve

In [4]:
from xgboost import XGBClassifier

In [5]:
# from notebook_pbar import * # import my notebook_pbar.py file
# timelist = timelist # import the default variables timelist and then_time
# then_time = then_time

In [6]:
# function cm_val creates an interactive confusion matrix on un-scaled data.
# function cm_val_scaled creates an interactive confusion matrix on scaled data.
from my_functions import cm_val
from my_functions import cm_val_scaled
# function y_pred_inverse extracts the predictive probability from predict_proba.
from my_functions import y_pred_inverse
# function plot_validation_curve_log plots a validation curve on a log scale.
# function plot_validation_curve_reg plots a validation curve on a default scale.
from my_functions import plot_validation_curve_log
from my_functions import plot_validation_curve_reg
# function plot_learning_curve_reg plots a learning curve on a default scale.
from my_functions import plot_learning_curve
# function plot_decision_tree uses graphviz to visualize the splits of a devision tree.
from my_functions import plot_decision_tree
# function train_and_calibrate_cv performs stratified shuffle split on a specified model,
# returning validation scores and roc/auc.
from my_functions import train_and_calibrate_cv

In [7]:
warnings.filterwarnings('ignore')

In [8]:
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

In [9]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [10]:
np.random.seed(42)

In [11]:
df = pd.read_pickle('pickle/df_modeling.pkl')

In [12]:
len(df)

8866

In [13]:
df.head()

Unnamed: 0,cow_code,state_name,year,num_trade_states,export_dollars,import_dollars,military_expenditure,military_personnel,prim_energy_consumption,total_pop,cinc_score,num_alliances,pre_1816_alliances,num_in_effect_1231_2012,defense_treaties,neutrality_treaties,nonaggression_treaties,entente_treaties,num_conflicts,avg_cum_duration,num_wars,ongoing_2010,revision_pct,avg_hostility_level,num_leadership_changes,leader_tenure,age_govt,num_transitions_ever,mtco2,export_import_ratio,previous_mtco2,export_dollars_change,export_dollars_change_pct,import_dollars_change,import_dollars_change_pct,military_expenditure_change,military_expenditure_change_pct,military_personnel_change,military_personnel_change_pct,export_import_ratio_change,export_import_ratio_change_pct,prim_energy_consumption_change,prim_energy_consumption_change_pct,total_pop_change,total_pop_change_pct,mtco2_change,mtco2_change_pct,cinc_score_change,Americas,Asia,Europe,Oceania,Indirect election,Nonelective,No legislature exists,Non‐elective legislature,Appointed,Closed,Elected,All parties legally banned,Legally single party state,Multiple parties legally allowed,Multiple parties,No parties,One party,Multiple parties outside regime,No parties outside regime,One party outside regime,Legislature with multiple parties,No legislature or all nonpartisan,Only members from regime party,Rules rewritten unconstitutionally,collective_leadership,military_leader,royal_leader,nominal_vs_eff_diff,communist_leader,leader_died,democratic_regime,cabinet_assembly,popular_election,Civilian dictatorship,Military dictatorship,Mixed (semi‐presidential) democracy,Parliamentary democracy,Presidential democracy,Royal dictatorship,transition_to_democracy,transition_to_dictatorship,war_present,conflict_present,40s,50s,60s,70s,80s,90s
0,2,United States of America,1947,67.0,76000000.0,11343750000.0,14315999000.0,1583000.0,2631554000000.0,144126000.0,0.31,39.0,0.0,19.0,39.0,0.0,20.0,39.0,0.0,0.0,0.0,0.0,0.0,1.0,0,3,78.0,0,2480.766,0.007,2253.03,-84000000.0,0.475,0.0,1.0,-30817985000.0,0.317,-1447000.0,0.522,-0.007,0.475,255266000000.0,1.107,2737000.0,1.019,227.736,1.101,-0.054,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
1,390,Denmark,1947,67.0,616200000.0,484590000.0,59412000.0,14000.0,17814000000.0,4150000.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1,47.0,0,21.456,1.272,17.646,0.0,1.0,0.0,1.0,7023000.0,1.134,0.0,1.0,0.0,1.0,3422000000.0,1.238,50000.0,1.012,3.811,1.216,0.0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0
2,94,Costa Rica,1947,67.0,39700000.0,35990000.0,1411000.0,1000.0,38000000.0,730000.0,0.0,39.0,0.0,19.0,39.0,0.0,20.0,39.0,0.0,0.0,0.0,0.0,0.0,1.0,0,4,29.0,1,0.286,1.103,0.286,0.0,1.0,0.0,1.0,237000.0,1.202,0.0,1.0,0.0,1.0,2000000.0,1.056,20000.0,1.028,0.0,1.0,-0.0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
3,395,Iceland,1947,67.0,67200000.0,42760000.0,7593960.688,0.0,28000000.0,134000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1,1,4.0,0,0.004,1.572,0.011,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,4000000.0,1.167,2000.0,1.015,-0.007,0.336,-0.0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
4,93,Nicaragua,1947,67.0,25080000.0,17820000.0,7037000.0,3000.0,18000000.0,977000.0,0.0,39.0,0.0,19.0,39.0,0.0,20.0,39.0,0.0,0.0,0.0,0.0,0.0,1.0,0,11,11.0,0,0.007,1.407,0.004,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,28000.0,1.03,0.004,1.973,-0.0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [14]:
df.drop(['cow_code', 'year', 'state_name', 'export_import_ratio', 'avg_hostility_level',
         'military_expenditure', 'military_personnel', 'num_wars', 'num_conflicts',
         'war_present', 'revision_pct', 'avg_cum_duration', 'num_trade_states',
         'export_dollars', 'import_dollars', 'prim_energy_consumption',
         'total_pop', 'cinc_score', 'export_dollars_change', 'export_dollars_change_pct',
         'import_dollars_change', 'import_dollars_change_pct', 'num_alliances',
         'pre_1816_alliances', 'num_in_effect_1231_2012', 'defense_treaties',
         'neutrality_treaties', 'nonaggression_treaties', 'entente_treaties',
         'leader_tenure', 'age_govt', 'num_transitions_ever',
         'num_leadership_changes', 'military_expenditure_change',
         'military_expenditure_change_pct', 'military_personnel_change',
         'military_personnel_change_pct', 'export_import_ratio_change',
         'export_import_ratio_change_pct', 'prim_energy_consumption_change',
         'prim_energy_consumption_change_pct', 'cinc_score_change',
         'total_pop_change', 'total_pop_change_pct', 'mtco2',
         'mtco2_change_pct', 'mtco2_change'], axis = 1).sum()

ongoing_2010                              19.850
previous_mtco2                        973435.691
Americas                                1809.000
Asia                                    2278.000
Europe                                  1937.000
Oceania                                  468.000
Indirect election                       3671.000
Nonelective                             2431.000
No legislature exists                    969.000
Non‐elective legislature                 435.000
Appointed                                373.000
Closed                                  1141.000
Elected                                 7321.000
All parties legally banned               945.000
Legally single party state              1037.000
Multiple parties legally allowed        6864.000
Multiple parties                        6605.000
No parties                               908.000
One party                               1333.000
Multiple parties outside regime         6216.000
No parties outside r

In [15]:
# columns to remove if overfitting
# 'military_expenditure', 'military_expenditure_change', 'military_expenditure_change_pct',
# 'military_personnel', 'military_personnel_change', 'military_personnel_change_pct', 'export_import_ratio'

In [16]:
X = df.drop(['cow_code', 'year', 'state_name', 'avg_hostility_level', 'num_wars',
             'num_conflicts', 'num_wars', 'num_conflicts', 'war_present',
             'conflict_present', 'revision_pct', 'avg_cum_duration', 'ongoing_2010'], axis = 1)
y = df['conflict_present']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 101)

In [17]:
df['conflict_present'].value_counts()

0    6108
1    2758
Name: conflict_present, dtype: int64

In [18]:
# from sklearn.decomposition import PCA

In [19]:
# pca = PCA(n_components=2)
# pca.fit(df[['import_dollars', 'export_dollars', 'export_import_ratio']])

In [20]:
continuous_columns = ['num_trade_states', 'export_dollars', 'import_dollars',
                      'prim_energy_consumption', 'total_pop', 'cinc_score',
                      'num_alliances', 'pre_1816_alliances', 'num_in_effect_1231_2012',
                      'defense_treaties', 'neutrality_treaties', 'nonaggression_treaties',
                      'entente_treaties', 'leader_tenure', 'age_govt', 'num_transitions_ever',
                      'num_leadership_changes', 'military_personnel', 'military_personnel_change',
                      'military_personnel_change_pct', 'export_import_ratio',
                      'export_import_ratio_change', 'export_import_ratio_change_pct',
                      'military_expenditure', 'military_expenditure_change',
                      'military_expenditure_change_pct', 'export_dollars_change',
                      'export_dollars_change_pct','import_dollars_change',
                      'import_dollars_change_pct', 'prim_energy_consumption_change',
                      'prim_energy_consumption_change_pct', 'total_pop_change',
                      'total_pop_change_pct', 'cinc_score_change', 'mtco2',
                      'mtco2_change_pct', 'mtco2_change']

In [22]:
X_train_cont = X_train[continuous_columns]
X_train_cont = X_train_cont.reset_index()
X_train_cont.drop(['index'], axis = 1, inplace = True)

X_train_dummy = X_train.drop(continuous_columns, axis = 1)
X_train_dummy = X_train_dummy.reset_index()
X_train_dummy.drop(['index'], axis = 1, inplace = True)

X_test_cont = X_test[continuous_columns]
X_test_cont = X_test_cont.reset_index()
X_test_cont.drop(['index'], axis = 1, inplace = True)

X_test_dummy = X_test.drop(continuous_columns, axis = 1)
X_test_dummy = X_test_dummy.reset_index()
X_test_dummy.drop(['index'], axis = 1, inplace = True)

In [23]:
scaler = MinMaxScaler()

X_train_cont_scaled = pd.DataFrame(scaler.fit_transform(X_train_cont))
X_train_cont_scaled = X_train_cont_scaled.reset_index()
X_train_cont_scaled.drop(['index'], axis = 1, inplace = True)
X_train_cont_scaled.columns = continuous_columns

X_test_cont_scaled = pd.DataFrame(scaler.transform(X_test_cont))
X_test_cont_scaled = X_test_cont_scaled.reset_index()
X_test_cont_scaled.drop(['index'], axis = 1, inplace = True)
X_test_cont_scaled.columns = continuous_columns

X_train_scaled = pd.concat([X_train_cont_scaled, X_train_dummy], axis = 1)
X_test_scaled = pd.concat([X_test_cont_scaled, X_test_dummy], axis = 1)

In [24]:
pd.DataFrame(y_train)['conflict_present'].value_counts()

0    4898
1    2194
Name: conflict_present, dtype: int64

In [25]:
smote = imblearn.over_sampling.SMOTE(ratio = {0: 4908, 1: (4908)}, random_state = 101)
X_train_scaled, y_train = smote.fit_sample(X_train_scaled, y_train)

X_train_scaled = pd.DataFrame(X_train_scaled)
X_train_scaled.columns = list(continuous_columns) + list(X_train_dummy.columns)
y_train = pd.DataFrame(y_train)
y_train.columns = ['conflict_present']

In [26]:
print(X_train_scaled.shape)
print(X_test_scaled.shape)
print(y_train.shape)
print(y_test.shape)

(9816, 86)
(1774, 86)
(9816, 1)
(1774,)


In [42]:
lr = LogisticRegression(penalty = 'l1')
lr.fit(X_train_scaled, y_train)
train_predictions = lr.predict(X_train_scaled)
test_predictions = lr.predict(X_test_scaled)

In [43]:
lr_coef_df = pd.DataFrame(np.e**lr.coef_)
lr_coef_df.columns = list(continuous_columns) + list(X_train_dummy.columns)
lr_coef_df

Unnamed: 0,num_trade_states,export_dollars,import_dollars,prim_energy_consumption,total_pop,cinc_score,num_alliances,pre_1816_alliances,num_in_effect_1231_2012,defense_treaties,neutrality_treaties,nonaggression_treaties,entente_treaties,leader_tenure,age_govt,num_transitions_ever,num_leadership_changes,military_personnel,military_personnel_change,military_personnel_change_pct,export_import_ratio,export_import_ratio_change,export_import_ratio_change_pct,military_expenditure,military_expenditure_change,military_expenditure_change_pct,export_dollars_change,export_dollars_change_pct,import_dollars_change,import_dollars_change_pct,prim_energy_consumption_change,prim_energy_consumption_change_pct,total_pop_change,total_pop_change_pct,cinc_score_change,mtco2,mtco2_change_pct,mtco2_change,previous_mtco2,Americas,Asia,Europe,Oceania,Indirect election,Nonelective,No legislature exists,Non‐elective legislature,Appointed,Closed,Elected,All parties legally banned,Legally single party state,Multiple parties legally allowed,Multiple parties,No parties,One party,Multiple parties outside regime,No parties outside regime,One party outside regime,Legislature with multiple parties,No legislature or all nonpartisan,Only members from regime party,Rules rewritten unconstitutionally,collective_leadership,military_leader,royal_leader,nominal_vs_eff_diff,communist_leader,leader_died,democratic_regime,cabinet_assembly,popular_election,Civilian dictatorship,Military dictatorship,Mixed (semi‐presidential) democracy,Parliamentary democracy,Presidential democracy,Royal dictatorship,transition_to_democracy,transition_to_dictatorship,40s,50s,60s,70s,80s,90s
0,0.718,1.0,10.915,1.0,0.347,1.0,1.0,2.611,2.042,0.502,4.845,1.768,1.144,0.514,2.054,8.438,1.0,540741506.78,1.0,23.162,1.0,1.0,1.0,1547.642,1.0,1.0,0.989,1.0,0.619,3.331,0.799,1.0,1.0,1.0,1.0,1.0,1.0,0.561,0.999,0.364,2.219,0.526,0.344,0.874,1.062,1.0,0.79,1.154,1.215,0.833,1.51,0.611,0.807,0.947,0.665,1.263,1.005,0.78,0.939,1.007,0.916,0.996,1.085,0.226,2.454,0.995,0.937,0.703,0.975,0.846,2.05,1.019,1.475,0.532,0.598,0.949,1.245,0.56,0.822,0.711,0.868,1.383,1.326,1.0,1.047,1.453


In [44]:
print(roc_auc_score(y_train, train_predictions).round(3), '\n')
print(metrics.classification_report(y_train, train_predictions))
print(metrics.confusion_matrix(y_train, train_predictions))
print('\n')
print(roc_auc_score(y_test, test_predictions).round(3), '\n')
print(metrics.classification_report(y_test, test_predictions))
print(metrics.confusion_matrix(y_test, test_predictions))

0.715 

              precision    recall  f1-score   support

           0       0.69      0.77      0.73      4908
           1       0.74      0.66      0.70      4908

   micro avg       0.71      0.71      0.71      9816
   macro avg       0.72      0.71      0.71      9816
weighted avg       0.72      0.71      0.71      9816

[[3784 1124]
 [1676 3232]]


0.703 

              precision    recall  f1-score   support

           0       0.82      0.76      0.79      1210
           1       0.56      0.64      0.60       564

   micro avg       0.72      0.72      0.72      1774
   macro avg       0.69      0.70      0.69      1774
weighted avg       0.74      0.72      0.73      1774

[[923 287]
 [201 363]]


In [45]:
lr = LogisticRegression(penalty = 'l2')
lr.fit(X_train_scaled, y_train)
train_predictions = lr.predict(X_train_scaled)
test_predictions = lr.predict(X_test_scaled)

In [46]:
lr_coef_df = pd.DataFrame(np.e**lr.coef_)
lr_coef_df.columns = list(continuous_columns) + list(X_train_dummy.columns)
lr_coef_df

Unnamed: 0,num_trade_states,export_dollars,import_dollars,prim_energy_consumption,total_pop,cinc_score,num_alliances,pre_1816_alliances,num_in_effect_1231_2012,defense_treaties,neutrality_treaties,nonaggression_treaties,entente_treaties,leader_tenure,age_govt,num_transitions_ever,num_leadership_changes,military_personnel,military_personnel_change,military_personnel_change_pct,export_import_ratio,export_import_ratio_change,export_import_ratio_change_pct,military_expenditure,military_expenditure_change,military_expenditure_change_pct,export_dollars_change,export_dollars_change_pct,import_dollars_change,import_dollars_change_pct,prim_energy_consumption_change,prim_energy_consumption_change_pct,total_pop_change,total_pop_change_pct,cinc_score_change,mtco2,mtco2_change_pct,mtco2_change,previous_mtco2,Americas,Asia,Europe,Oceania,Indirect election,Nonelective,No legislature exists,Non‐elective legislature,Appointed,Closed,Elected,All parties legally banned,Legally single party state,Multiple parties legally allowed,Multiple parties,No parties,One party,Multiple parties outside regime,No parties outside regime,One party outside regime,Legislature with multiple parties,No legislature or all nonpartisan,Only members from regime party,Rules rewritten unconstitutionally,collective_leadership,military_leader,royal_leader,nominal_vs_eff_diff,communist_leader,leader_died,democratic_regime,cabinet_assembly,popular_election,Civilian dictatorship,Military dictatorship,Mixed (semi‐presidential) democracy,Parliamentary democracy,Presidential democracy,Royal dictatorship,transition_to_democracy,transition_to_dictatorship,40s,50s,60s,70s,80s,90s
0,0.631,2.551,3.449,1.624,2.7,11.354,0.919,2.663,2.189,0.429,5.269,1.534,1.694,0.584,2.235,9.224,1.167,2270.743,1.616,15.291,1.494,0.884,0.934,4.028,1.111,0.923,0.808,0.807,0.777,2.442,0.581,0.592,2.043,0.896,1.394,0.893,0.67,0.399,1.0,0.343,2.659,0.612,0.347,0.872,1.196,0.926,0.754,1.094,1.124,0.753,1.714,0.611,0.9,0.942,0.733,1.364,1.174,0.733,1.094,1.03,0.889,1.011,1.028,0.235,2.559,0.728,0.911,0.807,0.953,0.799,2.208,1.107,1.623,0.616,0.594,0.968,1.391,0.728,0.829,0.634,0.764,1.224,1.306,0.99,1.051,1.45


In [32]:
print(roc_auc_score(y_train, train_predictions).round(3), '\n')
print(metrics.classification_report(y_train, train_predictions))
print(metrics.confusion_matrix(y_train, train_predictions))
print('\n')
print(roc_auc_score(y_test, test_predictions).round(3), '\n')
print(metrics.classification_report(y_test, test_predictions))
print(metrics.confusion_matrix(y_test, test_predictions))

0.707 

              precision    recall  f1-score   support

           0       0.69      0.76      0.72      4908
           1       0.73      0.66      0.69      4908

   micro avg       0.71      0.71      0.71      9816
   macro avg       0.71      0.71      0.71      9816
weighted avg       0.71      0.71      0.71      9816

[[3720 1188]
 [1690 3218]]


0.695 

              precision    recall  f1-score   support

           0       0.82      0.74      0.78      1210
           1       0.54      0.65      0.59       564

   micro avg       0.71      0.71      0.71      1774
   macro avg       0.68      0.70      0.68      1774
weighted avg       0.73      0.71      0.72      1774

[[900 310]
 [199 365]]


In [33]:
cv = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 101)
param_grid = {'n_estimators': [400], 'max_depth': np.arange(3, 10), 'max_features': np.arange(3, 10), 'min_samples_split': np.arange(3, 7), 'min_samples_leaf': np.arange(2, 7), 'bootstrap': [True, False]}
rand = RandomizedSearchCV(RandomForestClassifier(random_state = 101), param_distributions = param_grid, cv = cv, scoring = 'roc_auc', refit = True, random_state = 101)
rand.fit(X_train_scaled, y_train)

train_predictions = rand.predict(X_train_scaled)
test_predictions = rand.predict(X_test_scaled)
print(rand.cv_results_['mean_train_score'].mean())
print(rand.cv_results_['mean_test_score'].mean())
print('')
print(rand.best_params_)

0.875407815321575
0.8490047520185676

{'n_estimators': 400, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 6, 'max_depth': 8, 'bootstrap': False}


In [34]:
print(roc_auc_score(y_train, train_predictions).round(3), '\n')
print(metrics.classification_report(y_train, train_predictions))
print(metrics.confusion_matrix(y_train, train_predictions))
print('\n')
print(roc_auc_score(y_test, test_predictions).round(3), '\n')
print(metrics.classification_report(y_test, test_predictions))
print(metrics.confusion_matrix(y_test, test_predictions))

0.854 

              precision    recall  f1-score   support

           0       0.84      0.87      0.86      4908
           1       0.86      0.84      0.85      4908

   micro avg       0.85      0.85      0.85      9816
   macro avg       0.85      0.85      0.85      9816
weighted avg       0.85      0.85      0.85      9816

[[4259  649]
 [ 784 4124]]


0.756 

              precision    recall  f1-score   support

           0       0.85      0.83      0.84      1210
           1       0.65      0.69      0.67       564

   micro avg       0.78      0.78      0.78      1774
   macro avg       0.75      0.76      0.75      1774
weighted avg       0.79      0.78      0.78      1774

[[1000  210]
 [ 177  387]]


In [None]:
cv = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 101)
fbeta = make_scorer(fbeta_score, average = 'weighted', beta = 0.5)

params_grid = {
    'n_estimators': [400],
    'learning_rate': [0.5],
    'max_depth': [6, 7, 8],
    'max_features': np.arange(3, 7),
    'min_samples_split': np.arange(6, 7),
    'min_samples_leaf': np.arange(2, 7)
}

params_fixed = {
    'objective':'binary:logistic',
    'silent': 1
}

best_grid = GridSearchCV(
    estimator = XGBClassifier(**params_fixed, seed = 42),
    param_grid = params_grid,
    cv = cv,
    scoring = fbeta

)

In [None]:
best_grid.fit(X_train_scaled, y_train)

In [None]:
print("Best accuracy obtained {0}".format(best_grid.best_score_))
print("Parameters:")
for key, value in best_grid.best_params_.items():
    print("\t{}: {}".format(key, value))

In [None]:
predictions = best_grid.predict(X_test_scaled)

In [None]:
# created new confusion matrix for tuned model.
print('\n', metrics.classification_report(y_test, predictions))

df_cm = pd.DataFrame(metrics.confusion_matrix(y_test, predictions))
df_cm.rename({0: 'Peace', 1: 'War'}, axis = 1, inplace = True)
df_cm.rename(index = {0: 'Peace', 1: 'War'}, inplace = True)

fig, ax = plt.subplots(1, 1, figsize = (5, 5))
sns.set_context(font_scale = 1.2)
sns.heatmap(df_cm, annot = True, fmt = 'g', cbar = False, cmap = 'cividis')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label');
ax.xaxis.set_ticks_position('top') 
plt.tight_layout()