In [None]:
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from IPython.display import display
from IPython.display import Image
from sklearn.externals.six import StringIO  
from sklearn.tree import export_graphviz
from sklearn.linear_model import LogisticRegression
import pydot
pd.options.display.max_columns = None


In [None]:
# Read the predictors and the target from their CSV files.
X = pd.read_csv('FinancialMetrics.csv')
y = pd.read_csv('DPD90.csv')

# Two-stage hold-out split: 30% of all rows become the test set, then 30% of the
# remaining rows become the validation set; what is left is used for training.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=102, test_size=0.3)
(X_train, X_val,
 y_train, y_val) = train_test_split(X_train, y_train, random_state=102, test_size=0.3)

AdaBoost Classifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Boosted depth-1 decision trees using the SAMME algorithm.
# random_state is fixed so that re-running the cell reproduces the same model.
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME",
                         n_estimators=200, random_state=102)

# y_train comes from read_csv as a DataFrame (presumably a single column);
# flatten it to the 1-D target the estimator expects.
bdt.fit(X_train, np.ravel(y_train))

# Hard labels and class probabilities for the validation and test rows.
bdt_val = bdt.predict(X_val)
val_prob_bdt = bdt.predict_proba(X_val)
bdt_pred = bdt.predict(X_test)
pred_prob_bdt = bdt.predict_proba(X_test)

from sklearn.metrics import classification_report,confusion_matrix
# Test-set confusion matrix and per-class report.
print(confusion_matrix(y_test,bdt_pred))
print(classification_report(y_test,bdt_pred))

Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default hyper-parameters; the seed is fixed because
# the forest is stochastic and an unseeded model changes on every run.
rfc = RandomForestClassifier(random_state=102)

# y_train comes from read_csv as a DataFrame (presumably a single column);
# flatten it to the 1-D target the estimator expects.
rfc.fit(X_train, np.ravel(y_train))

# Hard labels and class probabilities for the validation and test rows.
rfc_val = rfc.predict(X_val)
val_prob_rfc = rfc.predict_proba(X_val)
rfc_pred = rfc.predict(X_test)
pred_prob_rfc = rfc.predict_proba(X_test)

from sklearn.metrics import classification_report,confusion_matrix
# Test-set confusion matrix and per-class report.
print(confusion_matrix(y_test,rfc_pred))
print(classification_report(y_test,rfc_pred))

Random Forest Cross-Validation

In [None]:
from sklearn.model_selection import GridSearchCV

# NOTE: this rebinds `rfc` to a new, unfitted forest; the probabilities stored
# earlier in val_prob_rfc / pred_prob_rfc are not affected.
# The seed is fixed so the selected hyper-parameters are reproducible.
rfc = RandomForestClassifier(random_state=102)

# Exhaustive grid: 3 x 4 x 3 = 36 parameter combinations.
param_grid = {'min_samples_leaf': [2, 4, 6],
              'max_depth': [3, 6, 9, 12],
              'min_samples_split': [2, 5, 10]}

CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid)

# y_train comes from read_csv as a DataFrame (presumably a single column);
# flatten it to the 1-D target the estimator expects.
CV_rfc.fit(X_train, np.ravel(y_train))

# Hard labels and class probabilities from the fitted search object.
CV_rfc_val = CV_rfc.predict(X_val)
val_prob_CV_rfc = CV_rfc.predict_proba(X_val)
CV_rfc_pred = CV_rfc.predict(X_test)
pred_prob_CV_rfc = CV_rfc.predict_proba(X_test)

# Test-set confusion matrix and per-class report.
print(confusion_matrix(y_test,CV_rfc_pred))
print(classification_report(y_test,CV_rfc_pred))

Support Vector Classifier

In [None]:
from sklearn.preprocessing import StandardScaler

# Rebuild the training subset directly from X with the same two-stage split
# settings (test_size=0.3, random_state=102) used for the modelling splits.
# The scaler is then fitted on training rows only; fitting it on all of X would
# leak validation/test statistics into the features. Deriving the rows from X
# (rather than reusing X_train) keeps this cell idempotent on re-run.
X_fit, _ = train_test_split(X, test_size=0.3, random_state=102)
X_fit, _ = train_test_split(X_fit, test_size=0.3, random_state=102)

scaler = StandardScaler()
scaler.fit(X_fit)

# Apply the training-set scaling to every row so the full matrix can be split afterwards.
scaled_X = scaler.transform(X)

In [None]:
# Re-split using the standardized features. The sizes and random_state match the
# split of the raw features, and X_train / X_val / X_test (together with the y_*
# targets) are rebound here to the new results.
(X_train, X_test,
 y_train, y_test) = train_test_split(scaled_X, y, random_state=102, test_size=0.3)
(X_train, X_val,
 y_train, y_val) = train_test_split(X_train, y_train, random_state=102, test_size=0.3)

In [None]:
from sklearn.svm import SVC

# probability=True enables predict_proba; the probability calibration is
# randomized, so the seed is fixed to make the probabilities reproducible.
svc = SVC(probability=True, random_state=102)

# y_train comes from read_csv as a DataFrame (presumably a single column);
# flatten it to the 1-D target the estimator expects.
svc.fit(X_train, np.ravel(y_train))

# Hard labels and class probabilities for the validation and test rows.
svc_val = svc.predict(X_val)
val_prob_svc = svc.predict_proba(X_val)
svc_pred = svc.predict(X_test)
pred_prob_svc = svc.predict_proba(X_test)

# Label predictions wrapped as DataFrames; the test frame reuses svc_pred
# instead of running the same prediction a second time.
svc_trainpred = pd.DataFrame(svc.predict(X_train))
svc_testpred = pd.DataFrame(svc_pred)

# Test-set confusion matrix and per-class report.
print(confusion_matrix(y_test,svc_pred))
print(classification_report(y_test,svc_pred))

k-Nearest Neighbors Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 20-nearest-neighbour classifier trained on the current X_train / y_train.
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train, y_train)

# Validation rows: hard labels, then class probabilities.
knn_val = knn.predict(X_val)
val_prob_knn = knn.predict_proba(X_val)

# Test rows: hard labels, then class probabilities.
knn_pred = knn.predict(X_test)
pred_prob_knn = knn.predict_proba(X_test)

# Test-set confusion matrix followed by the per-class report.
print(confusion_matrix(y_true=y_test, y_pred=knn_pred))
print(classification_report(y_true=y_test, y_pred=knn_pred))

Stacking Ensemble Using Logistic Regression

In [None]:
# The base-model outputs were produced in different cells (some on raw, some on
# scaled features). Both splits use the same sizes and random_state, so the rows
# are expected to correspond; check at least that the lengths agree before stacking.
assert len(val_prob_rfc) == len(val_prob_svc) == len(val_prob_bdt) == len(y_val)
assert len(pred_prob_rfc) == len(pred_prob_svc) == len(pred_prob_bdt) == len(y_test)

# Meta-features: column 1 of each base model's predict_proba output
# (presumably the positive-class probability; verify against classes_).
# Each column is named after its model instead of all three being labelled 0.
ens_val = pd.concat([pd.Series(val_prob_rfc[:,1], name='rfc'),
                     pd.Series(val_prob_svc[:,1], name='svc'),
                     pd.Series(val_prob_bdt[:,1], name='bdt')], axis=1)
ens_test = pd.concat([pd.Series(pred_prob_rfc[:,1], name='rfc'),
                      pd.Series(pred_prob_svc[:,1], name='svc'),
                      pd.Series(pred_prob_bdt[:,1], name='bdt')], axis=1)

# The meta-learner is fitted on the validation rows and evaluated on the test rows.
# y_val comes from read_csv as a DataFrame (presumably a single column);
# flatten it to the 1-D target the estimator expects.
lrc = LogisticRegression()
lrc.fit(ens_val, np.ravel(y_val))
lrc_pred = lrc.predict(ens_test)
lrc_val_pred = lrc.predict(ens_val)

# Class probabilities from the meta-learner.
val_prob_lrc = lrc.predict_proba(ens_val)
pred_prob_lrc = lrc.predict_proba(ens_test)

lrc_valpred = pd.DataFrame(val_prob_lrc)
lrc_testpred = pd.DataFrame(pred_prob_lrc)

# Test-set confusion matrix and per-class report for the stacked model.
print(confusion_matrix(y_test,lrc_pred))
print(classification_report(y_test,lrc_pred))