In [None]:
%%time
# change to target directory

In [None]:
%%time
# import necessary dependencies

import os
import re
import gc

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from warnings import simplefilter
simplefilter(action = "ignore", category = FutureWarning)
simplefilter(action = "ignore", category = DeprecationWarning)

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, balanced_accuracy_score,precision_score
# NOTE(review): plot_confusion_matrix is unused below and was removed in
# scikit-learn 1.2 -- confirm the installed version still provides it.
from sklearn.metrics import plot_confusion_matrix



In [None]:
%%time
# config necessary global variables

# Candidate feature columns, presumably the result of a chi-squared
# k-best selection (TODO confirm). Not referenced by the cells shown below.
kbest_chi2 = [
    "Original UPB", "Postal Code",
    "Metropolitan Statistical Area (MSA) Or Metropolitan Division",
    "Mortgage Insurance Percentage (MI %)", "Original Loan Term",
    "Original Combined Loan-to-Value (CLTV)", "Original Loan-to-Value (LTV)",
    "Original Debt-to-Income (DTI) Ratio", "Original Interest Rate",
    "Number of Borrowers", "Maturity Date - Year",
    "Loan Purpose", "Servicer Name", "Seller Name",
    "Number of Units",
]

# Feature columns used to build the model input X in the preprocessing cell;
# presumably chosen by a Pearson-correlation k-best selection (TODO confirm).
# The order is kept as-is because it determines the column order of X.
kbest_pearson = [
    "Interest Only (I/O) Indicator", "Prepayment Penalty Mortgage (PPM) Flag",
    "Amortization Type (Formerly Product Type)",
    "Original Interest Rate", "Loan Purpose",
    "Servicer Name", "Seller Name",
    "Original Combined Loan-to-Value (CLTV)", "Original Loan-to-Value (LTV)",
    "Original Debt-to-Income (DTI) Ratio", "Property Type",
    "Original Loan Term", "Maturity Date - Year",
    "Occupancy Status", "Original UPB",
]

In [None]:
%%time

# get paths to target files
# Root folder that is searched recursively for the preprocessed quarterly files.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
DATA_ROOT = r"F:\FURP\#Centralised-Loan-Credit-Classification\Preprocessed-Histrocial-Data-0726-1700"

# Exact file-name pattern, e.g. "preprocessed_historical_data_2019Q1.parquet.gzip".
# The dots are escaped and the pattern is applied with fullmatch(), so names that
# merely start with the pattern (".gzip.bak") or use other separator characters
# in place of the dots are rejected.
PARQUET_NAME_PATTERN = re.compile(r"preprocessed_historical_data_\d{4}Q\d\.parquet\.gzip")

# Maps file name -> full path. A name found in several sub-folders keeps the
# path of the last one visited by os.walk.
parquetPaths = {}

for dirName, _, fileNames in os.walk(DATA_ROOT):
    for fileName in fileNames:
        if PARQUET_NAME_PATTERN.fullmatch(fileName):
            parquetPaths[fileName] = os.path.join(dirName, fileName)

In [None]:
# Show the discovered file-name -> full-path mapping as the cell output.
parquetPaths

In [None]:
%%time

# load whole dataset
# Read every discovered file in file-name order and concatenate once at the end.
# Calling pd.concat inside the loop would re-copy the accumulated frame on every
# iteration.
quarter_frames = []

for fileName in sorted(parquetPaths.keys()):
    print(fileName)
    quarter_frames.append(pd.read_parquet(parquetPaths[fileName]))

# Fail with a clear message instead of a NameError/KeyError further down when
# the file search found nothing.
if not quarter_frames:
    raise FileNotFoundError("No preprocessed parquet files were found in parquetPaths.")

# Original row indexes are kept (no ignore_index), as before.
loan_data = pd.concat(quarter_frames)

loan_data.info()

# Drop the per-file frames so only the combined frame stays in memory.
del quarter_frames
gc.collect()

In [None]:
%%time

# data preprocessing
# Pull the target column and the selected feature columns out of the full
# frame, then drop the frame so its memory can be reclaimed.
Y = loan_data["Credit Rank"]
X = loan_data[kbest_pearson]

del loan_data
gc.collect()

# 80/20 hold-out split with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size = 0.2, random_state = 42
)

del X, Y
gc.collect()

# Apply Feature Scaling
# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics are used for scaling.
sc = StandardScaler()
sc_scaler = sc.fit(X_train)
X_train = sc_scaler.transform(X_train)
X_test = sc_scaler.transform(X_test)

In [None]:
%%time

# MLPClassifier - Multi-Layer Perceptron Classifier
# random_state fixes weight initialisation and batch shuffling so that a rerun
# reproduces the same model; the train/test split above is already seeded.
mlp_clf = MLPClassifier(max_iter = 1000,
                        activation = "relu",
                        solver = "adam",
                        random_state = 42)

mlp_clf.fit(X_train, Y_train)
Y_pred_mlp = mlp_clf.predict(X_test)

print("MLP Classifier:")
print("Accuracy Score: {}".format(accuracy_score(Y_test, Y_pred_mlp)))
print("Balanced Accuracy Score: {}".format(balanced_accuracy_score(Y_test, Y_pred_mlp)))
print("Classification Report:")
print(classification_report(Y_test, Y_pred_mlp))

# Same report as a nested dict (per-class and averaged metrics) for later cells.
mlp_clf_rport = classification_report(Y_test, Y_pred_mlp, output_dict = True)

In [None]:
%%time
# Switch to the folder where the figures below are saved.
# os.chdir is used because the %cd magic takes a literal path, not a Python
# raw-string expression such as r"...".
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
os.chdir(r"F:\FURP\#Centralised-Loan-Credit-Classification")

In [None]:
# accuracy score:
from sklearn.metrics import accuracy_score

plt.style.use("ggplot")
plt.figure()

# scikit-learn's MLPClassifier has no Keras-style `history`. Per-epoch accuracy
# is only recorded in `validation_scores_` when the model was built with
# early_stopping=True; otherwise the attribute is None or absent.
epoch_scores = getattr(mlp_clf, "validation_scores_", None)

if epoch_scores:
    plt.plot(np.arange(0, len(epoch_scores)), epoch_scores, label="mlp accuracy")
else:
    # No per-epoch scores available: show the final test accuracy as a
    # reference line across the epochs the model actually trained for.
    plt.axhline(accuracy_score(Y_test, Y_pred_mlp), label="mlp accuracy")
    plt.xlim(0, mlp_clf.n_iter_)

plt.title("mlp Accuracy scores")
plt.xlabel("Epochs")
plt.ylabel("Scores")
plt.legend(loc="lower left")

plt.savefig("mlp AccuracyScore.png")

In [None]:
# f1 score:
from sklearn.metrics import f1_score
 
algorithm_x = ['mlp_clf']

# Call the imported f1_score directly: the name `sklearn` itself is never bound
# in this notebook. average="weighted" is used because the default
# average="binary" raises on a multiclass target.
# NOTE(review): assumes "Credit Rank" may have more than two classes -- confirm
# the averaging mode that should be reported.
mlp_f1_score = f1_score(Y_test, Y_pred_mlp, average = "weighted")

f1_score_y = [mlp_f1_score]

# Start a fresh figure so the bar is not drawn onto a previous plot.
plt.figure()
plt.bar(algorithm_x, f1_score_y)

plt.ylabel("f1_score")
plt.title("mlp f1_score")

plt.savefig("mlp F1Score.png")

In [None]:
from sklearn.metrics import recall_score

algorithm_x = ['mlp_clf']

# Use recall_score: the previous call computed precision under a recall label,
# and went through `metrics`, which is only imported in a later cell.
# average="weighted" is used because the default average="binary" raises on a
# multiclass target.
# NOTE(review): assumes "Credit Rank" may have more than two classes -- confirm
# the averaging mode that should be reported.
mlp_recall_score = recall_score(Y_test, Y_pred_mlp, average = "weighted")

recall_score_y = [mlp_recall_score]

# Start a fresh figure so the bar is not drawn onto a previous plot.
plt.figure()
plt.bar(algorithm_x, recall_score_y)

plt.ylabel("recall_score")
plt.title("mlp recall_score")

plt.savefig("mlp recall_score.png")

In [None]:
from sklearn.metrics import precision_score

algorithm_x = ['mlp_clf']

# Call the imported precision_score directly: `metrics` is only imported in a
# later cell, so `metrics.precision_score` fails on a fresh top-to-bottom run.
# average="weighted" is used because the default average="binary" raises on a
# multiclass target.
# NOTE(review): assumes "Credit Rank" may have more than two classes -- confirm
# the averaging mode that should be reported.
mlp_precision_score = precision_score(Y_test, Y_pred_mlp, average = "weighted")

precision_score_y = [mlp_precision_score]

# Start a fresh figure so the bar is not drawn onto a previous plot.
plt.figure()
plt.bar(algorithm_x, precision_score_y)

plt.ylabel("precision_score")
plt.title("mlp precision_score")

plt.savefig("mlp Precision_Score.png")

In [None]:
import numpy as np
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc

# ROC inputs for the already-trained model.
# MLPClassifier has no decision_function, so the score is the predicted
# probability of the positive class. The model is not refitted here, so the
# curve describes the same classifier that was evaluated above.
pos_label = 2

# predict_proba columns follow mlp_clf.classes_; pick the column for pos_label.
# Raises ValueError if pos_label is not one of the trained classes.
pos_col = list(mlp_clf.classes_).index(pos_label)
y_score_mlp = mlp_clf.predict_proba(X_test)[:, pos_col]

# With more than two classes this is the one-vs-rest curve for pos_label.
fpr_mlp, tpr_mlp, thresholds_mlp = metrics.roc_curve(Y_test, y_score_mlp, pos_label=pos_label)
auc_mlp = metrics.auc(fpr_mlp, tpr_mlp)


In [None]:
import matplotlib.pyplot as plt
# Draw the MLP ROC curve with its AUC in the legend and save it to disk.
lw = 2
roc_label = f"MLP ROC curve (AUC = {auc_mlp:0.2f})"

plt.plot(fpr_mlp, tpr_mlp, color="darkorange", lw=lw, label=roc_label)

# Axis ranges: full [0, 1] on x, a little headroom above 1 on y.
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("mlp AUC-ROC")
plt.legend(loc="lower right")

plt.savefig("mlp AUC-ROC.png")