# Credit Rating Prediction
### Deep Learning Classifiers with Neural Networks and Multi-layer Perceptrons (MLP)

## Introduction

In [86]:
# import 
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.neural_network import MLPClassifier

In [58]:
# import data
corporate_pd = pd.read_csv("../corporate_rating.csv")
# drop the identifying / categorical columns that are not used as model inputs
corporate_pd = corporate_pd.drop(['Name','Date','Rating Agency Name','Sector'],axis = 1)

# drop corporations with 2 or less observations
# https://stackoverflow.com/questions/29836836/how-do-i-filter-a-pandas-dataframe-based-on-value-counts
corporate_filtered = corporate_pd.groupby('Symbol').filter(lambda x: len(x) > 2)

# 'Symbol' was only needed for the per-company filter above
corporate_filtered = corporate_filtered.drop('Symbol',axis = 1)

# convert the ratings to numerical values
# NOTE: `values` is paired positionally with the order in which the rating
# labels first appear in the data (`unique()` preserves first-appearance order),
# so the list below is only valid for this exact file and filter.
ratings = corporate_filtered['Rating'].unique()
values = [3,4,2,5,6,7,10,8,1,9]
if len(ratings) != len(values):
    # fail loudly instead of silently mis-pairing labels and codes
    raise ValueError(
        "expected %d distinct ratings but found %d" % (len(values), len(ratings))
    )
rating_to_value = dict(zip(ratings, values))
# Assign the converted column back explicitly: calling an in-place method on a
# column selection does not reliably update the parent frame (it is a no-op
# under pandas Copy-on-Write).
corporate_filtered['Rating'] = corporate_filtered['Rating'].map(rating_to_value)
corporate_filtered.head()

Unnamed: 0,Rating,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,netProfitMargin,pretaxProfitMargin,grossProfitMargin,operatingProfitMargin,returnOnAssets,...,effectiveTaxRate,freeCashFlowOperatingCashFlowRatio,freeCashFlowPerShare,cashPerShare,companyEquityMultiplier,ebitPerRevenue,enterpriseValueMultiple,operatingCashFlowPerShare,operatingCashFlowSalesRatio,payablesTurnover
0,3,0.945894,0.426395,0.09969,44.203245,0.03748,0.049351,0.176631,0.06151,0.041189,...,0.202716,0.437551,6.810673,9.809403,4.008012,0.049351,7.057088,15.565438,0.058638,3.906655
1,4,1.033559,0.498234,0.20312,38.991156,0.044062,0.048857,0.175715,0.066546,0.053204,...,0.074155,0.541997,8.625473,17.40227,3.156783,0.048857,6.460618,15.91425,0.067239,4.002846
2,4,0.963703,0.451505,0.122099,50.841385,0.032709,0.044334,0.170843,0.059783,0.032497,...,0.214529,0.513185,9.693487,13.103448,4.094575,0.044334,10.49197,18.888889,0.074426,3.48351
3,4,1.019851,0.510402,0.176116,41.161738,0.020894,-0.012858,0.138059,0.04243,0.02569,...,1.816667,-0.14717,-1.015625,14.440104,3.63095,-0.012858,4.080741,6.901042,0.028394,4.58115
4,4,0.957844,0.495432,0.141608,47.761126,0.042861,0.05377,0.17772,0.065354,0.046363,...,0.166966,0.451372,7.135348,14.257556,4.01278,0.05377,8.293505,15.808147,0.058065,3.85779


In [59]:
# convert the cleaned DataFrame to a plain NumPy array; the later helpers
# index it positionally (column 0 is read as the rating)
corporate_filtered_np = corporate_filtered.to_numpy()
# bare expression so the notebook displays the array
corporate_filtered_np

array([[ 3.        ,  0.9458936 ,  0.42639463, ..., 15.56543837,
         0.05863769,  3.90665455],
       [ 4.        ,  1.03355902,  0.49823374, ..., 15.91424968,
         0.06723853,  4.00284605],
       [ 4.        ,  0.96370344,  0.45150542, ..., 18.88888889,
         0.07442633,  3.48350951],
       ...,
       [ 6.        ,  0.88387525,  0.84255282, ...,  1.5753285 ,
         0.28363421,  2.30016775],
       [ 6.        ,  0.91171323,  0.74835646, ...,  1.07444056,
         0.21778343,  1.99760765],
       [ 7.        ,  1.0850071 ,  1.02637452, ...,  2.25865015,
         0.25260643,  1.86568167]])

## Split the data into 10% testing and 90% training

In [60]:
# sample 10% of the total data in the test set
def split_train_test(data, test_fraction=0.1, seed=0):
    """Randomly split the rows of a 2-D array into a training and a test set.

    Parameters
    ----------
    data : array-like, 2-D
        One observation per row.
    test_fraction : float, default 0.1
        Share of rows placed in the test set; the row count is rounded down.
    seed : int, default 0
        Seed for the row sampling, so the split is reproducible.

    Returns
    -------
    (train, test) : tuple of numpy.ndarray
        `test` holds the sampled rows (in sampled order), `train` holds the
        remaining rows in their original order.

    Raises
    ------
    ValueError
        If `test_fraction` is outside [0, 1].
    """
    if not 0 <= test_fraction <= 1:
        raise ValueError("test_fraction must be between 0 and 1")

    data = np.asarray(data)
    n_rows = len(data)
    n_test = int(n_rows * test_fraction)

    # A local generator yields the same indices as seeding the global NumPy RNG
    # with the same seed, but leaves the global random state untouched.
    rng = np.random.RandomState(seed)
    # sample the test-row indices without replacement
    index = rng.choice(n_rows, size = n_test, replace = False)

    # save the sampled rows in the test set
    test = data[index,:]
    # save the rest in the training set
    train = np.delete(data, index, axis = 0)
    return train,test

In [61]:
# build the hold-out split from the full array
train, test = split_train_test(corporate_filtered_np)
# report the test-set shape first, then the training-set shape
for subset in (test, train):
    print(subset.shape)

(173, 26)
(1565, 26)


## Split the data into ratings and features

In [62]:
def split_rating_features(data):
    """Separate the rating column from the feature columns.

    Column 0 of the 2-D array `data` is returned as the ratings and every
    remaining column as the features, in that order: (ratings, features).
    """
    label_column = 0
    return data[:, label_column], data[:, label_column + 1:]

In [116]:
# peel the label column off each partition
train_ratings, train_features = split_rating_features(train)
test_ratings, test_features = split_rating_features(test)
# displayed as (number of test rows, number of feature columns)
test_features.shape

(173, 25)

## Neural Network with ReLU activation function

In [77]:
# build a feed-forward network: a 25-unit ReLU hidden layer, dropout (rate 0.2),
# and a linear output layer
# the output layer emits 11 logits: the ratings are encoded as the integers
# 1..10 and are used directly as class indices, so index 0 is never a target
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(25, activation='relu'))
model1.add(tf.keras.layers.Dropout(0.2))
model1.add(tf.keras.layers.Dense(11))

# Loss on integer labels; from_logits=True because the output layer has no softmax
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model1.compile(
    loss = loss_fn,
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [115]:
# fit the model
# cast the features to float32 before handing them to Keras
train_features = np.asarray(train_features).astype('float32')
# 8 passes over the training data; the bare call also displays the History object
model1.fit(train_features, train_ratings, epochs=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x221d2041e80>

In [83]:
# test the model
# returns [loss, accuracy] on the held-out test set ('accuracy' is the metric
# requested when the model was compiled)
model1.evaluate(test_features, test_ratings, verbose=2)

6/6 - 0s - loss: 772.1143 - accuracy: 0.3064 - 32ms/epoch - 5ms/step


[772.1143188476562, 0.30635836720466614]

## Neural Network with Sigmoid activation function

In [114]:
# feed-forward network with a 20-unit sigmoid hidden layer, dropout (rate 0.2),
# and an 11-logit linear output layer
model2 = tf.keras.models.Sequential()
model2.add(tf.keras.layers.Dense(20, activation='sigmoid'))
model2.add(tf.keras.layers.Dropout(0.2))
model2.add(tf.keras.layers.Dense(11))

# Loss on integer labels; from_logits=True because the output layer has no softmax
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model2.compile(
    loss = loss_fn,
    optimizer = 'adam',
    metrics = ['accuracy'],
)

# train for 8 epochs on float32 features
train_features = np.asarray(train_features).astype('float32')
model2.fit(train_features, train_ratings, epochs=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x221d7f003d0>

In [85]:
# test the model
# returns [loss, accuracy] on the held-out test set ('accuracy' is the metric
# requested when the model was compiled)
model2.evaluate(test_features,  test_ratings, verbose=2)

6/6 - 0s - loss: 1.5514 - accuracy: 0.3064 - 133ms/epoch - 22ms/step


[1.5513993501663208, 0.30635836720466614]

## Multi-layer perceptron Neural Network
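This section fits scikit-learn's `MLPClassifier` with two hidden layers (4 and 6 units) on the same training data and reports the fraction of test samples it misclassifies.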

In [108]:
# MLP

# scikit-learn multi-layer perceptron with two hidden layers (4 and 6 units);
# random_state is fixed so repeated runs give the same fit, and verbose=True
# prints the loss after every iteration
model3 = MLPClassifier(hidden_layer_sizes=(4,6),
                    random_state=5,
                    verbose=True,
                    learning_rate_init=0.01)

In [109]:
# fit the data
# train on the training features and their integer rating labels
model3.fit(train_features,train_ratings)

Iteration 1, loss = 8.75818431
Iteration 2, loss = 4.68719372
Iteration 3, loss = 3.65880946
Iteration 4, loss = 3.30186548
Iteration 5, loss = 2.94362173
Iteration 6, loss = 2.70342095
Iteration 7, loss = 2.57266938
Iteration 8, loss = 2.46533888
Iteration 9, loss = 2.38477772
Iteration 10, loss = 2.27392933
Iteration 11, loss = 2.13652448
Iteration 12, loss = 2.13587257
Iteration 13, loss = 2.14991780
Iteration 14, loss = 2.04848585
Iteration 15, loss = 2.02043056
Iteration 16, loss = 2.07099301
Iteration 17, loss = 2.05107629
Iteration 18, loss = 2.01817288
Iteration 19, loss = 1.91247309
Iteration 20, loss = 1.91551477
Iteration 21, loss = 1.96434993
Iteration 22, loss = 1.94935140
Iteration 23, loss = 1.87357697
Iteration 24, loss = 1.86244583
Iteration 25, loss = 1.91163897
Iteration 26, loss = 1.91010792
Iteration 27, loss = 1.84933217
Iteration 28, loss = 1.84943322
Iteration 29, loss = 1.83490499
Iteration 30, loss = 1.84437766
Iteration 31, loss = 1.86151506
Iteration 32, los

In [110]:
# calculate the misclassification rate (the function keeps its historical "mse" name)
def classification_mse(class_truth, pred_class):
    """Return the fraction of predictions that differ from the true class.

    Despite its name this is not a mean squared error: it is the
    misclassification rate, i.e. the share of positions where the two inputs
    disagree.

    Parameters
    ----------
    class_truth : array-like
        True class labels.
    pred_class : array-like
        Predicted class labels, same shape as `class_truth`.

    Returns
    -------
    float
        Number of mismatching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    # converting to arrays makes the comparison positional, whatever index
    # the inputs may carry
    truth = np.asarray(class_truth)
    predicted = np.asarray(pred_class)

    if truth.shape != predicted.shape:
        raise ValueError("class_truth and pred_class must have the same shape")
    if truth.size == 0:
        raise ValueError("cannot compute an error rate for empty input")

    mismatches = int(np.count_nonzero(truth != predicted))
    return mismatches / truth.size

In [111]:
# make predictions on the test set
preds=model3.predict(test_features)

# fraction of test samples whose predicted rating differs from the true rating
# (classification_mse returns an error rate, not a squared error)
classification_mse(test_ratings, preds)

0.7167630057803468

## Summary

### References:
- Multilayer Perceptron Neural Network Tutorial: https://machinelearninggeek.com/multi-layer-perceptron-neural-network-using-python/
- Select observations based on value counts:  https://stackoverflow.com/questions/29836836/how-do-i-filter-a-pandas-dataframe-based-on-value-counts