# Student Loan Risk with Deep Learning

In [1]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

---

## Prepare the data to be used on a neural network model

### Step 1: Read the `student_loans.csv` file into a Pandas DataFrame. Review the DataFrame, looking for columns that could eventually define your features and target variables.   

In [2]:
# Read the csv into a Pandas DataFrame
file_path = "https://static.bc-edx.com/mbc/ai/m6/datasets/student_loans.csv"
data = pd.read_csv(file_path)


# Review the DataFrame
data.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
# Review the data types associated with the columns

#print column names
# data.columns

#create a dataframe with pandas
# getting all cols here, else use
# df = pd.DataFrame(data, columns = ['payment_history', 'location_parameter', 'stem_degree_score', 'alumni_success'])
df = pd.DataFrame(data)

# Use Dataframe.dtypes to give the series of data types as result
datatypes = df.dtypes

# Print the data types of each column
datatypes


payment_history           float64
location_parameter        float64
stem_degree_score         float64
gpa_ranking               float64
alumni_success            float64
study_major_code          float64
time_to_completion        float64
finance_workshop_score    float64
cohort_ranking            float64
total_loan_score          float64
financial_aid_score       float64
credit_ranking              int64
dtype: object

### Step 2: Using the preprocessed data, create the features (`X`) and target (`y`) datasets. The target dataset should be defined by the preprocessed DataFrame column “credit_ranking”. The remaining columns should define the features dataset.

In [4]:

# Define the target set y using the credit_ranking column

# Spliting training and testing sets
# Create the target DataFrame, y i.e. labels set
y = data['credit_ranking']

# Display a sample of y
# first 5 values, else do: y.values
y.values[:5]

array([5, 5, 5, 6, 5])

In [5]:
# Define features set X by selecting all columns but credit_ranking

# Create the features DataFrame, X
X = data.copy()

# Drop feature/ category from master dataset
X = X.drop(columns='credit_ranking')

# Review the features DataFrame
X.head()


Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


### Step 3: Split the features and target sets into training and testing datasets.


In [6]:
# Split the preprocessed data into a training and testing dataset
# Assign the function a random_state equal to 1


# Split the data into X_train, X_test, y_train, y_test
# Use train_test_split to separate the data
# using random state to make the data reproducable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# visualize X_train
X_train


Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
1144,7.3,0.320,0.23,2.3,0.066,35.0,70.0,0.99588,3.43,0.62,10.1
73,8.3,0.675,0.26,2.1,0.084,11.0,43.0,0.99760,3.31,0.53,9.2
446,12.5,0.380,0.60,2.6,0.081,31.0,72.0,0.99960,3.10,0.73,10.5
399,8.7,0.765,0.22,2.3,0.064,9.0,42.0,0.99630,3.10,0.55,9.4
647,8.3,0.845,0.01,2.2,0.070,5.0,14.0,0.99670,3.32,0.58,11.0
...,...,...,...,...,...,...,...,...,...,...,...
715,7.2,0.490,0.18,2.7,0.069,13.0,34.0,0.99670,3.29,0.48,9.2
905,9.2,0.580,0.20,3.0,0.081,15.0,115.0,0.99800,3.23,0.59,9.5
1096,6.6,0.725,0.09,5.5,0.117,9.0,17.0,0.99655,3.35,0.49,10.8
235,7.2,0.630,0.00,1.9,0.097,14.0,38.0,0.99675,3.37,0.58,9.0


### Step 4: Use scikit-learn's `StandardScaler` to scale the features data.

In [7]:
# Create a StandardScaler instance

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Scale the training data
X_train_scaled = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled = X_scaler.transform(X_test)

#visualize X_train_scaled
X_test_scaled

array([[ 0.26985713, -0.67852173,  1.87373569, ...,  1.43159743,
         0.01130905,  0.05358797],
       [ 0.21400374,  0.54947458,  0.05955639, ..., -0.34793415,
        -0.16632694, -0.22597102],
       [ 1.16351141, -1.06924782,  1.57137247, ..., -0.79281704,
         0.24815703,  0.7990786 ],
       ...,
       [-0.62379715, -0.95761179,  0.96664604, ...,  0.54183164,
         1.13633698,  0.51951962],
       [-1.51745143,  0.46574755, -1.35147195, ...,  1.68581622,
        -0.81765891, -0.50553001],
       [ 1.94545891, -1.12506584,  1.11782765, ..., -0.09371535,
         0.60342901,  0.23996063]])

---

## Compile and Evaluate a Model Using a Neural Network

### Step 1: Create a deep neural network by assigning the number of input features, the number of layers, and the number of neurons on each layer using Tensorflow’s Keras.

> **Hint** You can start with a two-layer deep neural network model that uses the `relu` activation function for both layers.


In [8]:
# Define the the number of inputs (features) to the model
# calculate the number of features/ columns
# X being the original dataset dropping the category
number_of_features= len(X.columns)
number_of_features

# Review the number of features
# get the minimum credit ranking
credit_min= data['credit_ranking'].min()

# get the maximum credit ranking
credit_max= data['credit_ranking'].max()

print(f"number_of_features: {number_of_features}, credit_ranking_min: {credit_min}, credit_ranking_max: {credit_max}")

number_of_features: 11, credit_ranking_min: 3, credit_ranking_max: 8


So, the data contains 11 variables representing different characteristics. Together, these characteristics allow us to assess the overall "credit_ranking" of a student on a scale from 3 (min) to 8 (max).

In [9]:
# Define the number of neurons in the output layer

# The output layer is the last layer so it will have the fewest neurons. We will use 1 neuron in our output layer.
nodes_output_layer = 1

Why 2 hidden layers?
so that can represent any arbitrary decision boundary to arbitrary accuracy
with rational activation functions and can approximate any smooth
mapping to any accuracy.

Also, since creating a deep learning model, we will use 2 hidden layers

In [10]:
# Define the number of hidden nodes for the first hidden layer

# using one pattern of thumb rule
# first hidden layer should have fewer neurons or nodes than the input layer (11 nodes), let's say that is half of the number of features/initial inputs
# let the number be m = n/2 where n being number of features
hidden_nodes_layer1 = round ( number_of_features/2 )


# Review the number hidden nodes in the first layer
hidden_nodes_layer1

6

In [11]:
# Define the number of hidden nodes for the second hidden layer
# the second hidden layer should have half of the first layer, let it be p = m/2 where m being number of nodes in the first layer
hidden_nodes_layer2 = round( hidden_nodes_layer1/2)

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

3

In [12]:
# Create the Sequential model instance
neuron = Sequential()


In [13]:
# Add the first hidden layer
neuron.add(Dense(units=hidden_nodes_layer1, input_dim=number_of_features, activation="relu"))

In [14]:
# Add the second hidden layer
neuron.add(Dense(units=hidden_nodes_layer2, activation="relu"))

In [15]:
# Add the output layer to the model specifying the number of output neurons and activation function

# Since  here we need to classify the students on a credit scale ranging from 3-8, the model’s output will be continuous rather than binary.
# In effect, the model we are building is a regression model and not a classification model.
# We will use the linear activation function for the output layer, which is suitable for regression problems.

neuron.add(Dense(units=1, activation="linear"))

In [16]:
# Display the Sequential model summary
neuron.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 72        
                                                                 
 dense_1 (Dense)             (None, 3)                 21        
                                                                 
 dense_2 (Dense)             (None, 1)                 4         
                                                                 
Total params: 97 (388.00 Byte)
Trainable params: 97 (388.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Step 2: Compile and fit the model using the `mse` loss function, the `adam` optimizer, and the `mse` evaluation metric.


In [17]:
# Compile the Sequential model
neuron.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

In [18]:
# Fit the model using 50 epochs and the training data
deep_net_model = neuron.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Step 3: Evaluate the model using the test data to determine the model’s loss and accuracy.


In [20]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = neuron.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

13/13 - 0s - loss: 0.7385 - mse: 0.7385 - 66ms/epoch - 5ms/step
Loss: 0.7385469675064087, Accuracy: 0.7385469675064087


Note: with 100 epochs theresults are like this:
output
13/13 - 0s - loss: 0.5985 - mse: 0.5985 - 70ms/epoch - 5ms/step
Loss: 0.5984755277633667, Accuracy: 0.5984755277633667

### Step 4: Save and export your model to an HDF5 file, and name the file `student_loans.h5`.


In [21]:
# Set the model's file path
file_path = Path("saved_models/credit_ranking.h5")

# Export your model to a HDF5 file
neuron.save(file_path)

  saving_api.save_model(


---
## Predict Loan Repayment Success by Using your Neural Network Model

### Step 1: Reload your saved model.

In [22]:
# Set the model's file path
file_path = Path("saved_models/credit_ranking.h5")

# Load the model to a new object
nn_model_imported = tf.keras.models.load_model(file_path)

### Additonal Step

In [23]:
# Evaluate the imported model
model_loss, model_accuracy = nn_model_imported.evaluate(X_test_scaled, y_test, verbose=2)

# Display evaluation results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


13/13 - 0s - loss: 0.7385 - mse: 0.7385 - 166ms/epoch - 13ms/step
Loss: 0.7385469675064087, Accuracy: 0.7385469675064087


Note: imported model produces the same performance metrics as the original model

### Step 2: Make predictions on the testing data.

In [24]:
# Make predictions on the testing data
predictions = nn_model_imported.predict(X_test_scaled).round().astype("int32")



### Step 3: Create a DataFrame to compare the predictions with the actual values.

In [25]:
# Create a DataFrame to compare the predictions with the actual values
results = pd.DataFrame({"predictions": predictions.ravel(), "actual": y_test})

### Step 4: Display a sample of the DataFrame you created in step 3.

In [26]:
# Display sample data
results.head(10)

Unnamed: 0,predictions,actual
75,5,5
1283,4,6
408,5,6
1281,5,6
1118,7,6
1143,6,6
1215,6,6
181,5,5
1186,5,5
1252,6,5


---
### Additional Steps: trying a few other models with different thumb rules

## (1) Creating a model with number of nodes convention: Thumb rule 2


In [27]:
# defining number of nodes
nodes_output_layer_model2 = 1

# Define the number of hidden nodes for the first hidden layer

# first hidden layer should have fewer neurons or nodes than the input layer (11 nodes)
# The number of hidden neurons should be 2/3 the size of the input layer, plus the size of the output layer.
hidden_nodes_layer1_model2 = round( (2*number_of_features)/3 + nodes_output_layer_model2 )

# Define the number of hidden nodes for the second hidden layer
# the second hidden layer should have, maybe 4 nodes, so that it has fewer neurons than the first hidden layer (8 nodes assigned)
hidden_nodes_layer2_model2 = round ((2*hidden_nodes_layer1_model2)/3 + nodes_output_layer_model2 )

hidden_nodes_layer1_model2, hidden_nodes_layer2_model2

#create model
neuron_model2 = Sequential()

# adding layers
neuron_model2.add(Dense(units=hidden_nodes_layer1_model2, input_dim=number_of_features, activation="relu"))
neuron_model2.add(Dense(units=hidden_nodes_layer2_model2, activation="relu"))
neuron_model2.add(Dense(units=1, activation="linear"))

# display summary
neuron_model2.summary()

# compile
neuron_model2.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# fit
deep_net_model2= neuron_model2.fit(X_train_scaled, y_train, epochs=50)

# measure accuracy
model_loss2, model_accuracy2 = neuron_model2.evaluate(X_test_scaled, y_test, verbose=2)

# print accuracy
print(f"Loss: {model_loss2}, Accuracy: {model_accuracy2}")


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 8)                 96        
                                                                 
 dense_4 (Dense)             (None, 6)                 54        
                                                                 
 dense_5 (Dense)             (None, 1)                 7         
                                                                 
Total params: 157 (628.00 Byte)
Trainable params: 157 (628.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
E

## (2) Creating a model with number of nodes convention: Thumb Rule 3

to prevent overfitting, let's keep the number of neurons below:𝑁ℎ=𝑁𝑠/𝛼∗(𝑁𝑖+𝑁𝑜)

where 𝑁𝑖
 = number of input neurons.
𝑁𝑜
 = number of output neurons.
𝑁𝑠
 = number of samples in training data set.
𝛼
 = an arbitrary scaling factor usually 2-10., let's take 𝛼= 5


In [28]:
# defining number of nodes
nodes_output_layer_model3 = 1
alpha=5
hidden_nodes_layer1_model3 = round (data.index.size/(alpha * (number_of_features + nodes_output_layer_model3) ))
hidden_nodes_layer2_model3 = round (data.index.size/(alpha * (hidden_nodes_layer1_model3 + nodes_output_layer_model3) ))

print(f"MODEL 3 nodes: layer1: {hidden_nodes_layer1_model3}, layer2: {hidden_nodes_layer2_model3}")

#create model
neuron_model3= Sequential()

# adding layers
neuron_model3.add(Dense(units=hidden_nodes_layer1_model3, input_dim=number_of_features, activation="relu"))
neuron_model3.add(Dense(units=hidden_nodes_layer2_model3, activation="relu"))
neuron_model3.add(Dense(units=1, activation="linear"))

# display summary
neuron_model3.summary()

# compile
neuron_model3.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# fit
deep_net_model3= neuron_model3.fit(X_train_scaled, y_train, epochs=50)

# measure accuracy
model_loss3, model_accuracy3 = neuron_model3.evaluate(X_test_scaled, y_test, verbose=2)

# print accuracy
print(f"MODEL 3 Evaluation: Loss: {model_loss3}, Accuracy: {model_accuracy3}")


MODEL 3 nodes: layer1: 27, layer2: 11
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 27)                324       
                                                                 
 dense_7 (Dense)             (None, 11)                308       
                                                                 
 dense_8 (Dense)             (None, 1)                 12        
                                                                 
Total params: 644 (2.52 KB)
Trainable params: 644 (2.52 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 2

In [30]:
print(f"MODEL 1 Evaluation-- Loss: {model_loss}, Accuracy: {model_accuracy}")
print(f"MODEL 2 Evaluation-- Loss: {model_loss2}, Accuracy: {model_accuracy2}")
print(f"MODEL 3 Evaluation-- Loss: {model_loss3}, Accuracy: {model_accuracy3}")

MODEL 1 Evaluation-- Loss: 0.7385469675064087, Accuracy: 0.7385469675064087
MODEL 2 Evaluation-- Loss: 0.6707820296287537, Accuracy: 0.6707820296287537
MODEL 3 Evaluation-- Loss: 0.4689522683620453, Accuracy: 0.4689522683620453


Conclusion:
From Accuracy standpoint the first model worked the best

---
### References:

https://hagan.okstate.edu/NNDesign.pdf#page=469

https://medium.com/geekculture/introduction-to-neural-network-2f8b8221fbd3

https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw


