# Student Loan Risk with Deep Learning

In [546]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import save_model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Note: you may need to restart the kernel to use updated packages.


---

## Prepare the data to be used on a neural network model

In [547]:
# Read the csv into a Pandas DataFrame
file_path = "https://static.bc-edx.com/mbc/ai/m6/datasets/student_loans.csv"
data = pd.read_csv(file_path)

# Review the DataFrame
data.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


### Step 1: Read the `student_loans.csv` file into a Pandas DataFrame. Review the DataFrame, looking for columns that could eventually define your features and target variables.   

In [548]:
# Review the data types associated with the columns
print(data.dtypes)

payment_history           float64
location_parameter        float64
stem_degree_score         float64
gpa_ranking               float64
alumni_success            float64
study_major_code          float64
time_to_completion        float64
finance_workshop_score    float64
cohort_ranking            float64
total_loan_score          float64
financial_aid_score       float64
credit_ranking              int64
dtype: object


### Step 2: Using the preprocessed data, create the features (`X`) and target (`y`) datasets. The target dataset should be defined by the preprocessed DataFrame column “credit_ranking”. The remaining columns should define the features dataset.

In [549]:
# Define the target set y using the credit_ranking column
y = data["credit_ranking"]

# Display a sample of y
y.sample(n=10)

216     5
921     6
214     6
701     6
1449    8
1178    5
507     6
591     6
734     5
1454    6
Name: credit_ranking, dtype: int64

In [550]:
# Define features set X by selecting all columns but credit_ranking
X = data.copy().drop(columns="credit_ranking")

# Review the features DataFrame
X.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


### Step 3: Split the features and target sets into training and testing datasets.


In [551]:
# Split the preprocessed data into a training and testing dataset
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

### Step 4: Use scikit-learn's `StandardScaler` to scale the features data.

In [552]:
# Create a StandardScaler instance
standard_scaler = StandardScaler()

# Fit the scaler to the features training dataset
standard_scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = standard_scaler.transform(X_train)
X_test_scaled = standard_scaler.transform(X_test)

---

## Compile and Evaluate a Model Using a Neural Network

### Step 1: Create a deep neural network by assigning the number of input features, the number of layers, and the number of neurons on each layer using Tensorflow’s Keras.

> **Hint** You can start with a two-layer deep neural network model that uses the `relu` activation function for both layers.


In [553]:
# Define the the number of inputs (features) to the model
number_of_inputs = 11

# Review the number of features


In [554]:
# Define the number of neurons in the output layer
output_nodes = 1

In [555]:
# Define the number of hidden nodes for the first hidden layer
layer1_nodes = 6

# Review the number hidden nodes in the first layer


In [556]:
# Define the number of hidden nodes for the second hidden layer
layer2_nodes = 4

# Review the number hidden nodes in the second layer


In [557]:
# Create the Sequential model instance
nn = Sequential()

In [558]:
# Add the first hidden layer
nn.add(Dense(units=layer1_nodes, input_dim=number_of_inputs, activation="relu"))


In [559]:
# Add the second hidden layer
nn.add(Dense(units=layer2_nodes, activation="relu"))


In [560]:
# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=output_nodes, activation="linear"))

In [561]:
# Display the Sequential model summary
nn.summary()

Model: "sequential_192"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_577 (Dense)           (None, 6)                 72        
                                                                 
 dense_578 (Dense)           (None, 4)                 28        
                                                                 
 dense_579 (Dense)           (None, 1)                 5         
                                                                 
Total params: 105 (420.00 Byte)
Trainable params: 105 (420.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Step 2: Compile and fit the model using the `mse` loss function, the `adam` optimizer, and the `mse` evaluation metric.


In [562]:
# Compile the Sequential model
nn.compile(loss="mean_squared_error", optimizer=Adam(), metrics=["accuracy"])

In [563]:
# Fit the model using 50 epochs and the training data
deep_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Step 3: Evaluate the model using the test data to determine the model’s loss and accuracy.


In [564]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_metrics = nn.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"loss: {model_loss}")
print(f"metrics: {model_metrics}")

13/13 - 0s - loss: 0.6793 - accuracy: 0.0000e+00 - 77ms/epoch - 6ms/step
loss: 0.6793414950370789
metrics: 0.0


In [565]:
# testResults = []
# for num_layer_1 in range(1, 10):
#     for num_layer_2 in range(1, 10):
#         network = Sequential()
#         network.add(Dense(units=num_layer_1, input_dim=number_of_inputs, activation="relu"))
#         network.add(Dense(units=num_layer_2, activation="relu"))
#         network.add(Dense(units=1))
#         network.compile(loss="mean_squared_error", optimizer=Adam(), metrics=["mse"])
#         network_model = nn.fit(X_train_scaled, y_train, epochs=100)
#         model_loss, model_metrics = nn.evaluate(X_test_scaled, y_test, verbose=2)
#         testResults.append({'layer1': num_layer_1, 'layer2': num_layer_2, 'loss': model_loss})

# try:
#     sortedResults = sorted(testResults, key=lambda x: x['loss'])
#     for obj in sortedResults:
#         print(obj)
# except Exception as e:
#     for obj in testResults:
#         print(obj)

### Step 4: Save and export your model to an HDF5 file, and name the file `student_loans.h5`.


In [566]:
# Set the model's file path
model_file_path = 'my_model.h5'

# Export your model to a HDF5 file
save_model(nn, model_file_path)

  save_model(nn, model_file_path)


---
## Predict Loan Repayment Success by Using your Neural Network Model

### Step 1: Reload your saved model.

In [567]:
# Set the model's file path
model_file_path = 'my_model.h5'

# Load the model to a new object
loaded_model = load_model(model_file_path)

### Step 2: Make predictions on the testing data.

In [568]:
# Make predictions on the testing data
predictions = nn.predict(X_test_scaled)
predictionsScaled = (nn.predict(X_test_scaled).round()).astype("int32")



### Step 3: Create a DataFrame to compare the predictions with the actual values.

In [569]:
# Create a DataFrame to compare the predictions with the actual values
results = pd.DataFrame({"predictions": predictions.ravel(), "actual": y_test})
resultsScaled = pd.DataFrame({"predictions": predictionsScaled.ravel(), "actual": y_test})

### Step 4: Display a sample of the DataFrame you created in step 3.

In [572]:
# Display sample data
results.head(10)

Unnamed: 0,predictions,actual
75,4.650062,5
1283,4.449917,6
408,5.744474,6
1281,5.439901,6
1118,7.063991,6
1143,6.477099,6
1215,6.032469,6
181,5.313588,5
1186,6.350879,5
1252,5.440395,5


In [571]:
resultsScaled.head(10)

Unnamed: 0,predictions,actual
75,5,5
1283,4,6
408,6,6
1281,5,6
1118,7,6
1143,6,6
1215,6,6
181,5,5
1186,6,5
1252,5,5


I believe after the rounding the results appear to be relatively good. There is still some variation between the actual scaling; and my models predicted value but it seems like it is a great first pass at predicting. With more data, and some more time in fine tuning I believe this would be a model that could save time of human intervention.