### Student Loan Risk Analysis with a Deep Learning Model

In [1]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report


#### Get the data and Prepare it for a neural network model

#### Step 1: Read the **student-loans.csv** file into a DataFrame. Review the DataFrame and look for columns that could define the features and target variables.

In [2]:
# Load the student-loans dataset straight from its hosted CSV into a DataFrame
file_path = "https://static.bc-edx.com/ai/ail-v-1-0/m18/lms/datasets/student-loans.csv"
loans_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows to get a feel for the columns
loans_df.head(n=5)

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0


In [3]:
# Review the data types and non-null counts of every column, to confirm
# which columns are numeric and whether any values are missing
loans_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   payment_history         1599 non-null   float64
 1   location_parameter      1599 non-null   float64
 2   stem_degree_score       1599 non-null   float64
 3   gpa_ranking             1599 non-null   float64
 4   alumni_success          1599 non-null   float64
 5   study_major_code        1599 non-null   float64
 6   time_to_completion      1599 non-null   float64
 7   finance_workshop_score  1599 non-null   float64
 8   cohort_ranking          1599 non-null   float64
 9   total_loan_score        1599 non-null   float64
 10  financial_aid_score     1599 non-null   float64
 11  credit_ranking          1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [4]:
# Check the credit ranking value counts to see how the rows are
# distributed across the target classes
loans_df["credit_ranking"].value_counts()

credit_ranking
1    855
0    744
Name: count, dtype: int64

#### Step 2: Construct the features (X) and target (y) datasets from the preprocessed data. Define the target dataset using the **"credit_ranking"** column of the preprocessed DataFrame; the remaining columns constitute the features dataset.

In [5]:
# The target (y) is the credit_ranking column of the loans DataFrame
y = loans_df["credit_ranking"]

# Show the first ten target values as a NumPy array
y.iloc[:10].to_numpy()

array([0, 0, 0, 1, 0, 0, 0, 1, 1, 0])

In [6]:
# The features (X) are every column except the credit_ranking target
X = loans_df.drop(columns=["credit_ranking"])

# Review the first rows of the features DataFrame
X.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


#### Step 3: Now let's split the features and target sets into training and testing datasets

In [7]:
# Split features and target into training and testing datasets.
# random_state is fixed so the same rows land in each split on every run;
# no test_size is passed, so scikit-learn's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

#### Step 4: Use `StandardScaler` to scale the features data

In [8]:
# Create a StandardScaler instance
Scaler = StandardScaler()

# Fit the scaler on the training features only, so the scaling statistics
# are never computed from the testing data
Scaler.fit(X_train)

# Scale both splits with the already-fitted scaler. The scaler is fitted
# exactly once above, so plain transform() is used here instead of
# fit_transform(), which would needlessly refit on the same data.
X_train_scaled = Scaler.transform(X_train)
X_test_scaled = Scaler.transform(X_test)

# Backward-compatible alias: the scaled test set was originally named
# X_test_scaler (inconsistent with X_train_scaled). Both names are kept
# so any cell referencing either one keeps working.
X_test_scaler = X_test_scaled


#### Step 5: Create a Neural Network Model
In this step we'll create a deep neural network. We'll assign the number of input features, number of layers, and number of neurons for each layer utilizing Keras.



In [12]:
# The number of model inputs equals the number of feature columns
input_features = X_train.shape[1]

# Display the number of input features
input_features

11

In [13]:
# Number of neurons in the 1st hidden layer
hidden_nodes_layer1 = 6

# Number of neurons in the 2nd hidden layer
hidden_nodes_layer2 = 3

# Number of neurons in the output layer
output_nodes = 1


In [15]:
# Seed the Python, NumPy and TensorFlow random generators before any layer
# is created, so weight initialisation and batch shuffling are repeatable
# when the notebook is re-run from a fresh kernel on the same setup.
# The value matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(1)

# Create a Sequential model
nn_model = Sequential()

# Add the first hidden layer; input_dim tells the layer how many
# feature columns each sample has
nn_model.add(Dense(units=hidden_nodes_layer1,
                   input_dim=input_features, activation="relu"))



In [17]:
# Append the second hidden layer (ReLU activation) to the model.
# NOTE: add() appends, so re-running this cell on its own stacks another copy.
nn_model.add(
    Dense(
        units=hidden_nodes_layer2,
        activation="relu",
    )
)
            

In [18]:
# Append the output layer: the configured number of output neurons
# with a sigmoid activation function.
nn_model.add(
    Dense(
        units=output_nodes,
        activation="sigmoid",
    )
)

In [20]:
# Display the Sequential model summary: each layer with its output shape
# and parameter count, plus the total number of parameters
nn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 6)                 72        
                                                                 
 dense_2 (Dense)             (None, 3)                 21        
                                                                 
 dense_3 (Dense)             (None, 1)                 4         
                                                                 
Total params: 97
Trainable params: 97
Non-trainable params: 0
_________________________________________________________________


#### Step 6: Compile and fit the model using the `binary_crossentropy` loss function with the `adam` optimizer and the `accuracy` evaluation metric.

In [21]:
# Configure the model for training: binary cross-entropy loss,
# the Adam optimizer, and accuracy as the reported metric
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [22]:
# Train the model for 50 epochs on the scaled training features and
# training targets; the returned object is kept in fit_model
fit_model = nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

Epoch 1/50


2024-06-04 00:02:45.975844: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


#### Step 7: Evaluate the model using test data to determine the model's loss and accuracy.
