In [74]:
 # Imports
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense
import pandas as pd
import tensorflow as tf
from pathlib import Path
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split


In [75]:
 # Read the csv into a Pandas DataFrame
file_path = "https://static.bc-edx.com/ai/ail-v-1-0/m18/lms/datasets/student-loans.csv"
loans_df = pd.read_csv(file_path)

#Review the DataFrame
loans_df.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0


In [76]:
# Review the data types associated with the columns\n",
loans_df.dtypes
     

payment_history           float64
location_parameter        float64
stem_degree_score         float64
gpa_ranking               float64
alumni_success            float64
study_major_code          float64
time_to_completion        float64
finance_workshop_score    float64
cohort_ranking            float64
total_loan_score          float64
financial_aid_score       float64
credit_ranking              int64
dtype: object

In [77]:
# Check the credit_ranking value counts
loans_df["credit_ranking"].value_counts()

credit_ranking
1    855
0    744
Name: count, dtype: int64

In [78]:
 # Define the target set y using the credit_ranking column
y = loans_df["credit_ranking"]

# Display a sample of y
y[:5]

0    0
1    0
2    0
3    1
4    0
Name: credit_ranking, dtype: int64

In [79]:
# Define features set X by selecting all columns but credit_ranking
X = loans_df.copy().drop("credit_ranking", axis=1)

In [80]:
# Review the features DataFrame
X.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4


In [81]:
 # Split the preprocessed data into a training and testing dataset
 # Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, train_size=0.8)

In [82]:
# Step 4: Use scikit-learn's `StandardScaler` to scale the features data.

In [83]:
# Create a StandardScaler instance
scaler = StandardScaler().fit(X_train)

# Fit the scaler 
X_test_scaled = scaler.transform(X_test)
X_train_scaled = scaler.transform(X_train)

In [84]:
# number of inputs 
inputs = X_train_scaled.shape[1]
# Review the number of features
inputs

11

In [85]:
 # number of hidden nodes for the first hidden layer
 l1_nodes = 8
# number of hidden nodes for the second hidden layer
l2_nodes = 5
# number of neurons in the output layer
number_output_neurons = 1

In [86]:
 # Create the Sequential model instance
nn = Sequential()
# Add the first hidden layer
nn.add(Dense(units=l1_nodes, input_dim=inputs, activation="relu"))
# Add the second hidden layer
nn.add(Dense(units=l2_nodes, activation="relu"))   
 # Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=number_output_neurons, activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [87]:
# Display the Sequential model summary
nn.summary()

In [88]:
 # Compile the Sequential model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [89]:
# Fit the model using 50 epochs and the training data
model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 642us/step - accuracy: 0.4627 - loss: 0.7867 
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 640us/step - accuracy: 0.4671 - loss: 0.7162
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 597us/step - accuracy: 0.5443 - loss: 0.6897
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 610us/step - accuracy: 0.5697 - loss: 0.6589
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 608us/step - accuracy: 0.5943 - loss: 0.6456
Epoch 6/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 660us/step - accuracy: 0.6150 - loss: 0.6263
Epoch 7/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 617us/step - accuracy: 0.6425 - loss: 0.6127
Epoch 8/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 644us/step - accuracy: 0.6818 - loss: 0.5962
Epoch 9/50
[1m40/40[0m [32m━━━━━━━━━

In [90]:
 # model loss and accuracy metrics
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
 # Display the model loss and accuracy 
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


10/10 - 0s - 9ms/step - accuracy: 0.7594 - loss: 0.5120
Loss: 0.5120489597320557, Accuracy: 0.7593749761581421


In [91]:
 # Set the file path 
set_path = Path("My_models/student_loan_model.keras")
# Export model to a keras
nn.save(set_path)

In [92]:
# Set the model's file path
set_path = Path("My_models/student_loan_model.keras")
# Load the model to new object
nn_model = tf.keras.models.load_model(set_path)                    

In [93]:
 # Make predictions with the test data
predictions = nn_model.predict(X_test_scaled)
# Display a sample of the predictions
predictions[:5]

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 667us/step


array([[0.1808961 ],
       [0.33235464],
       [0.9369788 ],
       [0.818619  ],
       [0.9703674 ]], dtype=float32)

In [94]:
# Save the predictions to a DataFrame 
results = pd.DataFrame(y_test)
results["predicted"] = predictions.round()

results

Unnamed: 0,credit_ranking,predicted
75,0,0.0
1283,1,0.0
408,1,1.0
1281,1,1.0
1118,1,1.0
...,...,...
890,0,1.0
146,0,0.0
1551,0,0.0
1209,1,1.0


In [95]:
# classification report with the y test data and predictions

print(classification_report(results["credit_ranking"], results["predicted"]))

              precision    recall  f1-score   support

           0       0.74      0.77      0.75       154
           1       0.78      0.75      0.76       166

    accuracy                           0.76       320
   macro avg       0.76      0.76      0.76       320
weighted avg       0.76      0.76      0.76       320

