# Student Loan Risk
```{admonition} Revised
25 Jul 2023
```

---

## Programming Environment

In [1]:
import numpy  as np
import pandas as pd

import sklearn
from   sklearn.ensemble        import RandomForestClassifier
from   sklearn.linear_model    import LogisticRegression
from   sklearn.metrics         import accuracy_score
from   sklearn.model_selection import train_test_split
from   sklearn.preprocessing   import StandardScaler

import tensorflow as tf
from   tensorflow.keras.layers import Dense
from   tensorflow.keras.models import Sequential

import datetime
from   importlib.metadata import version
import os
import platform as p
import sys

# Column width used to left-align every label in the report below.
pad = 20

# Use .get(): indexing os.environ raises KeyError when CONDA_DEFAULT_ENV is unset
# (e.g. venv or system Python), which would make the `or` fallback unreachable.
# The fallback takes the directory two levels above the interpreter binary.
conda_env = os.environ.get('CONDA_DEFAULT_ENV') or sys.executable.split('/')[-3]

# Timestamp, OS platform, environment name, and interpreter details.
print(  f"\n{'Executed' : <{pad}} : {datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z %Z')}"
        f"\n{'Platform' : <{pad}} : {p.platform(aliased = False, terse = False)}"
        f"\n{'Conda'    : <{pad}} : {conda_env}"
        f"\n{'Python'   : <{pad}} : {p.python_implementation()} {p.python_version()} {sys.executable}")
# Installed distribution version of each key package, one per line.
print(*[f'{name : <{pad}} : {version(name)}'
        for name in ['NumPy', 'Pandas', 'Scikit-Learn', 'TensorFlow-macos', 'TensorFlow-metal']], sep = '\n')


Executed             : 2023-09-04 12:17:02 -0400 EDT
Platform             : macOS-13.5.1-arm64-arm-64bit
Conda                : tf
Python               : CPython 3.11.5 /Users/df/anaconda3/envs/tf/bin/python
NumPy                : 1.24.3
Pandas               : 2.1.0
Scikit-Learn         : 1.3.0
TensorFlow-macos     : 2.13.0
TensorFlow-metal     : 1.0.1


In [2]:
# Print the BLAS/LAPACK build configuration of the installed NumPy.
np.show_config()

openblas64__info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)]
    runtime_library_dirs = ['/usr/local/lib']
blas_ilp64_opt_info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None)]
    runtime_library_dirs = ['/usr/local/lib']
openblas64__lapack_info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None), ('BLAS_SYMBOL_SUFFIX', '64_'), ('HAVE_BLAS_ILP64', None), ('HAVE_LAPACKE', None)]
    runtime_library_dirs = ['/usr/local/lib']
lapack_ilp64_opt_info:
    libraries = ['openblas64_', 'openblas64_']
    library_dirs = ['/usr/local/lib']
    language = c
    define_macros = [('HAVE_CBLAS', None

In [3]:
# Print the SIMD extensions and threadpool libraries NumPy detects at runtime.
np.show_runtime()

[{'simd_extensions': {'baseline': ['NEON', 'NEON_FP16', 'NEON_VFPV4', 'ASIMD'],
                      'found': ['ASIMDHP', 'ASIMDDP'],
                      'not_found': ['ASIMDFHM']}},
 {'architecture': 'armv8',
  'filepath': '/Users/davefriedman/anaconda3/envs/jb/lib/python3.11/site-packages/numpy/.dylibs/libopenblas64_.0.dylib',
  'internal_api': 'openblas',
  'num_threads': 10,
  'prefix': 'libopenblas',
  'threading_layer': 'pthreads',
  'user_api': 'blas',
  'version': '0.3.21'},
 {'architecture': 'VORTEX',
  'filepath': '/Users/davefriedman/anaconda3/envs/jb/lib/libopenblas.0.dylib',
  'internal_api': 'openblas',
  'num_threads': 10,
  'prefix': 'libopenblas',
  'threading_layer': 'openmp',
  'user_api': 'blas',
  'version': '0.3.23'},
 {'filepath': '/Users/davefriedman/anaconda3/envs/jb/lib/libomp.dylib',
  'internal_api': 'openmp',
  'num_threads': 10,
  'prefix': 'libomp',
  'user_api': 'openmp',
  'version': None}]


In [4]:
# Print scikit-learn's system, dependency-version, and threadpool diagnostics.
sklearn.show_versions()


System:
    python: 3.11.4 | packaged by conda-forge | (main, Jun 10 2023, 18:08:41) [Clang 15.0.7 ]
executable: /Users/davefriedman/anaconda3/envs/jb/bin/python
   machine: macOS-13.3.1-arm64-arm-64bit

Python dependencies:
      sklearn: 1.3.0
          pip: 23.1.2
   setuptools: 68.0.0
        numpy: 1.24.3
        scipy: 1.11.1
       Cython: 3.0.0
       pandas: 2.0.3
   matplotlib: 3.7.2
       joblib: 1.3.0
threadpoolctl: 3.2.0

Built with OpenMP: True

threadpoolctl info:
       user_api: blas
   internal_api: openblas
    num_threads: 10
         prefix: libopenblas
       filepath: /Users/davefriedman/anaconda3/envs/jb/lib/python3.11/site-packages/numpy/.dylibs/libopenblas64_.0.dylib
        version: 0.3.21
threading_layer: pthreads
   architecture: armv8

       user_api: blas
   internal_api: openblas
    num_threads: 10
         prefix: libopenblas
       filepath: /Users/davefriedman/anaconda3/envs/jb/lib/libopenblas.0.dylib
        version: 0.3.23
threading_layer: openm

In [2]:
# Query physical devices first, then logical devices; the printed labels mark
# these as the counts before and after TensorFlow runtime initialization.
physical_devices_cpu, physical_devices_gpu = (
    tf.config.list_physical_devices(kind) for kind in ('CPU', 'GPU'))
logical_devices_cpu, logical_devices_gpu = (
    tf.config.list_logical_devices(kind) for kind in ('CPU', 'GPU'))

# One print call, one line per device count.
print(f'Num CPUs (pre  runtime init): {len(physical_devices_cpu)}',
      f'Num GPUs (pre  runtime init): {len(physical_devices_gpu)}',
      f'Num CPUs (post runtime init): {len(logical_devices_cpu)}',
      f'Num GPUs (post runtime init): {len(logical_devices_gpu)}',
      sep = '\n')

Num CPUs (pre  runtime init): 1
Num GPUs (pre  runtime init): 1
Num CPUs (post runtime init): 1
Num GPUs (post runtime init): 1


2023-09-04 12:17:30.784703: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2023-09-04 12:17:30.784735: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2023-09-04 12:17:30.784741: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2023-09-04 12:17:30.784844: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-09-04 12:17:30.785098: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


---

```python
# Condensed listing of the whole workflow; the cells that follow run the same steps one at a time.
# Load the student-loan dataset and preview the first rows.
loans_df = pd.read_csv(filepath_or_buffer = 'https://static.bc-edx.com/mbc/ai/m6/datasets/student_loans.csv')
loans_df.head()

# Features are every column except the target, credit_ranking.
X = loans_df.copy()
X = X.drop(columns = ['credit_ranking'])
y = loans_df['credit_ranking']
y.value_counts()

# Hold out a test partition; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)

# Standardize features: fit the scaler on the training partition only, then transform both partitions.
scaler         = StandardScaler()
X_scaler       = scaler.fit(X = X_train)
X_train_scaled = X_scaler.transform(X = X_train)
X_test_scaled  = X_scaler.transform(X = X_test)

# Neural network: two ReLU hidden layers feeding a single linear output unit.
# Each hidden width is the integer midpoint of the previous width and the output width.
number_input_features = len(X_train.iloc[0]) # 11
number_output_neurons = 1
hidden_nodes_layer1   = (number_input_features + number_output_neurons) // 2 # 6
hidden_nodes_layer2   = (hidden_nodes_layer1   + number_output_neurons) // 2 # 3
nn = Sequential()
nn.add(Dense(units      = hidden_nodes_layer1,
             input_dim  = number_input_features,
             activation = 'relu'))
nn.add(Dense(units      = hidden_nodes_layer2,
             activation = 'relu'))
nn.add(Dense(units      = number_output_neurons,
             activation = 'linear'))
nn.summary()
# Mean squared error serves as both the loss and the reported metric.
nn.compile(loss      = 'mse',
           optimizer = 'adam',
           metrics   = ['mse'])
# Train on the scaled training partition, then score on the scaled test partition.
fit_model = nn.fit(x = X_train_scaled,
                   y = y_train,
                   epochs = 50)
nn.evaluate(x = X_test_scaled,
            y = y_test,
            verbose = 2)
# Save the trained model to disk.
nn.save('saved_models/student_loans.h5')

# Reload the saved model, predict on the test partition, and round to integer rankings.
nn_imported = tf.keras.models.load_model(filepath = 'saved_models/student_loans.h5')
predictions = nn_imported.predict(x = X_test_scaled).round().astype('int32')
# Put the flattened predictions next to the true labels.
results     = pd.DataFrame(data = {
  'predictions' : predictions.ravel(),
  'actual'      : y_test,
})
```

---

In [7]:
# Read the student-loan dataset from a CSV at a relative path and preview the first rows.
loans_df = pd.read_csv(filepath_or_buffer = 'student_loans.csv')
loans_df.head()

Unnamed: 0,payment_history,location_parameter,stem_degree_score,gpa_ranking,alumni_success,study_major_code,time_to_completion,finance_workshop_score,cohort_ranking,total_loan_score,financial_aid_score,credit_ranking
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [8]:
# Summarize every column (non-null count and dtype) plus the frame's memory usage.
loans_df.info(memory_usage = True,
              show_counts  = True,
              verbose      = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   payment_history         1599 non-null   float64
 1   location_parameter      1599 non-null   float64
 2   stem_degree_score       1599 non-null   float64
 3   gpa_ranking             1599 non-null   float64
 4   alumni_success          1599 non-null   float64
 5   study_major_code        1599 non-null   float64
 6   time_to_completion      1599 non-null   float64
 7   finance_workshop_score  1599 non-null   float64
 8   cohort_ranking          1599 non-null   float64
 9   total_loan_score        1599 non-null   float64
 10  financial_aid_score     1599 non-null   float64
 11  credit_ranking          1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [9]:
# Target is credit_ranking; features are every other column.
# drop() returns a new frame, so loans_df itself is left unchanged.
y = loans_df['credit_ranking']
X = loans_df.drop(columns = ['credit_ranking'])
# Show how many rows fall into each credit ranking.
y.value_counts()

credit_ranking
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

In [10]:
# Describe the NumPy array behind the target Series (shape, strides, dtype, memory layout).
np.info(object = y.values)

class:  ndarray
shape:  (1599,)
strides:  (8,)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  True
data pointer: 0x29b1a6800
byteorder:  little
byteswap:  False
type: int64


In [11]:
# Partition features and target into train and test sets; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)

# Report the shape of each partition, one per line.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep = '\n')

(1199, 11)
(1199,)
(400, 11)
(400,)


In [12]:
# Fit the scaler on the training partition only, so the test partition does not
# influence the scaling; then apply the same transform to both partitions.
scaler   = StandardScaler()
X_scaler = scaler.fit(X = X_train)

X_train_scaled, X_test_scaled = (X_scaler.transform(X = part) for part in (X_train, X_test))

# Report the shape of each scaled partition, one per line.
print(X_train_scaled.shape, X_test_scaled.shape, sep = '\n')

(1199, 11)
(400, 11)


In [13]:
# Seed Python, NumPy, and TensorFlow before any layer is created, so weight
# initialization and batch shuffling repeat on Restart & Run All. The value matches
# the random_state used for the train/test split.
# NOTE(review): GPU kernels may still cause small run-to-run differences — confirm on the Metal backend.
tf.keras.utils.set_random_seed(1)

# One input per feature column; a single output unit for the predicted ranking.
number_input_features = X_train.shape[1] # 11
number_output_neurons = 1

# Each hidden width is the integer midpoint of the previous width and the output width.
hidden_nodes_layer1   = (number_input_features + number_output_neurons) // 2 # 6
hidden_nodes_layer2   = (hidden_nodes_layer1   + number_output_neurons) // 2 # 3

# Two ReLU hidden layers feeding one linear output unit.
nn = Sequential()
nn.add(Dense(units      = hidden_nodes_layer1,
             input_dim  = number_input_features,
             activation = 'relu'))
nn.add(Dense(units      = hidden_nodes_layer2,
             activation = 'relu'))
nn.add(Dense(units      = number_output_neurons,
             activation = 'linear'))
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 72        
                                                                 
 dense_1 (Dense)             (None, 3)                 21        
                                                                 
 dense_2 (Dense)             (None, 1)                 4         
                                                                 
Total params: 97 (388.00 Byte)
Trainable params: 97 (388.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
# Mean squared error serves as both the training loss and the reported metric.
nn.compile(loss      = 'mse',
           optimizer = 'adam',
           metrics   = ['mse'])

# Train for 50 epochs on the scaled training partition; fit()'s return value is kept in fit_model.
fit_model = nn.fit(x = X_train_scaled,
                   y = y_train,
                   epochs = 50)

Epoch 1/50
 1/38 [..............................] - ETA: 10s - loss: 31.4691 - mse: 31.4691

2023-07-25 01:41:36.870822: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
# Score the trained network on the held-out, scaled test partition (loss and mse).
nn.evaluate(x = X_test_scaled,
            y = y_test,
            verbose = 2)

13/13 - 0s - loss: 0.3991 - mse: 0.3991 - 129ms/epoch - 10ms/step


2023-07-25 01:41:47.841394: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


[0.39913174510002136, 0.39913174510002136]

In [16]:
# Save the trained network to an .h5 file under saved_models/.
# NOTE(review): the truncated `saving_api.save_model(` output looks like Keras' legacy-HDF5 warning — confirm.
nn.save('saved_models/student_loans.h5')

  saving_api.save_model(


In [17]:
# Reload the saved model from disk into a separate object and display it.
nn_imported = tf.keras.models.load_model(filepath = 'saved_models/student_loans.h5')
nn_imported

<keras.src.engine.sequential.Sequential at 0x2abad0890>

In [18]:
# Predict on the scaled test partition with the reloaded model, round to the nearest
# integer, and cast to int32.
predictions = nn_imported.predict(x = X_test_scaled).round().astype('int32')

# Describe the prediction array (shape, strides, dtype, memory layout).
np.info(object = predictions)

 1/13 [=>............................] - ETA: 0s

2023-07-25 01:41:48.089861: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


class:  ndarray
shape:  (400, 1)
strides:  (4, 4)
itemsize:  4
aligned:  True
contiguous:  True
fortran:  True
data pointer: 0x2b42eee00
byteorder:  little
byteswap:  False
type: int32


In [19]:
# Put the flattened predictions next to the true test labels in one frame.
results = pd.DataFrame(data = dict(predictions = predictions.ravel(), actual = y_test))
results

Unnamed: 0,predictions,actual
75,6,5
1283,5,6
408,6,6
1281,6,6
1118,7,6
...,...,...
1416,6,5
1567,5,5
1204,6,7
12,5,5


---