In [98]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout


2023-12-01 13:03:55.956397: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-01 13:03:56.367840: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-01 13:03:58.097139: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/joeyio/anaconda3/envs/tf_fresh/lib/
2023-12-01 13:03:58.097809: W tensorflow/compiler/xla/s

In [11]:
# load data

# Read the raw CSV and discard the 'Unnamed: 0' column in one chained step
# (presumably an index column saved by an earlier export -- TODO confirm).
alldata = pd.read_csv('alldata2.csv').drop(columns=['Unnamed: 0'])

# Provisional feature/target views: the first remaining column is taken as the
# target, every other column as a feature.
y = alldata.iloc[:, 0]
X = alldata.iloc[:, 1:]

In [75]:
class CustomFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that assembles the modelling table.

    ``transform`` combines columns taken straight from the input frame with
    ratio features derived from pairs of input columns, removes every row
    containing a NaN or an infinite value, and returns the result with a fixed
    column order. The ``ACTIVE`` column is carried through as the last column.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this transformer learns nothing."""
        return self

    def transform(self, X, y=None):
        """Build the feature table from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must expose every column referenced below as an attribute.
        y : ignored
            Accepted only for scikit-learn API compatibility.

        Returns
        -------
        pandas.DataFrame
            27 columns in the order listed in ``features``. Rows in which any
            column is NaN or +/-inf (e.g. after a division by zero) are
            dropped, so the output may have fewer rows than ``X``.
        """
        # Each output column name sits next to the expression that produces it;
        # insertion order of this dict is the column order of the result.
        features = {
            'LNATRESR': X.LNATRESR,
            # Z2, Loan loss provision / net interest revenue
            'LLPNIR': X.ELNLOS / X.NIM,
            # Z3, Impaired loans / gross loans
            'ILGL': X.EAMINTAN / X.LNLSGRS,
            # Z4, Net charge off / Gross loans
            'NCOGL': X.NTLNLS / X.LNLSGRS,
            # Z5, Impaired loans / equity
            'ILEQ': X.EAMINTAN / X.EQ,
            'IDT1RWAJR': X.IDT1RWAJR,
            'EQTOTR': X.EQTOTR,
            'EQV': X.EQV,
            # Z9, Equity / Net Loans
            'EQNL': X.EQ / X.LNLSNET,
            # no data for Z10
            # Z11, Equity / Liabilities
            'EQLIAB': X.EQ / X.LIAB,
            'LIABEQR': X.LIABEQR,
            # Z13, Total capital / Net Loans
            'TCNL': X.LIABEQ / X.LNLSNET,
            # Z14, Total capital / deposits
            'TCDEP': X.LIABEQ / X.DEP,
            # Z15, Total capital / Liabilities
            'TCLIAB': X.LIABEQ / X.LIAB,
            # no data for Z18
            'NIMY': X.NIMY,
            'NIMR': X.NIMR,
            'NONIXR': X.NONIXR,
            'PTAXNETINCR': X.PTAXNETINCR,
            # Z21, taxes / avg assets
            'TAVAST': X.ITAX / X.ASSET5,
            'ROA': X.ROA,
            'ROE': X.ROE,
            # Z24, net income / equity
            'NIEQ': X.NETINC / X.EQ,
            # no data for Z25 and Z26, use Efficiency ratio instead
            'EEFFR': X.EEFFR,
            # Z27, net loans / total assets
            'NLTA': X.LNLSNET / X.ASSET,
            # not sure how Z28 and Z29 are different, use net loans / total deposits
            'NLTD': X.LNLSNET / X.DEP,
            # not sure how Z30 and Z31 are different, use liquid assets / total
            # deposits = (assets - fixed assets) / total deposits
            'LATD': (X.ASSET - X.BKPREM) / X.DEP,
            'ACTIVE': X.ACTIVE,
        }

        # Assemble the columns side by side, labelling each with its dict key.
        combined = pd.concat(
            list(features.values()),
            axis=1,
            keys=list(features.keys()),
        )

        # Divisions by zero surface as +/-inf (or NaN for 0/0); turn the
        # infinities into NaN and drop every affected row.
        return combined.replace([np.inf, -np.inf], np.nan).dropna()

In [80]:
# Pre-split stage: run this on the full dataset, before it is separated into
# features and targets or into training/test sets.
# Nothing is fitted here; the stage only derives new features from existing ones.
# Some of the ratios divide by zero, so the affected rows AND THEIR CORRESPONDING
# TARGETS must be dropped from the dataset before any splitting is done.

feature_steps = [('feature transformer', CustomFeatures())]
pre_split_pipeline = Pipeline(feature_steps)

transformed_data = pre_split_pipeline.fit_transform(alldata)

In [95]:
# split data
# Separate the target column ('ACTIVE') from the engineered features.
X = transformed_data.drop(columns = 'ACTIVE')
y = transformed_data.ACTIVE

# Fix the shuffle seed: without `random_state` every execution of this cell
# produces a different train/test partition, so results cannot be reproduced
# after Restart Kernel -> Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=SPLIT_SEED,
)

In [96]:
# post split pipeline

post_split_pipeline = Pipeline([
                                ('scaler', StandardScaler())
                                ])

# The scaler is fitted on the training features only and then applied to the
# test features, so no test-set statistics leak into the scaling.
#
# The scaled arrays are wrapped in new DataFrames instead of being written
# through `X_train.values[:] = ...`. `.values` is not guaranteed to be a
# writable view of the frame: it is a copy when column dtypes differ (the
# assignment then silently does nothing), it would truncate floats written
# into integer columns, and it is read-only under pandas Copy-on-Write.
X_train = pd.DataFrame(
    post_split_pipeline.fit_transform(X_train),
    index=X_train.index,
    columns=X_train.columns,
)
X_test = pd.DataFrame(
    post_split_pipeline.transform(X_test),
    index=X_test.index,
    columns=X_test.columns,
)

In [99]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, dropout masks and batch shuffling repeat across runs.
# (GPU kernels may still introduce small run-to-run numerical differences.)
MODEL_SEED = 42
tf.keras.utils.set_random_seed(MODEL_SEED)

# Fully connected binary classifier: three ReLU hidden layers (128 -> 64 -> 32),
# each followed by 20% dropout, and a single sigmoid output unit.
model6 = Sequential([
    Dense(128, activation = 'relu'),
    Dropout(0.2),
    Dense(64, activation = 'relu'),
    Dropout(0.2),
    Dense(32, activation = 'relu'),
    Dropout(0.2),
    Dense(1, activation = 'sigmoid')
])

# Compile the model
model6.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.legacy.Adam(0.01),
    metrics = ['accuracy']
)

# Train the model
# NOTE(review): the held-out test set doubles as the validation set here, so the
# per-epoch validation metrics are not an independent estimate if they are used
# to choose epochs or hyperparameters -- consider a separate validation split.
history6 = model6.fit(X_train,
                    y_train,
                    epochs=500,
                    validation_data=(X_test, y_test),
                    verbose=1)

2023-12-01 13:04:33.068361: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-01 13:04:33.096156: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-01 13:04:33.096202: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-01 13:04:33.098422: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other ope

Epoch 1/500
 11/225 [>.............................] - ETA: 1s - loss: 0.7500 - accuracy: 0.6108   

2023-12-01 13:04:37.781396: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 7