# IS424: Data Mining & Biz Analytics
### Team: G3T3
### Project: Predicting Loan Default based on Customer Profile
### Model selection: <font color='#0041C2'>Neural Network</font>
---

# 1. Setting up the notebook

In [1]:
import pandas as pd
import numpy as np

from imblearn.over_sampling import SMOTE
from category_encoders import TargetEncoder
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import recall_score, fbeta_score, roc_auc_score, make_scorer
from sklearn.model_selection import cross_val_score, StratifiedKFold

import tensorflow as tf
tf.autograph.set_verbosity(0)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Recall, AUC, Precision
import tensorflow_addons as tfa

tf.get_logger().setLevel("INFO")

 The versions of TensorFlow you are currently using is 2.4.0-rc0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
# Load the raw training data, then split it into the target and the predictors.
df_train = pd.read_csv("../dataset/train.csv")

# The list selector keeps the target as a one-column DataFrame (not a Series).
y_train = df_train.loc[:, ["risk_flag"]]
x_train = df_train.drop(columns=["risk_flag"])

In [3]:
def target_encoding(df_x, df_y):
    """Target-encode the high-cardinality categorical columns of ``df_x``.

    A separate ``TargetEncoder`` is fitted for each of ``profession``,
    ``city`` and ``state`` against ``df_y``. The encoded values are stored in
    ``<column>_encoded`` and the original categorical columns are removed.

    Parameters
    ----------
    df_x : feature frame containing the ``profession``, ``city`` and
        ``state`` columns. It is copied, so the caller's frame is untouched.
    df_y : target values passed to each encoder's ``fit_transform``.

    Returns
    -------
    A new frame with the three categorical columns replaced by their
    ``*_encoded`` counterparts (appended after the remaining columns).
    """
    # Categorical columns with high cardinality.
    categorical_columns = ("profession", "city", "state")

    encoded = df_x.copy()
    for column in categorical_columns:
        # One independent encoder per column, fitted on that column alone.
        encoder = TargetEncoder()
        encoded[column + "_encoded"] = encoder.fit_transform(encoded[column], df_y)

    # The raw categorical columns are no longer needed once encoded.
    return encoded.drop(columns=list(categorical_columns))

# Numeric columns that get rescaled by the min-max scaler below.
scale_features = ['income', 'age', 'experience']

# Swap the high-cardinality categoricals for their target-encoded versions.
# NOTE: this rebinds x_train, so the cell cannot be re-run without first
# re-running the cell that loads the data.
x_train = target_encoding(x_train, y_train)

# Fit the scaler on the selected columns, then overwrite them with the
# scaled values.
scaler = MinMaxScaler()
scaler.fit(x_train[scale_features])
x_train[scale_features] = scaler.transform(x_train[scale_features])

  elif pd.api.types.is_categorical(cols):


# 2. Running base model

In [None]:
# Baseline network evaluated with 3-fold stratified cross-validation.
# For every fold: build a fresh model, oversample the training split with
# SMOTE (the validation split is left untouched), train, and evaluate.

# Seed TensorFlow as well so weight initialisation and batch shuffling are
# repeatable, matching the fixed random_state used for the splitter and SMOTE.
tf.random.set_seed(2021)

skf = StratifiedKFold(shuffle=True, n_splits=3, random_state=2021)
scores = []

# Index of the class the metrics should describe.
# Assumes risk_flag == 1 marks a defaulting customer — TODO confirm.
positive_class = 1

# Take the input width from the data instead of hard-coding it.
n_features = x_train.shape[1]

for train, val in skf.split(x_train, y_train):
    model = Sequential()
    model.add(Dense(120, input_shape=(n_features,), activation='relu'))
    model.add(Dense(120, activation='relu'))
    model.add(Dense(120, activation='relu'))
    model.add(Dense(2, activation='softmax'))

    # The targets are one-hot encoded with two columns. Without `class_id`,
    # Recall/Precision pool both columns together (a micro-average over both
    # classes) instead of measuring the positive class. Likewise AUC must be
    # computed per output column (`multi_label=True`) rather than on the
    # flattened predictions.
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=["accuracy",
                           Recall(class_id=positive_class),
                           Precision(class_id=positive_class),
                           tfa.metrics.FBetaScore(num_classes=2, beta=2.0),
                           AUC(multi_label=True, num_labels=2)])

    x_t, y_t = x_train.iloc[train], y_train.iloc[train]
    x_val, y_val = x_train.iloc[val], y_train.iloc[val]

    # Oversample only the training split; the validation split keeps its
    # original class balance.
    oversampler = SMOTE(random_state=2021)
    x_t, y_t = oversampler.fit_resample(x_t, y_t)

    # One-hot encode the labels to match the 2-unit softmax output.
    y_t = to_categorical(y_t, 2)
    y_val = to_categorical(y_val, 2)

    model_fit = model.fit(x_t, y_t, epochs=5, batch_size=64, verbose=0)
    # `evaluate` returns [loss, accuracy, recall, precision, fbeta, auc],
    # following the order of the metrics passed to `compile`.
    result = model.evaluate(x_val, y_val, verbose=0)
    scores.append(result)

loss = [fold[0] for fold in scores]
accuracy = [fold[1] for fold in scores]
recall = [fold[2] for fold in scores]
precision = [fold[3] for fold in scores]
# FBetaScore yields one value per class; report the positive class
# (the original code read index 0, i.e. the other class).
fbeta_2 = [fold[4][positive_class] for fold in scores]
auc = [fold[5] for fold in scores]

# Append the across-fold mean as the last entry of every metric list.
for metric_values in (loss, accuracy, recall, precision, fbeta_2, auc):
    metric_values.append(sum(metric_values) / len(metric_values))

fold_columns = [f'Fold {i}' for i in range(1, len(scores) + 1)] + ['Average']
score_df = pd.DataFrame(data=[loss, accuracy, recall, precision, fbeta_2, auc],
                        columns=fold_columns,
                        index=['Loss', 'Accuracy', 'Recall', 'Precision', 'Fbeta2', 'AUC'])
display(score_df)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
