<a href="https://colab.research.google.com/github/johntanas/it1244project/blob/main/code/model_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [175]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

In [176]:
# Locations of the two input CSV files.
# NOTE(review): both paths are absolute and point at /content, which looks like
# a Colab upload directory — confirm before running anywhere else.
path_to_evm_label = "/content/expo_moving_k2_labels.csv"
path_to_app = "/content/cleaned_application.csv"

# Load both files; the first column of each becomes the DataFrame index.
ewm_df_status = pd.read_csv(path_to_evm_label, index_col=0)
cleaned_app_df = pd.read_csv(path_to_app, index_col=0)

In [177]:
# Integer code assigned to each textual label value.
labels_to_int_evm = dict(bad=0, good=1)

In [178]:
# Drop the "status" column, then replace the text values in "label" with their
# integer codes; values missing from the mapping are left untouched by replace().
ewm_df = (
    ewm_df_status
    .drop(columns="status")
    .replace({"label": labels_to_int_evm})
)

In [179]:
# Shared settings: name of the target column, and the seed reused as
# `random_state` by later cells.
dep_var, random_state = "label", 42

In [180]:
# Inner-join the labels with the application records on "id", then drop the
# join key so it is not carried into the modelling frame.
labelled_applications = ewm_df.merge(cleaned_app_df, on="id", how="inner")
train_df = labelled_applications.drop("id", axis=1)

In [181]:
# One transformer per column type: one-hot encoding for categorical columns and
# standard scaling for numeric columns (both with default settings).
categorical_transformer = OneHotEncoder()
numerical_transformer = StandardScaler()

In [182]:
# Separate the feature frame from the target. The target column is named
# through `dep_var` (defined with the other settings) rather than a repeated
# string literal, so the column name is maintained in one place.
X = train_df.drop(columns=[dep_var])
y = train_df[dep_var]

In [183]:
# Numeric features: columns stored as int64 or float64.
numerical_cols = [
    cname
    for cname, col_dtype in X.dtypes.items()
    if col_dtype in ("int64", "float64")
]

# Categorical features: object-typed columns with fewer than 10 distinct values.
# Object columns with 10 or more distinct values end up in neither list.
categorical_cols = [
    cname
    for cname, col_dtype in X.dtypes.items()
    if col_dtype == "object" and X[cname].nunique() < 10
]

In [184]:
# Route each column group through its transformer. No `remainder` is given, so
# ColumnTransformer's default applies to columns that are in neither list.
column_pipelines = [
    ("num", numerical_transformer, numerical_cols),
    ("cat", categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_pipelines)

In [185]:
# Fit the preprocessor on X and encode X in the same call.
# NOTE(review): the fit sees every row, and the train/test split only happens
# in the following cell, so held-out rows contribute to the scaling statistics
# and category vocabulary — consider fitting on the training rows only.
xs = preprocessor.fit_transform(X)

In [186]:
# Seeded train/test split using scikit-learn's default proportions.
# NOTE(review): no `stratify` argument is passed, so the class ratio of each
# partition is left to chance.
X_train, X_test, y_train, y_test = train_test_split(
    xs,
    y,
    random_state=random_state,
)

In [187]:
# Candidate models, keyed by the name printed in the evaluation reports.
#
# KNN's neighbour count is the integer square root of the number of training
# rows. The row count is read from `.shape[0]` instead of `len()`: the
# ColumnTransformer may return a SciPy sparse matrix (it contains a
# OneHotEncoder), and `len()` raises TypeError on sparse matrices, whereas
# `.shape[0]` works for dense and sparse input alike.
n_train_rows = X_train.shape[0]

classifiers = {
    "LogisticRegression": LogisticRegression(max_iter=1000, random_state=random_state),
    "KNeighbors": KNeighborsClassifier(n_neighbors=int(n_train_rows ** 0.5)),
    "SVC": SVC(random_state=random_state),
    "DecisionTree": DecisionTreeClassifier(random_state=random_state),
    "RandomForest": RandomForestClassifier(random_state=random_state),
    "XGBoost": XGBClassifier(random_state=random_state),
}

In [188]:
# Fit every candidate on the training split and print its per-class metrics on
# the test split. zero_division=0 reports 0 (without a warning) for metrics
# whose denominator is zero.
for key in classifiers:
    classifier = classifiers[key]
    prediction = classifier.fit(X_train, y_train).predict(X_test)
    report = classification_report(y_test, prediction, zero_division=0)
    print("Model " + key)
    print(report)

Model LogisticRegression
              precision    recall  f1-score   support

           0       1.00      0.00      0.00       511
           1       0.73      1.00      0.84      1380

    accuracy                           0.73      1891
   macro avg       0.87      0.50      0.42      1891
weighted avg       0.80      0.73      0.62      1891

Model KNeighbors
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       511
           1       0.73      1.00      0.84      1380

    accuracy                           0.73      1891
   macro avg       0.36      0.50      0.42      1891
weighted avg       0.53      0.73      0.62      1891

Model SVC
              precision    recall  f1-score   support

           0       1.00      0.00      0.00       511
           1       0.73      1.00      0.84      1380

    accuracy                           0.73      1891
   macro avg       0.87      0.50      0.42      1891
weighted avg       0.8

In [189]:
# Label names paired with their integer codes.
# NOTE(review): despite the name, this maps label -> int (the same direction as
# `labels_to_int_evm`); the final report passes it as `target_names` and relies
# on the keys iterating in class-code order.
int_to_labels_evm = {name: code for code, name in enumerate(("bad", "good"))}

In [190]:
import keras
import tensorflow as tf
from keras.utils.np_utils import to_categorical
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten


In [191]:
# Feed-forward classifier:
#   Dense(1024) -> sigmoid -> Dense(512) -> ReLU -> Dense(2) -> softmax
# No input shape is declared for this model.
nn_layers = [
    keras.layers.Dense(1024),
    keras.layers.Activation("sigmoid"),
    keras.layers.Dense(512),
    keras.layers.Activation("relu"),
    keras.layers.Dense(2),
    keras.layers.Activation("softmax"),
]
model = keras.Sequential(nn_layers)

In [192]:
# One-hot encode the targets for the two-unit softmax output.
# The dtype is requested explicitly: from pandas 2.0 onwards get_dummies
# produces boolean columns by default, and the network's loss and metrics
# expect numeric targets. float32 keeps the targets the same on any pandas version.
one_hot_y = pd.get_dummies(y_train, dtype="float32")
one_hot_valid_y = pd.get_dummies(y_test, dtype="float32")

In [None]:
# Training setup: SGD (learning rate 0.01), binary focal cross-entropy loss,
# accuracy and recall as metrics, 10 epochs with batches of 4 samples.
epochs = 10
opt = tf.optimizers.SGD(learning_rate=0.01)
model.compile(
    optimizer=opt,
    loss=tf.keras.losses.BinaryFocalCrossentropy(),
    metrics=["accuracy", tf.keras.metrics.Recall()],
)

# NOTE(review): the test split is also used as validation data here.
history = model.fit(
    X_train,
    one_hot_y,
    batch_size=4,
    epochs=epochs,
    validation_data=(X_test, one_hot_valid_y),
)

Epoch 1/10
Epoch 2/10

In [None]:
# Evaluate the network on the test split: the predicted class is the index of
# the largest value along the last axis of the model output.
y_prob = model.predict(X_test)
prediction = np.argmax(y_prob, axis=-1)

print("Model NN")
nn_report = classification_report(y_test, prediction, zero_division=0)
print(nn_report)

In [None]:
from imblearn.over_sampling import SMOTE
# Oversample the training split with SMOTE. The sampler is seeded with the
# notebook-wide `random_state` so that the resampled data — and every result
# computed from it — is reproducible on re-run.
# Only the training split is resampled; X_test / y_test are left as they are.
oversample = SMOTE(random_state=random_state)
X_balanced, y_balanced = oversample.fit_resample(X_train, y_train)

In [None]:
# Refit every candidate on the SMOTE-balanced training data and report on the
# untouched test split.
# zero_division=0 matches the earlier evaluation loop: when a model never
# predicts a class, its precision is reported as 0 without emitting an
# UndefinedMetricWarning for each affected metric.
# Note that this refits the estimator objects stored in `classifiers` in place.
for key, classifier in classifiers.items():
    classifier.fit(X_balanced, y_balanced)
    prediction = classifier.predict(X_test)
    print("Model "+key)
    print(classification_report(y_test, prediction, zero_division=0))

In [None]:
# One-hot encode the balanced training targets for the two-unit softmax output.
# float32 is requested explicitly because pandas >= 2.0 defaults to boolean
# dummy columns, and the network's loss and metrics expect numeric targets.
one_hot_y_balanced = pd.get_dummies(y_balanced, dtype="float32")

In [None]:
# Second network with the same layer stack as the first, trained on the
# SMOTE-balanced data. The input width is taken from the training matrix
# instead of a hard-coded 35, so the model keeps working when the encoded
# feature count changes (e.g. a different set of one-hot categories).
n_input_features = X_balanced.shape[1]

model2 = keras.Sequential([
    keras.Input(shape=(n_input_features,)),
    keras.layers.Dense(1024),
    keras.layers.Activation("sigmoid"),
    keras.layers.Dense(512),
    keras.layers.Activation(activation="relu"),
    keras.layers.Dense(2),
    keras.layers.Activation(activation="softmax"),
])

In [None]:
# Training setup for the second network: SGD (learning rate 0.01), binary
# cross-entropy loss, accuracy and recall as metrics, 10 epochs, batch size 4.
epochs = 10
opt = tf.optimizers.SGD(learning_rate=0.01)
model2.compile(
    optimizer=opt,
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy", tf.keras.metrics.Recall()],
)

# Train on the balanced data; validation uses the original test split.
history = model2.fit(
    X_balanced,
    one_hot_y_balanced,
    batch_size=4,
    epochs=epochs,
    validation_data=(X_test, one_hot_valid_y),
)

In [None]:
# Evaluate the second network on the test split; the predicted class is the
# index of the largest output along the last axis.
y_prob = model2.predict(X_test)
prediction = y_prob.argmax(axis=-1)

# classification_report expects `target_names` as a sequence ordered by class
# code. Build that list explicitly from the label -> code mapping instead of
# passing the dict itself and relying on its key iteration order.
class_names = sorted(int_to_labels_evm, key=int_to_labels_evm.get)

print("Model Neural Net")
print(classification_report(y_test, prediction, zero_division=0, target_names=class_names))