In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model, save_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization,Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2, l1

import warnings
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
import pickle
import re
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_auc_score, roc_curve, log_loss
from sklearn.utils.class_weight import compute_class_weight


In [14]:
df = pd.read_csv('cleaned_train.csv')
df_test = pd.read_csv('cleaned_test.csv')

In [15]:
X = df.drop(['Credit_Score'], axis=1)
y = df['Credit_Score']

In [16]:
# make smote for unbalanced data
smote = SMOTE()
X, y = smote.fit_resample(X,y)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15,
                                                    stratify=y, random_state=42)

In [18]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
def eval_metric(model, X_train, y_train, X_test, y_test):
    y_train_pred_probabilities = model.predict(X_train)
    y_train_pred = y_train_pred_probabilities.argmax(axis=1)
    y_pred_probabilities = model.predict(X_test)
    y_pred = y_pred_probabilities.argmax(axis=1)

    print("Test Set:")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    print("\nTrain Set:")
    print(confusion_matrix(y_train, y_train_pred))
    print(classification_report(y_train, y_train_pred))

In [20]:
model_batchN = Sequential([
    Dense(512, input_dim=X_train.shape[1], activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),
    Dense(3, activation='softmax')
])
model_batchN.compile(optimizer = Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics = ['accuracy'])

early_stopping = EarlyStopping(monitor='val_accuracy',
                               patience=70,
                               restore_best_weights=True)

model_batchN.fit(x=X_train,
          y=y_train,
          validation_data=(X_test, y_test),
          validation_split=0.1,
          batch_size=512,
          epochs=800,
          verbose=1,
          callbacks=[early_stopping])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/800
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 51ms/step - accuracy: 0.6057 - loss: 0.9868 - val_accuracy: 0.7237 - val_loss: 0.7161
Epoch 2/800
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 42ms/step - accuracy: 0.7123 - loss: 0.7428 - val_accuracy: 0.7334 - val_loss: 0.6886
Epoch 3/800
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 47ms/step - accuracy: 0.7282 - loss: 0.7065 - val_accuracy: 0.7335 - val_loss: 0.6771
Epoch 4/800
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 51ms/step - accuracy: 0.7324 - loss: 0.6924 - val_accuracy: 0.7372 - val_loss: 0.6721
Epoch 5/800
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 40ms/step - accuracy: 0.7337 - loss: 0.6881 - val_accuracy: 0.7372 - val_loss: 0.6721
Epoch 6/800
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 58ms/step - accuracy: 0.7347 - loss: 0.6817 - val_accuracy: 0.7379 - val_loss: 0.6640
Epoch 7/800

<keras.src.callbacks.history.History at 0x7ab43a2bb1c0>

In [21]:
model_batchN.evaluate(X_train, y_train)


[1m3299/3299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.9181 - loss: 0.2277


[0.22653672099113464, 0.9188451766967773]

In [22]:
eval_metric(model_batchN, X_train, y_train, X_test, y_test)


[1m3299/3299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step
[1m583/583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
Test Set:
[[5364  612  232]
 [ 968 4385  855]
 [  70  386 5753]]
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      6208
           1       0.81      0.71      0.76      6208
           2       0.84      0.93      0.88      6209

    accuracy                           0.83     18625
   macro avg       0.83      0.83      0.83     18625
weighted avg       0.83      0.83      0.83     18625


Train Set:
[[33606  1311   263]
 [ 3999 28734  2447]
 [   15   530 34634]]
              precision    recall  f1-score   support

           0       0.89      0.96      0.92     35180
           1       0.94      0.82      0.87     35180
           2       0.93      0.98      0.96     35179

    accuracy                           0.92    105539
   macro avg       0.92      0.92      0.92    105539

In [24]:
model_batchN.save('my_model.keras')

In [25]:
df_test = pd.read_csv('cleaned_test.csv')
ID = df_test['ID']
X_test = df_test.drop('ID', axis=1)

In [26]:
y_test = model_batchN.predict(X_test)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step


In [35]:
X_test_scaled = scaler.transform(X_test)

model_batchN = load_model('final_model_ANN_credit_score.h5')

predictions = model_batchN.predict(X_test_scaled)
predicted_classes = np.argmax(predictions, axis=1)  # Convert probabilities to class labels

predicted_labels = pd.Series(predicted_classes).map({0: 'Poor', 1: 'Standard', 2: 'Good'})

final = pd.DataFrame({'ID': df_test['ID'], 'Credit_Score': predicted_labels})

# save the results to a CSV file
final.to_csv('predictions_credit_score.csv', index=False)
print("Predictions saved to 'predictions_credit_score.csv'")




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
        ID Credit_Score
0   0x2145         Poor
1   0x7d59         Poor
2   0xe753         Poor
3  0x19813     Standard
4  0x1dc5e     Standard
Predictions saved to 'predictions_credit_score.csv'
