In [2]:
!pip install keras-tuner

Collecting keras-tuner
  Using cached keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Using cached kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Using cached keras_tuner-1.4.7-py3-none-any.whl (129 kB)
Using cached kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [4]:
import pandas as pd
from pathlib import Path
import scipy.stats as st
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras_tuner as kt

2025-01-13 15:47:47.252083: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
csv_path = Path("../Resources/train_cleaned.csv")
df = pd.read_csv(csv_path)
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,ID,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Monthly_Balance,Credit_Score,Credit_History_Age_Months
0,0x1608,19114.12,1824.843333,3,4,3,4,3,8,4.0,Good,809.98,22.537593,No,49.574949,178.344067,244.565317,Good,271
1,0x160f,34847.84,3037.986667,2,4,6,1,7,1,2.0,Good,605.03,38.550848,No,18.816215,40.391238,484.591214,Good,320
2,0x1612,34847.84,3037.986667,2,4,6,1,3,1,2.0,Good,605.03,34.977895,No,18.816215,130.11542,444.867032,Good,323
3,0x1613,34847.84,3037.986667,2,4,6,1,3,0,2.0,Good,605.03,33.38101,No,18.816215,43.47719,481.505262,Good,324
4,0x1615,34847.84,3037.986667,2,4,6,1,3,4,2.0,Good,605.03,32.933856,No,18.816215,218.904344,356.078109,Good,326


In [6]:
# Step 1: Encode the target variable
le = LabelEncoder()
df['Credit_Score'] = le.fit_transform(df['Credit_Score'])

# Step 2: Encode categorical variables
df = pd.get_dummies(df, columns=['Credit_Mix', 'Payment_of_Min_Amount'], drop_first=True)

# Step 3: Define features (X) and target (y)
X = df.drop(columns=['Credit_Score', 'ID'])
y = df['Credit_Score']

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Step 5: Standardize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
num_features = len(X_train.columns)

In [13]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=26, activation="tanh", input_dim=num_features))
nn_model.add(tf.keras.layers.Dense(units=21, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=5)



# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/5
[1m825/825[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3861 - loss: 0.0000e+00
Epoch 2/5
[1m825/825[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3988 - loss: 0.0000e+00
Epoch 3/5
[1m825/825[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4008 - loss: 0.0000e+00
Epoch 4/5
[1m825/825[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3973 - loss: 0.0000e+00
Epoch 5/5
[1m825/825[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.4020 - loss: 0.0000e+00
207/207 - 1s - 3ms/step - accuracy: 0.4005 - loss: 0.0000e+00
Loss: 0.0, Accuracy: 0.400515615940094


In [11]:
y_train.value_counts(normalize=True)

Credit_Score
2    0.528154
1    0.311189
0    0.160657
Name: proportion, dtype: float64

In [23]:
from sklearn.utils import class_weight
import numpy as np

# Compute class weights
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = dict(enumerate(class_weights))

# Print to debug
print("Class weights:", class_weights_dict)
print("Unique classes in y_train:", np.unique(y_train))

# Fit the model
fit_model = nn_model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    class_weight=class_weights_dict
)

Class weights: {0: 2.074817087561954, 1: 1.0711587669062994, 2: 0.6311292985856846}
Unique classes in y_train: [0 1 2]


KeyError: 3