In [15]:
!pip install pandas numpy scikit-learn torch



In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
column_names = ['checking_account', 'duration', 'credit_history', 'purpose', 'credit_amount',
                'savings_account', 'employment', 'installment_rate', 'personal_status_sex',
                'debtors_guarantors', 'residence', 'property', 'age', 'other_installment_plans',
                'housing', 'credits', 'job', 'liable_people', 'telephone', 'foreign_worker', 'credit_risk']
data = pd.read_csv(url, names=column_names, delimiter=' ')

# Preprocess the categorical variables using one-hot encoding
data = pd.get_dummies(data, columns=['checking_account', 'credit_history', 'purpose', 'savings_account', 'employment',
                                     'personal_status_sex', 'debtors_guarantors', 'property', 'other_installment_plans',
                                     'housing', 'job', 'telephone', 'foreign_worker'])

# Split the data into features (X) and target (y)
X = data.drop(columns=['credit_risk'])
y = data['credit_risk'].map({1: 0, 2: 1})  # Map the target variable to 0 (good risk) and 1 (bad risk)


In [17]:

# Standardize the continuous features
# Standardize the continuous features
continuous_features = ['duration', 'credit_amount', 'installment_rate', 'age', 'credits', 'liable_people']
scaler = StandardScaler()
X[continuous_features] = scaler.fit_transform(X[continuous_features])
# Split the data into training, validation, and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
# Convert the data into PyTorch tensors

# Convert all columns to float32
X_train = X_train.astype(np.float32)
X_val = X_val.astype(np.float32)
X_test = X_test.astype(np.float32)

# Convert the data into PyTorch tensors
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_val = torch.tensor(X_val.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_val = torch.tensor(y_val.values, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)



In [18]:
class CreditScoringModel(nn.Module):
    def __init__(self, input_dim):
        super(CreditScoringModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.layer2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.layer3(x)
        x = self.sigmoid(x)
        return x

# Instantiate the model
model = CreditScoringModel(X_train.shape[1])

# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
n_epochs = 50
batch_size = 32

for epoch in range(n_epochs):
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        y_pred = model(X_batch)

        # Calculate the loss
        loss = criterion(y_pred, y_batch)

        # Backward pass
        loss.backward()

        # Update the weights
        optimizer.step()

    # Calculate the validation loss
    y_val_pred = model(X_val)
    val_loss = criterion(y_val_pred, y_val)
    print(f'Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}')


Epoch 1/50, Loss: 0.5950955748558044, Validation Loss: 0.6103551983833313
Epoch 2/50, Loss: 0.561471700668335, Validation Loss: 0.5869834423065186
Epoch 3/50, Loss: 0.5629627108573914, Validation Loss: 0.5701571702957153
Epoch 4/50, Loss: 0.5227663516998291, Validation Loss: 0.5375768542289734
Epoch 5/50, Loss: 0.5099400281906128, Validation Loss: 0.5100570321083069
Epoch 6/50, Loss: 0.461517870426178, Validation Loss: 0.5108264684677124
Epoch 7/50, Loss: 0.4795411229133606, Validation Loss: 0.48736611008644104
Epoch 8/50, Loss: 0.4156927466392517, Validation Loss: 0.5020230412483215
Epoch 9/50, Loss: 0.3824921250343323, Validation Loss: 0.5134875774383545
Epoch 10/50, Loss: 0.4427129328250885, Validation Loss: 0.5200778841972351
Epoch 11/50, Loss: 0.4253544807434082, Validation Loss: 0.5406981706619263
Epoch 12/50, Loss: 0.36217278242111206, Validation Loss: 0.5232943296432495
Epoch 13/50, Loss: 0.3856695592403412, Validation Loss: 0.5294525027275085
Epoch 14/50, Loss: 0.3254415392875

In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Make predictions on the test set
y_pred = model(X_test)
y_pred = (y_pred.detach().numpy() > 0.5).astype(int)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion_mat)
print("Classification Report:\n", report)


Accuracy: 0.755
Confusion Matrix:
 [[111  30]
 [ 19  40]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.85      0.79      0.82       141
         1.0       0.57      0.68      0.62        59

    accuracy                           0.76       200
   macro avg       0.71      0.73      0.72       200
weighted avg       0.77      0.76      0.76       200



In [20]:
torch.save(model.state_dict(), "credit_risk_model_1.pth")


In [21]:
torch.save(model.state_dict(), "credit_risk_model_2.pth")