**ANN Classification**


In [None]:
from sklearn.calibration import LabelEncoder
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
from torch.utils.data import DataLoader, TensorDataset


In [None]:
# Load the loan table and split off the label column: `pop` removes
# 'loan_status' from the frame in place and returns it as a Series.
data = pd.read_csv("loan_data.csv")
targets = data.pop('loan_status')

In [None]:
# Import LabelEncoder from its public home; the top-of-file import pulls it
# from sklearn.calibration, which only exposes it as an incidental re-export.
from sklearn.preprocessing import LabelEncoder

# Columns that are replaced by integer codes.
categorical_columns = [
    "person_gender",
    "person_education",
    "person_home_ownership",
    "loan_intent",
    "previous_loan_defaults_on_file",
]

# One fitted encoder per column, so every code -> category mapping stays
# available (e.g. for `inverse_transform`) instead of being overwritten by
# the next fit.
encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    encoders[column] = encoder
# NOTE(review): LabelEncoder is documented for target labels; its integer
# codes impose an arbitrary order on nominal features — consider one-hot
# encoding for columns such as 'loan_intent'.

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Split FIRST. Scaling and oversampling the full table before splitting leaks
# information: the scaler statistics would include held-out rows, and
# duplicated minority rows would appear in both the training and test sets.
# `stratify` keeps the class proportions the same in every split.
X_train, X_test, Y_train, Y_test = train_test_split(
    data, targets, test_size=0.2, random_state=42, stratify=targets
)
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X_train, Y_train, test_size=0.2, random_state=42, stratify=Y_train
)

# Fit the scaler on the training rows only, then apply the same transform to
# the validation and test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_validation = scaler.transform(X_validation)
X_test = scaler.transform(X_test)

# Balance classes in the training set only; validation and test keep their
# original class distribution.
ros = RandomOverSampler(random_state=42)
data_balanced, targets_balanced = ros.fit_resample(X_train, Y_train)

# Convert every split to tensors: float32 features, int64 class labels.
X_train = torch.tensor(data_balanced, dtype=torch.float32)
X_validation = torch.tensor(X_validation, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
Y_train = torch.tensor(targets_balanced.values, dtype=torch.long)
Y_validation = torch.tensor(Y_validation.values, dtype=torch.long)
Y_test = torch.tensor(Y_test.values, dtype=torch.long)

In [None]:
class Classification(nn.Module):
    """Fully connected classifier with three ReLU hidden layers.

    The weights are held as explicit ``nn.Parameter`` matrices rather than
    ``nn.Linear`` layers. ``forward`` returns raw, unnormalised class scores
    (logits), which is what ``nn.CrossEntropyLoss`` expects; apply
    ``torch.softmax(logits, dim=1)`` to obtain probabilities.

    Args:
        input_dim: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        hidden3: Width of the third hidden layer.
        output_dim: Number of classes (one score per class).
    """

    def __init__(self, input_dim, hidden1, hidden2, hidden3, output_dim):
        super().__init__()
        # Weights start as small Gaussian noise (std 0.01); biases start at 0.
        self.W1 = nn.Parameter(torch.randn(input_dim, hidden1) * 0.01)
        self.b1 = nn.Parameter(torch.zeros(hidden1))
        self.W2 = nn.Parameter(torch.randn(hidden1, hidden2) * 0.01)
        self.b2 = nn.Parameter(torch.zeros(hidden2))
        self.W3 = nn.Parameter(torch.randn(hidden2, hidden3) * 0.01)
        self.b3 = nn.Parameter(torch.zeros(hidden3))
        self.W_out = nn.Parameter(torch.randn(hidden3, output_dim) * 0.01)
        self.b_out = nn.Parameter(torch.zeros(output_dim))
        self.relu = nn.ReLU()

    def forward(self, X):
        """Return class logits of shape ``(batch, output_dim)`` for ``X``.

        Args:
            X: Float tensor of shape ``(batch, input_dim)``.
        """
        x = self.relu(X @ self.W1 + self.b1)
        x = self.relu(x @ self.W2 + self.b2)
        x = self.relu(x @ self.W3 + self.b3)
        # No sigmoid here: CrossEntropyLoss applies log-softmax itself, and
        # squashing the scores into (0, 1) first flattens the loss gradients.
        return x @ self.W_out + self.b_out

In [None]:
# Layer sizes for the network.
# Input width is the number of feature columns in the training tensor.
input_dim = X_train.shape[1]
# One output unit per distinct value of the label Series (NaN, if present,
# is counted as a value).
output_dim = targets.nunique(dropna=False)
# Widths of the three hidden layers.
hidden1, hidden2, hidden3 = 10, 19, 20

In [None]:
# Step size handed to the optimizer, and the loss used for training.
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()

# Build the network from the layer sizes chosen above.
model = Classification(
    input_dim=input_dim,
    hidden1=hidden1,
    hidden2=hidden2,
    hidden3=hidden3,
    output_dim=output_dim,
)

In [None]:
# %%
# Training hyperparameters.
epochs = 30
batch_size = 32

# Per-epoch history, filled in by `train_model`.
train_losses, val_losses, accuracies = [], [], []

# Mini-batch loaders: training batches are reshuffled every epoch,
# validation batches keep a fixed order.
train_data = TensorDataset(X_train, Y_train)
val_data = TensorDataset(X_validation, Y_validation)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Adam over every model parameter.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def _count_correct(outputs, labels):
    """Return how many rows of `outputs` peak at the index given in `labels`."""
    return (outputs.max(dim=1).indices == labels).sum().item()


def train_model(epochs):
    """Train the global `model` for `epochs` epochs and log progress.

    Each epoch runs one optimisation pass over `train_loader` followed by a
    gradient-free pass over `val_loader`. The mean per-batch training loss,
    the training accuracy and the mean per-batch validation loss are appended
    to `train_losses`, `accuracies` and `val_losses`; a summary is printed.
    """
    for epoch in range(epochs):
        # ---- optimisation pass -------------------------------------------
        model.train()
        running_loss, n_correct, n_seen = 0, 0, 0
        for inputs, labels in train_loader:
            outputs = model(inputs)
            step_loss = criterion(outputs, labels)

            optimizer.zero_grad()
            step_loss.backward()
            optimizer.step()

            running_loss += step_loss.item()
            n_correct += _count_correct(outputs, labels)
            n_seen += labels.size(0)

        train_loss = running_loss / len(train_loader)
        accuracy = n_correct / n_seen
        train_losses.append(train_loss)
        accuracies.append(accuracy)

        # ---- validation pass (no gradients) ------------------------------
        model.eval()
        val_loss, val_hits, val_seen = 0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                val_hits += _count_correct(outputs, labels)
                val_seen += labels.size(0)

        val_loss = val_loss / len(val_loader)
        val_accuracy = val_hits / val_seen
        val_losses.append(val_loss)

        # ---- progress report ---------------------------------------------
        print(f"Epoch {epoch + 1}/{epochs}:")
        print(f"    Training Loss: {train_loss:.4f}, Accuracy: {accuracy:.4f}")
        print(f"    Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")


train_model(epochs)



In [None]:
# Score the trained model on the test tensors.
model.eval()
with torch.no_grad():
    logits = model(X_test)
    # Predicted class = index of the largest output in each row.
    predicted_classes = logits.argmax(dim=1)

# The scikit-learn metrics below are fed NumPy arrays, so move to CPU and
# convert.
predicted_classes_np = predicted_classes.cpu().numpy()
Y_test_np = Y_test.cpu().numpy()

# Metrics averaged with average='weighted', printed in a fixed order.
weighted_metrics = (
    ("F1 Score:", f1_score),
    ("Recall Score:", recall_score),
    ("Precision Score:", precision_score),
)
for label, metric_fn in weighted_metrics:
    print(label, metric_fn(Y_test_np, predicted_classes_np, average='weighted'))
print("Confusion Matrix:\n", confusion_matrix(Y_test_np, predicted_classes_np))

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded training loss, one point per epoch.
loss_values = list(train_losses)
epoch_axis = range(len(loss_values))

plt.figure(figsize=(20, 20))
plt.plot(epoch_axis, loss_values)
plt.title("Loss With Respect to Iteration")
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Overlay the training and validation loss curves on a shared epoch axis.
epoch_axis = range(len(train_losses))
curves = (
    (train_losses, 'Training Loss', 'blue'),
    (val_losses, 'Validation Loss', 'orange'),
)

plt.figure(figsize=(10, 6))
for series, curve_label, curve_color in curves:
    plt.plot(epoch_axis, series, label=curve_label, color=curve_color)

# Axis labels, title, legend and grid.
plt.xlabel('Epochs')
plt.ylabel('Loss ')
plt.title('Training & Validation Loss ')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()
