<a href="https://colab.research.google.com/github/Andicleomj/Machine-Learning/blob/main/Week14/RNN%20dan%20Deep%20RNN%20model/Bank%20Marketing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Rizki Aprilia Rahman
#### 1103213007
#### Week 14 RNN dan Deep RNN model

In [1]:
# Install PyTorch (skip if already installed)
!pip install torch torchvision torchaudio --quiet

In [2]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [3]:
# Load dataset
def load_data(path="/content/sample_data/bank-full.csv", test_size=0.2, random_state=42):
    """Load the bank-marketing CSV and return a standardized train/test split.

    The file is read with ';' as the separator. The column 'y' is the target;
    every other column is a feature. Categorical features are expanded with
    ``pd.get_dummies`` and the target is integer-encoded with ``LabelEncoder``.

    Args:
        path: Location of the CSV file.
        test_size: Fraction of rows held out for the second split.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        ``[X_train, X_test, y_train, y_test]`` where the X entries are
        standardized NumPy arrays and the y entries are integer label arrays.
    """
    df = pd.read_csv(path, sep=';')

    # Separate features from the target column.
    features = df.drop(columns=['y'])
    target = df['y']

    # Convert categorical to numerical
    features = pd.get_dummies(features)
    target = LabelEncoder().fit_transform(target)

    # Split BEFORE scaling: fitting the scaler on all rows would leak the
    # held-out rows' mean/variance into the training features.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # Standardize features using statistics of the training rows only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return [X_train, X_test, y_train, y_test]

In [9]:
# Dataset wrapper serving (features, label) tensor pairs
class BankDataset(Dataset):
    """Holds features and labels as tensors and yields them one sample at a time.

    Features are stored as float32 with an extra axis inserted at position 1
    (the sequence-length dimension); labels are stored as int64.
    """

    def __init__(self, X, y):
        feature_tensor = torch.tensor(X, dtype=torch.float32)
        label_tensor = torch.tensor(y, dtype=torch.long)
        # Insert the sequence-length axis right after the sample axis.
        self.X = torch.unsqueeze(feature_tensor, dim=1)
        self.y = label_tensor

    def __len__(self):
        """Number of samples, taken from the label tensor."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

In [10]:
# Define RNN model
class RNNModel(nn.Module):
    """RNN classifier: recurrent layers, pooling over the time axis, linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of outputs produced by the linear head.
        num_layers: Number of stacked recurrent layers.
        pooling: How the per-step outputs are reduced over the sequence axis;
            ``'max'`` or ``'avg'``.

    Raises:
        ValueError: If ``pooling`` is not one of the supported modes.
    """

    _POOLINGS = ('max', 'avg')

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, pooling='max'):
        super().__init__()
        # Reject unknown modes up front: otherwise forward() would skip pooling
        # and hand a 3-D tensor to the linear layer without any error.
        if pooling not in self._POOLINGS:
            raise ValueError(f"pooling must be one of {self._POOLINGS}, got {pooling!r}")

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.pooling = pooling

        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch-first input ``x`` to one output vector per batch element."""
        # Zero initial hidden state, allocated directly on x's device and dtype.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)

        # Reduce over dim 1 (the sequence axis, since batch_first=True).
        if self.pooling == 'max':
            out = out.max(dim=1).values
        elif self.pooling == 'avg':
            out = out.mean(dim=1)
        else:
            # Only reachable if `self.pooling` was reassigned after construction.
            raise ValueError(f"pooling must be one of {self._POOLINGS}, got {self.pooling!r}")

        return self.fc(out)

In [11]:
# Train function
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25, device=None):
    """Train ``model`` and print loss/accuracy on the validation loader each epoch.

    Args:
        model: Module to train; it is moved to ``device``.
        dataloaders: ``(train_loader, val_loader)`` pair.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        scheduler: LR scheduler; stepped once at the end of every epoch.
        num_epochs: Number of passes over the training loader.
        device: Device to run on. When ``None``, CUDA is used if available,
            otherwise the CPU. (Previously this was read from a module-level
            ``device`` variable, which raised ``NameError`` if undefined.)

    Returns:
        The trained model (the same object, on ``device``).
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_loader, val_loader = dataloaders
    model = model.to(device)

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch figure is a
            # per-sample average even when the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)

        scheduler.step()

        # ---- validation pass ----
        val_loss = 0.0
        n_correct = 0
        n_seen = 0

        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)

                # Count hits directly instead of collecting per-sample lists.
                preds = torch.argmax(outputs, dim=1)
                n_correct += (preds == labels).sum().item()
                n_seen += labels.size(0)

        train_loss = running_loss / len(train_loader.dataset)
        val_loss = val_loss / len(val_loader.dataset)
        val_acc = n_correct / n_seen

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    return model


In [12]:
# Main Experiment
def evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly."""
    predicted = []
    expected = []

    model.eval()
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)
            predicted.extend(preds.cpu().numpy())
            expected.extend(labels.cpu().numpy())

    return accuracy_score(expected, predicted)


if __name__ == "__main__":
    # Fixed seed so weight initialization and batch shuffling are repeatable.
    SEED = 42
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Device shared by training and evaluation.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    X_train, X_test, y_train, y_test = load_data()
    train_dataset = BankDataset(X_train, y_train)
    val_dataset = BankDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

    dataloaders = (train_loader, val_loader)

    input_size = X_train.shape[1]
    output_size = len(np.unique(y_train))

    # Hyperparameter grid: every combination is trained from scratch.
    hidden_sizes = [16, 32, 64]
    poolings = ['max', 'avg']
    optimizers = {'SGD': optim.SGD, 'RMSProp': optim.RMSprop, 'Adam': optim.Adam}
    epochs_list = [5, 50, 100, 250, 350]

    results = []

    for hidden_size in hidden_sizes:
        for pooling in poolings:
            for optimizer_name, optimizer_fn in optimizers.items():
                for num_epochs in epochs_list:
                    print(f"\nHidden Size: {hidden_size}, Pooling: {pooling}, Optimizer: {optimizer_name}, Epochs: {num_epochs}")

                    # Re-seed per configuration so every run starts from the
                    # same random state and the comparison is like-for-like.
                    torch.manual_seed(SEED)

                    model = RNNModel(input_size, hidden_size, output_size, pooling=pooling)
                    criterion = nn.CrossEntropyLoss()
                    optimizer = optimizer_fn(model.parameters(), lr=0.01)
                    # Halve the learning rate every 50 epochs.
                    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

                    trained_model = train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs)

                    # Evaluate on test set
                    # NOTE: load_data() yields a single held-out split, so this
                    # figure is measured on the same data that train_model uses
                    # for its per-epoch validation metrics.
                    test_acc = evaluate_accuracy(trained_model, val_loader, device)
                    print(f"Test Accuracy: {test_acc:.4f}")

                    results.append((hidden_size, pooling, optimizer_name, num_epochs, test_acc))

    # Summarize the grid so the best configurations are visible without
    # scrolling through the per-epoch log.
    results_df = pd.DataFrame(
        results,
        columns=['hidden_size', 'pooling', 'optimizer', 'epochs', 'test_acc'],
    ).sort_values('test_acc', ascending=False)
    print("\nTop configurations by accuracy:")
    print(results_df.head(10).to_string(index=False))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 225/250, Train Loss: 0.1384, Val Loss: 0.2707, Val Acc: 0.8942
Epoch 226/250, Train Loss: 0.1384, Val Loss: 0.2708, Val Acc: 0.8941
Epoch 227/250, Train Loss: 0.1384, Val Loss: 0.2712, Val Acc: 0.8942
Epoch 228/250, Train Loss: 0.1384, Val Loss: 0.2714, Val Acc: 0.8937
Epoch 229/250, Train Loss: 0.1385, Val Loss: 0.2714, Val Acc: 0.8941
Epoch 230/250, Train Loss: 0.1384, Val Loss: 0.2710, Val Acc: 0.8935
Epoch 231/250, Train Loss: 0.1384, Val Loss: 0.2710, Val Acc: 0.8936
Epoch 232/250, Train Loss: 0.1384, Val Loss: 0.2716, Val Acc: 0.8934
Epoch 233/250, Train Loss: 0.1384, Val Loss: 0.2716, Val Acc: 0.8941
Epoch 234/250, Train Loss: 0.1384, Val Loss: 0.2713, Val Acc: 0.8946
Epoch 235/250, Train Loss: 0.1384, Val Loss: 0.2711, Val Acc: 0.8944
Epoch 236/250, Train Loss: 0.1383, Val Loss: 0.2726, Val Acc: 0.8937
Epoch 237/250, Train Loss: 0.1384, Val Loss: 0.2715, Val Acc: 0.8943
Epoch 238/250, Train Loss: 0.1384, Val