# Task 3: Stacked autoencoder (with 3 autoencoders) based pre-training of a DFNN based classifier for Image dataset 3
- Model of AANN: 5-layer structure
- Mode of learning for AANNs: Mini-batch mode
- Stopping criterion: Change in average error below a threshold
- Weight update rule: AdaM
- Report should include the confusion matrices for training data and test data, for
    1. DFNN trained using only labeled data, 
    2. DFNN trained using a stacked autoencoder pretrained using unlabeled data and finetuned using labeled data. DFNN configuration should be the same in both (a) and (b).

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
labeled_training_data = pd.read_csv("task 3/training_data_set_26_labeled.csv", header = None)
print(f"The shape of labeled training data is {labeled_training_data.shape}")
print()

unlabeled_training_data = pd.read_csv("task 3/training_data_set_26_unlabeled.csv", header = None)
print(f"The shape of unlabeled training data is {unlabeled_training_data.shape}")
print()

validation_data = pd.read_csv("task 3/validation_data_set_26.csv", header = None)
print(f"The shape of labeled validation data is {validation_data.shape}")
print()

testing_data = pd.read_csv("task 3/testing_data_set_26.csv", header = None)
print(f"The shape of labeled testing data is {testing_data.shape}")
print()

The shape of labeled training data is (750, 37)

The shape of unlabeled training data is (1750, 36)

The shape of labeled validation data is (250, 37)

The shape of labeled testing data is (250, 37)



In [3]:
class CustomDataset(Dataset):
    def __init__(self, df):
        self.df = df
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        data = torch.FloatTensor(self.df.iloc[idx, :-1])
        label = torch.tensor(self.df.iloc[idx, -1], dtype=torch.long)
        return data, label

In [4]:
train_dataloader = DataLoader(CustomDataset(labeled_training_data), batch_size = 32, shuffle = True)
val_dataloader = DataLoader(CustomDataset(validation_data), batch_size = 32, shuffle = False)
test_dataloader = DataLoader(CustomDataset(testing_data), batch_size = 32, shuffle = False)

In [5]:
# Create Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim),
            nn.ReLU()
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        
        return x

In [6]:
# Pretraining Autoencoder
# Pretrain autoencoders layer by layer
def pretrain_autoencoders(data, encoding_dims, batch_size=32, lr=0.001, epochs=50, threshold=1e-4):
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=True)
    inputs = data
    pretrained_weights = []
    
    for encoding_dim in encoding_dims:
        input_dim = inputs.shape[1]
        autoencoder = Autoencoder(input_dim, encoding_dim)
        criterion = nn.MSELoss()
        optimizer = optim.Adam(autoencoder.parameters(), lr=lr)
        
        for epoch in range(epochs):
            epoch_loss = 0.0
            for batch in dataloader:
                batch = batch.to(torch.float32)
                optimizer.zero_grad()
                outputs = autoencoder(batch)
                loss = criterion(outputs, batch)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            
            avg_loss = epoch_loss / len(dataloader)
            
            if epoch % 10 == 0:
                print(f"The average loss is {avg_loss}", end = "\n")
                
            if avg_loss < threshold:
                break
        
        pretrained_weights.append(autoencoder.encoder[0].weight.data.clone())
        inputs = autoencoder.encoder(torch.tensor(data, dtype=torch.float32)).detach().numpy()
    
    return pretrained_weights

In [7]:
# Get the pretrained_weights
pretrained_weights = pretrain_autoencoders(unlabeled_training_data.values, [18])

The average loss is 0.11666821105913683
The average loss is 0.006691737320612777
The average loss is 0.006527620071375912
The average loss is 0.006220273233272813
The average loss is 0.005672135872935707


In [8]:
class DFNN(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, pretrained_weights=None):
        super(DFNN, self).__init__()
        layers = []
        previous_dim = input_dim
        
        for i, hidden_dim in enumerate(hidden_dims):
            layer = nn.Linear(previous_dim, hidden_dim)
            if pretrained_weights and i < len(pretrained_weights):
                layer.weight.data = pretrained_weights[i]
            layers.append(layer)
            layers.append(nn.ReLU())
            previous_dim = hidden_dim
        
        layers.append(nn.Linear(previous_dim, output_dim))
        layers.append(nn.Softmax(dim=1))
        self.network = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.network(x)

# Train DFNN
def train_dfnn(model, train_loader, val_loader, lr=0.001, epochs=50):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in range(epochs):
        model.train()
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        # Evaluate on test data
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                outputs = model(batch_x)
                _, predicted = torch.max(outputs, 1)
                total += batch_y.size(0)
                correct += (predicted == batch_y).sum().item()
        
        if epoch % 10 == 0: print(f"Epoch {epoch+1}/{epochs}, Val Accuracy: {100 * correct / total:.2f}%")

In [9]:
# Generate confusion matrix
def evaluate_model(model, data_loader):
    y_true, y_pred = [], []
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            outputs = model(batch_x)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(batch_y.numpy())
            y_pred.extend(predicted.numpy())
    
    cm = confusion_matrix(y_true, y_pred)
    print(classification_report(y_true, y_pred))
    return cm

In [11]:
# Train DFNN with pretraining
print("Train DFNN with pretraining...")
dfnn_pretrained = DFNN(input_dim=36, hidden_dims=[18], output_dim=5, pretrained_weights=pretrained_weights)
train_dfnn(dfnn_pretrained, train_dataloader, val_dataloader, epochs = 100)
print()

# Train DFNN without pretraining
print("Train DFNN without pretraining...")
dfnn_scratch = DFNN(input_dim=36, hidden_dims=[18], output_dim=5)
train_dfnn(dfnn_scratch, train_dataloader, val_dataloader, epochs = 100)
print()

# Evaluate and compare
print("Evaluating Pretrained")
cm_pretrained = evaluate_model(dfnn_pretrained, test_dataloader)
print()

print("Evaluating Scratch model")
cm_scratch = evaluate_model(dfnn_scratch, test_dataloader)

Train DFNN with pretraining...
Epoch 1/100, Val Accuracy: 22.40%


  label = torch.tensor(self.df.iloc[idx, -1], dtype=torch.long)


Epoch 11/100, Val Accuracy: 33.20%
Epoch 21/100, Val Accuracy: 36.80%
Epoch 31/100, Val Accuracy: 38.80%
Epoch 41/100, Val Accuracy: 39.60%
Epoch 51/100, Val Accuracy: 38.80%
Epoch 61/100, Val Accuracy: 41.60%
Epoch 71/100, Val Accuracy: 42.40%
Epoch 81/100, Val Accuracy: 41.60%
Epoch 91/100, Val Accuracy: 40.00%

Train DFNN without pretraining...
Epoch 1/100, Val Accuracy: 22.40%
Epoch 11/100, Val Accuracy: 32.00%
Epoch 21/100, Val Accuracy: 34.80%
Epoch 31/100, Val Accuracy: 37.20%
Epoch 41/100, Val Accuracy: 38.40%
Epoch 51/100, Val Accuracy: 39.20%
Epoch 61/100, Val Accuracy: 40.80%
Epoch 71/100, Val Accuracy: 43.20%
Epoch 81/100, Val Accuracy: 42.80%
Epoch 91/100, Val Accuracy: 42.80%

Evaluating Pretrained
              precision    recall  f1-score   support

           0       0.48      0.41      0.44        51
           1       0.39      0.51      0.44        57
           2       0.31      0.33      0.32        52
           3       0.55      0.48      0.51        44
       

  label = torch.tensor(self.df.iloc[idx, -1], dtype=torch.long)
