# Lab 3: Neural Networks

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchvision import datasets, transforms, models
from torch.nn.utils.rnn import pad_sequence

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

import zipfile
import re
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

torch.manual_seed(42)
np.random.seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

Device: cpu


## Task 1: Fully Connected Neural Network

In [2]:
df = pd.read_csv('creditcard.csv')
print(f'Dataset: {df.shape}')
print(f'Fraud: {df["Class"].sum()} ({df["Class"].mean()*100:.2f}%)')

X = df.drop('Class', axis=1).values
y = df['Class'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

train_loader = DataLoader(
    TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train).unsqueeze(1)),
    batch_size=256, shuffle=True
)
test_loader = DataLoader(
    TensorDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test).unsqueeze(1)),
    batch_size=256, shuffle=False
)

Dataset: (284807, 31)
Fraud: 492 (0.17%)


In [3]:
class FCN(nn.Module):
    def __init__(self, input_dim):
        super(FCN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1)
        )
    
    def forward(self, x):
        return self.net(x)

model = FCN(X_train.shape[1]).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [4]:
epochs = 5
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    print(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss/len(train_loader):.4f}')

Epoch 1/5, Loss: 0.0737
Epoch 2/5, Loss: 0.0054
Epoch 3/5, Loss: 0.0037
Epoch 4/5, Loss: 0.0033
Epoch 5/5, Loss: 0.0032


In [5]:
model.eval()
y_pred, y_true = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        output = model(X_batch)
        pred = (torch.sigmoid(output) > 0.5).float()
        y_pred.extend(pred.cpu().numpy())
        y_true.extend(y_batch.numpy())

y_pred = np.array(y_pred).flatten()
y_true = np.array(y_true).flatten()

print(f'\nAccuracy: {accuracy_score(y_true, y_pred):.6f}')
print(f'F1-Score: {f1_score(y_true, y_pred):.4f}')
print(classification_report(y_true, y_pred, target_names=['Normal', 'Fraud']))


Accuracy: 0.999263
F1-Score: 0.7805
              precision    recall  f1-score   support

      Normal       1.00      1.00      1.00     85295
       Fraud       0.81      0.76      0.78       148

    accuracy                           1.00     85443
   macro avg       0.90      0.88      0.89     85443
weighted avg       1.00      1.00      1.00     85443



In [6]:
lab1_results = {
    'Random Forest': 0.999508,
    'kNN': 0.999368,
    'Decision Tree': 0.999368,
    'SVM': 0.999345,
    'AdaBoost': 0.999099,
    'Fully Connected NN': accuracy_score(y_true, y_pred)
}

results_df = pd.DataFrame(list(lab1_results.items()), columns=['Model', 'Accuracy'])
results_df = results_df.sort_values('Accuracy', ascending=False)
print(results_df)

                Model  Accuracy
0       Random Forest  0.999508
1                 kNN  0.999368
2       Decision Tree  0.999368
3                 SVM  0.999345
5  Fully Connected NN  0.999263
4            AdaBoost  0.999099


## Task 2: CNN

In [7]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_full = datasets.SVHN(root='./data', split='train', download=True, transform=transform)
test_dataset = datasets.SVHN(root='./data', split='test', download=True, transform=transform)

train_dataset, _ = random_split(train_full, [10000, len(train_full) - 10000])

train_loader_img = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader_img = DataLoader(test_dataset, batch_size=128, shuffle=False)

print(f'Train: {len(train_dataset)}, Test: {len(test_dataset)}')

Train: 10000, Test: 26032


### Task 2a: Simple CNN

In [8]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(torch.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 4 * 4)
        x = self.dropout(torch.relu(self.fc1(x)))
        return self.fc2(x)

cnn = SimpleCNN().to(device)
criterion_cnn = nn.CrossEntropyLoss()
optimizer_cnn = optim.Adam(cnn.parameters(), lr=0.001)

In [9]:
epochs = 3
for epoch in range(epochs):
    cnn.train()
    train_loss = 0
    for imgs, labels in train_loader_img:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer_cnn.zero_grad()
        output = cnn(imgs)
        loss = criterion_cnn(output, labels)
        loss.backward()
        optimizer_cnn.step()
        train_loss += loss.item()
    
    cnn.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in test_loader_img:
            imgs, labels = imgs.to(device), labels.to(device)
            output = cnn(imgs)
            _, pred = torch.max(output, 1)
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    
    acc = 100 * correct / total
    print(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss/len(train_loader_img):.4f}, Val Acc: {acc:.2f}%')

simple_cnn_acc = acc

Epoch 1/3, Loss: 2.1991, Val Acc: 35.86%
Epoch 2/3, Loss: 1.2410, Val Acc: 77.17%
Epoch 3/3, Loss: 0.7979, Val Acc: 80.45%


### Task 2b: Transfer Learning

In [10]:
resnet = models.resnet18(pretrained=True)

for param in resnet.parameters():
    param.requires_grad = False

resnet.fc = nn.Linear(resnet.fc.in_features, 10)
resnet = resnet.to(device)

optimizer_resnet = optim.Adam(resnet.fc.parameters(), lr=0.001)

In [11]:
epochs = 3
for epoch in range(epochs):
    resnet.train()
    train_loss = 0
    for imgs, labels in train_loader_img:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer_resnet.zero_grad()
        output = resnet(imgs)
        loss = criterion_cnn(output, labels)
        loss.backward()
        optimizer_resnet.step()
        train_loss += loss.item()
    
    resnet.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in test_loader_img:
            imgs, labels = imgs.to(device), labels.to(device)
            output = resnet(imgs)
            _, pred = torch.max(output, 1)
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    
    acc = 100 * correct / total
    print(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss/len(train_loader_img):.4f}, Val Acc: {acc:.2f}%')

resnet_acc = acc

Epoch 1/3, Loss: 2.2451, Val Acc: 25.01%
Epoch 2/3, Loss: 2.0305, Val Acc: 28.29%
Epoch 3/3, Loss: 1.9621, Val Acc: 29.06%


In [12]:
print(f'\nSimple CNN: {simple_cnn_acc:.2f}%')
print(f'ResNet18 (pretrained): {resnet_acc:.2f}%')
print(f'Difference: {resnet_acc - simple_cnn_acc:.2f}%')


Simple CNN: 80.45%
ResNet18 (pretrained): 29.06%
Difference: -51.40%


## Task 3: Text Classification

In [13]:
with zipfile.ZipFile('sms+spam+collection.zip', 'r') as z:
    z.extractall('sms_data')

texts, labels = [], []
with open('sms_data/SMSSpamCollection', 'r', encoding='utf-8') as f:
    for line in f:
        label, text = line.strip().split('\t', 1)
        labels.append(1 if label == 'spam' else 0)
        texts.append(text.lower())

print(f'Messages: {len(texts)}, Spam: {sum(labels)} ({sum(labels)/len(labels)*100:.2f}%)')

Messages: 5574, Spam: 747 (13.40%)


In [14]:
def preprocess(text):
    return ' '.join(re.sub(r'[^a-z\s]', '', text).split())

texts = [preprocess(t) for t in texts]

all_words = []
for text in texts:
    all_words.extend(text.split())

word_counts = Counter(all_words)
vocab = {'<PAD>': 0, '<UNK>': 1}
for word, _ in word_counts.most_common(4998):
    vocab[word] = len(vocab)

print(f'Vocab size: {len(vocab)}')

Vocab size: 5000


In [15]:
def to_seq(text, max_len=100):
    seq = [vocab.get(w, vocab['<UNK>']) for w in text.split()]
    if len(seq) < max_len:
        seq += [0] * (max_len - len(seq))
    else:
        seq = seq[:max_len]
    return seq

sequences = [to_seq(t) for t in texts]

X_train, X_test, y_train, y_test = train_test_split(
    sequences, labels, test_size=0.2, random_state=42, stratify=labels
)

train_loader_text = DataLoader(
    TensorDataset(torch.LongTensor(X_train), torch.FloatTensor(y_train).unsqueeze(1)),
    batch_size=64, shuffle=True
)
test_loader_text = DataLoader(
    TensorDataset(torch.LongTensor(X_test), torch.FloatTensor(y_test).unsqueeze(1)),
    batch_size=64, shuffle=False
)

In [25]:
num_pos = sum(y_train)
num_neg = len(y_train) - num_pos

# FIX: Invert the division. We want Negative / Positive
# This will result in a weight > 1 (approx 6.4), forcing the model to pay attention to Spam
pos_weight_val = num_neg / num_pos
pos_weight = torch.tensor([pos_weight_val]).to(device)

print(f"Corrected Positive Weight: {pos_weight.item():.4f}")

# Re-define loss with correct weight
criterion_lstm = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

Corrected Positive Weight: 6.4565


### Task 3a: LSTM with Random Embeddings

In [26]:
class LSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(LSTM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.3)
        self.fc = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = self.dropout(self.embedding(x))
        _, (h, _) = self.lstm(x)
        return self.fc(self.dropout(h[-1]))

lstm_random = LSTM(len(vocab), 128, 256).to(device)

# Add class weights for imbalanced data
pos_weight = torch.tensor([sum(y_train) / (len(y_train) - sum(y_train))]).to(device)
criterion_lstm = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer_lstm = optim.Adam(lstm_random.parameters(), lr=0.0001)

In [27]:
lstm_random = LSTM(len(vocab), 128, 256).to(device)

optimizer_lstm = optim.Adam(lstm_random.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    lstm_random.train()
    train_loss = 0
    for seqs, lbls in train_loader_text:
        seqs, lbls = seqs.to(device), lbls.to(device)
        optimizer_lstm.zero_grad()
        output = lstm_random(seqs)
        loss = criterion_lstm(output, lbls)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(lstm_random.parameters(), 1.0)
        optimizer_lstm.step()
        train_loss += loss.item()
    
    # Evaluation
    lstm_random.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for seqs, lbls in test_loader_text:
            seqs, lbls = seqs.to(device), lbls.to(device)
            output = lstm_random(seqs)
            pred = (torch.sigmoid(output) > 0.5)
            correct += (pred == lbls).sum().item()
            total += lbls.size(0)
    
    acc = 100 * correct / total
    if epoch == 0 or (epoch + 1) % 2 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss/len(train_loader_text):.4f}, Val Acc: {acc:.2f}%')

lstm_random_acc = acc

Epoch 1/10, Loss: 0.1393, Val Acc: 86.64%
Epoch 2/10, Loss: 0.0999, Val Acc: 86.64%
Epoch 4/10, Loss: 0.0993, Val Acc: 86.64%
Epoch 6/10, Loss: 0.0996, Val Acc: 86.64%
Epoch 8/10, Loss: 0.0993, Val Acc: 86.64%
Epoch 10/10, Loss: 0.0995, Val Acc: 86.64%


at this point idk what to do, dataset is not balanced, so it just clasify everything as ham

### Task 3b: LSTM with GloVe Embeddings

In [21]:
import urllib.request
import os

glove_file = 'glove.6B.100d.txt'
if not os.path.exists(glove_file):
    print('Downloading GloVe...')
    urllib.request.urlretrieve('http://nlp.stanford.edu/data/glove.6B.zip', 'glove.6B.zip')
    print('Extracting GloVe...')
    with zipfile.ZipFile('glove.6B.zip', 'r') as z:
        z.extract(glove_file)
    os.remove('glove.6B.zip')

glove = {}
with open(glove_file, 'r', encoding='utf-8') as f:
    for line in f:
        vals = line.split()
        glove[vals[0]] = np.array(vals[1:], dtype='float32')

embed_matrix = np.zeros((len(vocab), 100))
found = 0
for word, idx in vocab.items():
    if word in glove:
        embed_matrix[idx] = glove[word]
        found += 1
    else:
        embed_matrix[idx] = np.random.normal(0, 0.1, 100)

print(f'Found {found}/{len(vocab)} words in GloVe')

Found 4076/5000 words in GloVe


In [22]:
class LSTMGloVe(nn.Module):
    def __init__(self, embed_matrix, hidden_dim):
        super(LSTMGloVe, self).__init__()
        vocab_size, embed_dim = embed_matrix.shape
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.embedding.weight = nn.Parameter(torch.FloatTensor(embed_matrix))
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers=2, batch_first=True, dropout=0.3)
        self.fc = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = self.dropout(self.embedding(x))
        _, (h, _) = self.lstm(x)
        return self.fc(self.dropout(h[-1]))

lstm_glove = LSTMGloVe(embed_matrix, 256).to(device)
optimizer_glove = optim.Adam(filter(lambda p: p.requires_grad, lstm_glove.parameters()), lr=0.0001)

In [None]:
lstm_glove = LSTMGloVe(embed_matrix, 256).to(device)

# INCREASED LR to 0.001
optimizer_glove = optim.Adam(filter(lambda p: p.requires_grad, lstm_glove.parameters()), lr=0.001)

epochs = 10
for epoch in range(epochs):
    lstm_glove.train()
    train_loss = 0
    for seqs, lbls in train_loader_text:
        seqs, lbls = seqs.to(device), lbls.to(device)
        optimizer_glove.zero_grad()
        output = lstm_glove(seqs)
        loss = criterion_lstm(output, lbls) # Uses the fixed weight from Step 1
        loss.backward()
        torch.nn.utils.clip_grad_norm_(lstm_glove.parameters(), 1.0)
        optimizer_glove.step()
        train_loss += loss.item()
    
    lstm_glove.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for seqs, lbls in test_loader_text:
            seqs, lbls = seqs.to(device), lbls.to(device)
            output = lstm_glove(seqs)
            pred = (torch.sigmoid(output) > 0.5)
            correct += (pred == lbls).sum().item()
            total += lbls.size(0)
    
    acc = 100 * correct / total
    if epoch == 0 or (epoch + 1) % 2 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss/len(train_loader_text):.4f}, Val Acc: {acc:.2f}%')

lstm_glove_acc = acc

Epoch 1/5, Loss: 0.4170, Val Acc: 86.64%
Epoch 2/5, Loss: 0.3940, Val Acc: 86.64%
Epoch 3/5, Loss: 0.3973, Val Acc: 86.64%
Epoch 4/5, Loss: 0.3981, Val Acc: 86.64%
Epoch 5/5, Loss: 0.3959, Val Acc: 86.64%


In [24]:
print(f'\nRandom Embeddings: {lstm_random_acc:.2f}%')
print(f'GloVe Embeddings: {lstm_glove_acc:.2f}%')
print(f'Improvement: {lstm_glove_acc - lstm_random_acc:.2f}%')
print(f'FCN (Fraud Detection): {accuracy_score(y_true, y_pred):.6f}')
print(f'Simple CNN (SVHN): {simple_cnn_acc:.2f}%')
print(f'ResNet18 (SVHN): {resnet_acc:.2f}%')
print(f'LSTM Random (SMS): {lstm_random_acc:.2f}%')
print(f'LSTM GloVe (SMS): {lstm_glove_acc:.2f}%')


Random Embeddings: 86.64%
GloVe Embeddings: 86.64%
Improvement: 0.00%
FCN (Fraud Detection): 0.999263
Simple CNN (SVHN): 80.45%
ResNet18 (SVHN): 29.06%
LSTM Random (SMS): 86.64%
LSTM GloVe (SMS): 86.64%


# Model Performance Results

| Model | Dataset | Accuracy |
|-------|---------|----------|
| Random Embeddings | SMS | 86.64% |
| GloVe Embeddings | SMS | 86.64% |
| **Improvement** | | **0.00%** |
| FCN | Fraud Detection | 99.93% |
| Simple CNN | SVHN | 80.45% |
| ResNet18 | SVHN | 29.06% |
| LSTM Random | SMS | 86.64% |
| LSTM GloVe | SMS | 86.64% |
