In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from transformers import ViTForImageClassification, ViTConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import json

In [2]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Currently running on:", device)

Currently running on: cuda


## Loading Data into DataLoaders

In [3]:
# Define Dataset class
class IcebergDataset(Dataset):
    def __init__(self, data, labels=None, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data[idx]
        if self.transform:
            image = self.transform(image)
        if self.labels is not None:
            label = self.labels[idx]
            return image, label
        return image

In [4]:
# Load and preprocess the data
def load_data(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    images = []
    labels = []

    for item in data:
        band_1 = np.array(item['band_1']).reshape(75, 75)
        band_2 = np.array(item['band_2']).reshape(75, 75)
        image = np.stack([band_1, band_2], axis=0)
        images.append(image)

        if 'is_iceberg' in item:
            labels.append(item['is_iceberg'])

    return np.array(images), np.array(labels) if labels else None

In [5]:
# Load train data
data_dir = "./data/processed/"
train_images, train_labels = load_data(data_dir+'train.json')
train_images = torch.tensor(train_images, dtype=torch.float32)
train_labels = torch.tensor(train_labels, dtype=torch.long)

In [6]:
# Split the data
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images, train_labels, test_size=0.2, random_state=42
)

In [7]:
# Define data augmentation and normalization
transform = transforms.Compose([
    transforms.Normalize(mean=[-10.0, -10.0], std=[20.0, 20.0]),
])

In [8]:
# Create DataLoaders
train_dataset = IcebergDataset(train_images, train_labels, transform=transform)
val_dataset = IcebergDataset(val_images, val_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

## Defining Models

In [9]:
def multi_class_accuracy(preds, y):
    _, predicted = torch.max(preds, 1)
    correct = (predicted == y).float()
    acc = correct.sum() / len(correct)
    return acc

In [10]:
def vit_multi_class_accuracy(preds, y):
    # Convert preds to a tensor if it's a list
    preds = torch.tensor(preds)
    y = torch.tensor(y)
    
    # Compute accuracy
    correct = (preds == y).float()
    acc = correct.sum() / len(correct)
    return acc.item()

In [19]:
class BasicCNN(nn.Module):
    def __init__(self):
        super(BasicCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(2, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 18 * 18, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x
    
# Training Loop
def train_basic_cnn(model, train_loader, val_loader, epochs=10, lr=1e-4, save_weights=False):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_losses, train_accs, val_losses, val_accs = [], [], [], []

    if save_weights:
        output_dir = "./models/"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)

    best_val_loss = float('inf')
    best_val_acc = 0.0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        running_accuracy = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_accuracy += multi_class_accuracy(outputs, labels)

        val_loss = 0.0
        val_preds, val_targets = [], []
        model.eval()
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = torch.argmax(outputs, dim=1).cpu().numpy()
                val_preds.extend(preds)
                val_targets.extend(labels.cpu().numpy())

        val_accuracy = accuracy_score(val_targets, val_preds)
        train_losses.append(running_loss/len(train_loader))
        train_accs.append(running_accuracy/len(train_loader))
        val_losses.append(val_loss/len(val_loader))
        val_accs.append(val_accuracy)

        print('\n')
        print("="*120)
        print(f"Epoch: {epoch+1}/{epochs}" )
        print("="*120)
        print(f"Train Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {running_accuracy:.4f}\t|\tVal Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {val_accuracy:.4f}")
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_acc = val_accuracy
            if save_weights:
                torch.save(model.state_dict(), os.path.join(output_dir, f'basic_cnn_epoch_{epoch+1}.pth'))
            print(f"Model saved at epoch {epoch+1}")

    return train_losses, train_accs, val_losses, val_accs, best_val_acc

In [16]:
class BasicViT(nn.Module):
    def __init__(self, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, channels=3):
        super(BasicViT, self).__init__()

        self.image_size = image_size
        self.patch_size = patch_size
        self.num_classes = num_classes
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.mlp_dim = mlp_dim
        self.channels = channels

        # Create patches
        self.patch_embedding = nn.Conv2d(self.channels, self.dim, kernel_size=self.patch_size, stride=self.patch_size)

        # Transformer encoder
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.dim, nhead=self.heads, dim_feedforward=self.mlp_dim)
        self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.depth)

        # Classifier head
        self.classifier = nn.Linear(self.dim, self.num_classes)

    def forward(self, x):
        # Convert the image into patches
        x = self.patch_embedding(x)  # (batch_size, dim, num_patches_height, num_patches_width)
        x = x.flatten(2)  # Flatten the height and width dimensions (batch_size, dim, num_patches)

        # Transform and add positional encoding
        x = x.permute(2, 0, 1)  # (num_patches, batch_size, dim)
        x = self.encoder(x)

        # Use the output of the transformer for classification
        x = x.mean(dim=0)  # Global average pooling (over patches)
        logits = self.classifier(x)
        return logits
    
# Training Loop
def train_basic_vit(model, train_loader, val_loader, epochs=10, lr=1e-4, save_weights=False):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_losses, train_accs, val_accs = [], [], []

    if save_weights:
        output_dir = "./models/"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)

    best_val_accuracy = 0.0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_preds = 0
        total_preds = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            _, preds = torch.max(outputs, dim=1)
            correct_preds += torch.sum(preds == labels).item()
            total_preds += labels.size(0)

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = correct_preds / total_preds * 100
        
        model.eval()
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                # Forward pass
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Compute accuracy and classification report
        val_accuracy = vit_multi_class_accuracy(all_labels, all_preds)
        train_losses.append(epoch_loss)
        train_accs.append(epoch_accuracy)
        val_accs.append(val_accuracy)

        print('\n')
        print("="*120)
        print(f"Epoch: {epoch+1}/{epochs}" )
        print("="*120)
        print(f"Train Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {epoch_accuracy:.4f}\t|\tVal Accuracy: {val_accuracy:.4f}")
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            if save_weights:
                torch.save(model.state_dict(), os.path.join(output_dir, f'basic_vit_epoch_{epoch+1}.pth'))
            print(f"Model saved at epoch {epoch+1}")

    return train_losses, train_accs, val_accs, best_val_accuracy

In [13]:
# Instantiate models
IMAGE_SIZE = 75
PATCH_SIZE = 16
NUM_CLASSES = 2
DIM = 256
DEPTH = 6
HEADS = 8
MLP_DIM = 512
CHANNELS = 2

# Basic CNN
basic_cnn = BasicCNN().to(device)

# Basic ViT
basic_vit = BasicViT(IMAGE_SIZE, PATCH_SIZE, NUM_CLASSES, DIM, DEPTH, HEADS, MLP_DIM, CHANNELS).to(device)



## Training

In [20]:
# Train Basic CNN
basic_cnn_train_losses, basic_cnn_train_accs, basic_cnn_valid_losses, basic_cnn_valid_accs, best_val_acc = train_basic_cnn(basic_cnn, train_loader, val_loader, epochs=100, lr=1e-4)



Epoch: 1/100
Train Loss: 0.0065, Train Accuracy: 41.0000	|	Val Loss: 0.4181, Val Accuracy: 0.8723
Model saved at epoch 1


Epoch: 2/100
Train Loss: 0.0040, Train Accuracy: 41.0000	|	Val Loss: 0.4306, Val Accuracy: 0.8660


Epoch: 3/100
Train Loss: 0.0036, Train Accuracy: 41.0000	|	Val Loss: 0.4396, Val Accuracy: 0.8629


Epoch: 4/100
Train Loss: 0.0033, Train Accuracy: 41.0000	|	Val Loss: 0.4536, Val Accuracy: 0.8723


Epoch: 5/100
Train Loss: 0.0037, Train Accuracy: 41.0000	|	Val Loss: 0.4367, Val Accuracy: 0.8754


Epoch: 6/100
Train Loss: 0.0026, Train Accuracy: 41.0000	|	Val Loss: 0.4962, Val Accuracy: 0.8629


Epoch: 7/100
Train Loss: 0.0025, Train Accuracy: 41.0000	|	Val Loss: 0.4533, Val Accuracy: 0.8629


Epoch: 8/100
Train Loss: 0.0031, Train Accuracy: 41.0000	|	Val Loss: 0.4849, Val Accuracy: 0.8629


Epoch: 9/100
Train Loss: 0.0018, Train Accuracy: 41.0000	|	Val Loss: 0.4895, Val Accuracy: 0.8567


Epoch: 10/100
Train Loss: 0.0016, Train Accuracy: 41.0000	|	Val Loss: 0.487

In [21]:
best_val_acc

0.8722741433021807

In [22]:
# Train Basic ViT
basic_vit_train_losses, basic_vit_train_accs, basic_vit_valid_accs, best_val_acc = train_basic_vit(basic_vit, train_loader, val_loader, epochs=100, lr=1e-4)



Epoch: 1/100
Train Loss: 0.0161, Train Accuracy: 99.3765	|	Val Accuracy: 0.8131
Model saved at epoch 1


Epoch: 2/100
Train Loss: 0.0212, Train Accuracy: 99.6882	|	Val Accuracy: 0.8380
Model saved at epoch 2


Epoch: 3/100
Train Loss: 0.0076, Train Accuracy: 99.8441	|	Val Accuracy: 0.8318


Epoch: 4/100
Train Loss: 0.0335, Train Accuracy: 99.0647	|	Val Accuracy: 0.8193


Epoch: 5/100
Train Loss: 0.0087, Train Accuracy: 99.6882	|	Val Accuracy: 0.8380


Epoch: 6/100
Train Loss: 0.0005, Train Accuracy: 100.0000	|	Val Accuracy: 0.8442
Model saved at epoch 6


Epoch: 7/100
Train Loss: 0.0044, Train Accuracy: 99.7662	|	Val Accuracy: 0.8380


Epoch: 8/100
Train Loss: 0.0058, Train Accuracy: 99.8441	|	Val Accuracy: 0.8287


Epoch: 9/100
Train Loss: 0.0228, Train Accuracy: 99.2206	|	Val Accuracy: 0.8380


Epoch: 10/100
Train Loss: 0.0144, Train Accuracy: 99.5323	|	Val Accuracy: 0.8442


Epoch: 11/100
Train Loss: 0.0032, Train Accuracy: 99.9221	|	Val Accuracy: 0.8349


Epoch: 12/100
Train Loss

In [23]:
best_val_acc

0.8566977977752686