In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import torch.optim as optim

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
full_X_train_transformed = pd.read_csv('CSV/pre-processed/full_X_train_transformed')
full_y_train = pd.read_csv('CSV/pre-processed/full_y_train')
full_y_train = full_y_train.drop(columns=['id'])
full_y_train = full_y_train.iloc[:, 0]



full_X_test_transformed = pd.read_csv('CSV/pre-processed/full_X_test_transformed')


In [4]:
# Extracting only the image data from the full_X_train_transformed DataFrame
image_data_train = full_X_train_transformed.iloc[:, -40000:]
image_data_test = full_X_test_transformed.iloc[:, -40000:]

In [5]:
image_data_train.columns = range(40000)
image_data_test.columns = range(40000)

In [6]:
image_data_train = image_data_train.values.reshape(-1, 200, 200)
image_data_test = image_data_test.values.reshape(-1, 200, 200)

In [7]:
print(image_data_train.shape)
print(image_data_test.shape)

(990, 200, 200)
(594, 200, 200)


In [8]:
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(full_y_train)

# Splitting 20% of the training data as a validation set
X_train, X_val, y_train_encoded_split, y_val_encoded_split = train_test_split(
    image_data_train, y_train_encoded, test_size=0.2, stratify=y_train_encoded, random_state=42
)

print(f"Training set size: {X_train.shape}")
print(f"Validation set size: {X_val.shape}")


Training set size: (792, 200, 200)
Validation set size: (198, 200, 200)


In [None]:
class SimpleCNN(nn.Module):
    def __init__(self, output_dim):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        # Calculate the size of the flattened output after the conv and pooling layers
        flattened_size = 64 * 50 * 50
        
        self.fc1 = nn.Linear(flattened_size, 512)
        self.fc2 = nn.Linear(512, output_dim)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 50 * 50)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [None]:
class IntermediateCNN(nn.Module):
    def __init__(self, output_dim):
        super(IntermediateCNN, self).__init__()

        # First Convolutional Block
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second Convolutional Block
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully Connected Layers
        self.fc1 = nn.Linear(64 * 50 * 50, 512)
        self.fc_dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, output_dim)

    def forward(self, x):
        # First Convolutional Block
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))

        # Second Convolutional Block
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))

        # Flatten the tensor
        x = x.view(x.size(0), -1)

        # Fully Connected Layers
        x = F.relu(self.fc1(x))
        x = self.fc_dropout(x)
        x = self.fc2(x)

        return x


In [None]:
class EnhancedCNN(nn.Module):
    def __init__(self, output_dim):
        super(EnhancedCNN, self).__init__()

        # First Convolutional Block
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second Convolutional Block
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully Connected Layers
        self.fc1 = nn.Linear(64 * 50 * 50, 512)
        self.fc_dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 256)
        self.fc_dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(256, output_dim)

    def forward(self, x):
        # First Convolutional Block
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))

        # Second Convolutional Block
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool2(F.relu(self.bn3(self.conv3(x))))

        # Flatten the tensor
        x = x.view(x.size(0), -1)

        # Fully Connected Layers
        x = F.relu(self.fc1(x))
        x = self.fc_dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.fc_dropout2(x)
        x = self.fc3(x)

        return x


In [9]:
class ComplexCNN(nn.Module):
    def __init__(self, output_dim):
        super(ComplexCNN, self).__init__()

        # First Convolutional Block
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second Convolutional Block
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(128)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully Connected Layers
        self.fc1 = nn.Linear(128 * 50 * 50, 512)
        self.fc_dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 256)
        self.fc_dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(256, output_dim)

    def forward(self, x):
        # First Convolutional Block
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(F.relu(self.bn2(self.conv2(x))))

        # Second Convolutional Block
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool2(F.relu(self.bn4(self.conv4(x))))

        # Flatten the tensor
        x = x.view(x.size(0), -1)

        # Fully Connected Layers
        x = F.relu(self.fc1(x))
        x = self.fc_dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.fc_dropout2(x)
        x = self.fc3(x)

        return x


In [10]:
# Convert data to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train_encoded_split, dtype=torch.int64).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val_encoded_split, dtype=torch.int64).to(device)

X_test_array = image_data_test.reshape(-1, 1, 200, 200)
X_test_tensor = torch.tensor(X_test_array, dtype=torch.float32).to(device)

n_classes = full_y_train.nunique()

In [11]:
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# CHOOSE MODEL BELOW

In [12]:
model = ComplexCNN(output_dim=n_classes) #Choose the model here

In [13]:
model.to(device)

print(model)

# Define the loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

ComplexCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320000, out_features=512, bias=True)
  (fc_dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out

In [14]:
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0, path='models/CNN/best_model.pth'):
        """
        Early stops the training if validation loss doesn't improve after a given patience.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.path = path

    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.save_checkpoint(val_loss, model)
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        torch.save(model.state_dict(), self.path)


In [15]:
# Training parameters
epochs = 1000
batch_size = 10

# Early Stopping
early_stopping = EarlyStopping(patience=20, path='models/CNN/best_model.pth')

# Training loop
for epoch in range(epochs):
    model.train()  # Ensure the model is in training mode
    train_loss = 0.0
    for i in range(0, len(X_train_tensor), batch_size):
        # Determine end index for the current batch
        end_idx = min(i + batch_size, len(X_train_tensor))
        
        # Get the mini-batch data
        inputs = X_train_tensor[i:end_idx]
        labels = y_train_tensor[i:end_idx]

        # Ensure the input data has an additional channel dimension (assuming grayscale images)
        inputs = inputs.unsqueeze(1)  # Add a channel dimension
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Calculate average training loss for the epoch
    train_loss /= (len(X_train_tensor) / batch_size)

    # Validation phase
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
        for inputs, labels in val_loader:
            inputs = inputs.unsqueeze(1)  # Add a channel dimension
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    # Calculate average validation loss for the epoch
    val_loss /= len(val_loader)

    print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {train_loss}, Validation Loss: {val_loss}")

    # Early Stopping check
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping triggered.")
        break

print('Finished Training')


Epoch [1/1000], Training Loss: 5.4864472331422744, Validation Loss: 4.566591668128967
Epoch [2/1000], Training Loss: 4.667484730181068, Validation Loss: 4.550795555114746
Epoch [3/1000], Training Loss: 4.6445719942902075, Validation Loss: 4.553221321105957
Epoch [4/1000], Training Loss: 4.587035395882347, Validation Loss: 4.444158959388733
Epoch [5/1000], Training Loss: 4.558396059455293, Validation Loss: 4.383119773864746
Epoch [6/1000], Training Loss: 4.47414824155846, Validation Loss: 4.334078073501587
Epoch [7/1000], Training Loss: 4.426790629372452, Validation Loss: 4.275973963737488
Epoch [8/1000], Training Loss: 4.370572353252257, Validation Loss: 4.157574760913849
Epoch [9/1000], Training Loss: 4.24381404814094, Validation Loss: 3.990017855167389
Epoch [10/1000], Training Loss: 4.156033228142093, Validation Loss: 3.9506891489028932
Epoch [11/1000], Training Loss: 4.113315500394262, Validation Loss: 3.9247932195663453
Epoch [12/1000], Training Loss: 4.043553411358535, Validation

In [16]:
model = ComplexCNN(output_dim=n_classes)
model.load_state_dict(torch.load('models/CNN/best_model.pth'))
model.to(device)

ComplexCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320000, out_features=512, bias=True)
  (fc_dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out

In [17]:
# Set the model to evaluation mode
model.eval()

correct_predictions = 0
total_predictions = 0

# Prevent gradient calculations
with torch.no_grad():
    for i in range(0, len(X_val_tensor), batch_size):
        # Determine end index for the current batch
        end_idx = min(i + batch_size, len(X_val_tensor))
        
        # Get the mini-batch data
        inputs = X_val_tensor[i:end_idx]
        labels = y_val_tensor[i:end_idx]
        
        # Ensure the input data has an additional channel dimension (assuming grayscale images)
        inputs = inputs.unsqueeze(1)  # Add a channel dimension

        # Forward pass
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

accuracy = 100 * correct_predictions / total_predictions
print(f'Accuracy on the validation set: {accuracy:.2f}%')


Accuracy on the validation set: 65.66%


In [18]:
# Ensure the model is in evaluation mode
model.eval()

# Placeholder for the probabilities
test_probabilities = []

with torch.no_grad():
    for i in range(0, len(X_test_tensor), batch_size):
        # Get mini-batch
        inputs = X_test_tensor[i:i+batch_size]

        # Forward pass to get output/logits
        outputs = model(inputs)

        # Apply softmax to obtain probabilities
        probabilities = F.softmax(outputs, dim=1)
        
        test_probabilities.extend(probabilities.cpu().numpy())

# Read the sample_submission.csv
proba_df = pd.read_csv("CSV/sample_submission.csv")

# Replace the data in the columns (excluding the "id" column) with the computed probabilities
# Ensure that the columns in proba_df (excluding 'id') match the order and number of your model's output classes
proba_df.iloc[:, 1:] = test_probabilities

# Save to CSV file for submission
proba_df.to_csv('CNN_early_stopping_output.csv', index=False)
