In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import StepLR

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# NOTE modifications here
# Since preprocessing performed the validation split, we have to do this here
X_train_combined = pd.read_csv('CSV/pre-processed/training/X_train_augmented.csv')
y_train = pd.read_csv('CSV/pre-processed/training/y_train_augmented.csv')
y_train = y_train.drop(columns=['id'])
y_train = y_train.iloc[:, 0]

X_val_combined = pd.read_csv('CSV/pre-processed/training/X_validation_augmented.csv')
y_val = pd.read_csv('CSV/pre-processed/training/y_validation_augmented.csv')
y_val = y_val.drop(columns=['id'])
y_val = y_val.iloc[:, 0]

encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_val_encoded = encoder.transform(y_val)

X_test_transformed = pd.read_csv('CSV/pre-processed/training/X_test_augmented.csv')

In [4]:
# Splitting and shuffling has already been done, so only need to
# create variables that reference preprocessed data
X_train_combined, X_val_combined, y_train_encoded_split, y_val_encoded_split = (
    X_train_combined, X_val_combined, y_train_encoded, y_val_encoded
)
full_X_test_transformed = X_test_transformed

# Extract the image data for the training and validation sets
X_train_image = X_train_combined.iloc[:, -40000:].values.reshape(-1, 200, 200)
X_val_image = X_val_combined.iloc[:, -40000:].values.reshape(-1, 200, 200)
X_test_image = full_X_test_transformed.iloc[:, -40000:].values.reshape(-1, 200, 200)

# Convert grayscale images to 3-channel (RGB) format
X_train_rgb = np.stack((X_train_image,) * 3, axis=-1)
X_val_rgb = np.stack((X_val_image,) * 3, axis=-1)
X_test_rgb = np.stack((X_test_image,) * 3, axis=-1)

# Convert the RGB numpy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_rgb.transpose(0, 3, 1, 2), dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val_rgb.transpose(0, 3, 1, 2), dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_rgb.transpose(0, 3, 1, 2), dtype=torch.float32).to(device)

# Normalize using ImageNet statistics
mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
X_train_normalized = ((X_train_tensor - mean) / std)
X_val_normalized = ((X_val_tensor - mean) / std)
X_test_normalized = ((X_test_tensor - mean) / std)

# Extracting the CSV data for training and validation sets
csv_X_train = X_train_combined.iloc[:, :-40000]
csv_X_val = X_val_combined.iloc[:, :-40000]
csv_X_test = full_X_test_transformed.iloc[:, :-40000]

# Convert CSV data to PyTorch tensors and move them to the device
csv_X_train_tensor = torch.tensor(csv_X_train.values, dtype=torch.float32).to(device)
csv_X_val_tensor = torch.tensor(csv_X_val.values, dtype=torch.float32).to(device)
csv_X_test_tensor = torch.tensor(csv_X_test.values, dtype=torch.float32).to(device)


In [5]:
print(f"Normalized Image Training tensor shape: {X_train_normalized.shape}")
print(f"Normalized Image Validation tensor shape: {X_val_normalized.shape}")
print(f"Normalized Image Test tensor shape: {X_test_normalized.shape}\n")

# Not sure why these numbers changed from the original
print(f"CSV Training tensor shape: {csv_X_train_tensor.shape}")
print(f"CSV Validation tensor shape: {csv_X_val_tensor.shape}")
print(f"CSV Test tensor shape: {csv_X_test_tensor.shape}")


Normalized Image Training tensor shape: torch.Size([3564, 3, 200, 200])
Normalized Image Validation tensor shape: torch.Size([99, 3, 200, 200])
Normalized Image Test tensor shape: torch.Size([594, 3, 200, 200])

CSV Training tensor shape: torch.Size([3564, 193])
CSV Validation tensor shape: torch.Size([99, 193])
CSV Test tensor shape: torch.Size([594, 193])


In [6]:
# NOTE more additions here
#full_X_train = pd.read_csv('CSV/pre-processed/training/full_X_train_augmented.csv')
full_y_train = pd.read_csv('CSV/pre-processed/training/full_y_train_augmented.csv')
full_y_train = full_y_train.drop(columns=['id'])
full_y_train = full_y_train.iloc[:, 0]

In [7]:
from torchvision.models.resnet import ResNet152_Weights
from torch.optim.lr_scheduler import StepLR

# Define the model using ResNet152
n_classes = full_y_train.nunique()
resnet_model = models.resnet152(pretrained=False)  # Load the ResNet152 WITHOUT ImageNet weights

resnet_features = resnet_model.fc.in_features  # Storing the number of features

resnet_model.fc = nn.Identity()  # Replacing the final layer

csv_input_dim = csv_X_train.shape[1]
csv_hidden_dim1 = 1024  # Increased from 512
csv_hidden_dim2 = 768  # New hidden layer
csv_hidden_dim3 = 512  # New hidden layer
dropout_rate = 0.5     # Dropout rate

csv_model = nn.Sequential(
    nn.Linear(csv_input_dim, csv_hidden_dim1),
    nn.BatchNorm1d(csv_hidden_dim1),
    nn.ReLU(),
    nn.Dropout(dropout_rate),
    
    nn.Linear(csv_hidden_dim1, csv_hidden_dim2),
    nn.BatchNorm1d(csv_hidden_dim2),
    nn.ReLU(),
    nn.Dropout(dropout_rate),
    
    nn.Linear(csv_hidden_dim2, csv_hidden_dim3),
    nn.BatchNorm1d(csv_hidden_dim3),
    nn.ReLU(),
    nn.Dropout(dropout_rate),
    
    nn.Linear(csv_hidden_dim3, csv_hidden_dim3),
    nn.BatchNorm1d(csv_hidden_dim3),
    nn.ReLU(),
    nn.Dropout(dropout_rate),
)

learning_rate = 0.001

class CombinedModel(nn.Module):
    def __init__(self, resnet_model, csv_model, output_dim, resnet_features):
        super(CombinedModel, self).__init__()
        self.resnet_model = resnet_model
        self.csv_model = csv_model
        self.fc = nn.Linear(resnet_features + csv_hidden_dim3, output_dim)  # Ensure correct dimensions
        
    def forward(self, image_data, csv_data):
        x1 = self.resnet_model(image_data)
        x2 = self.csv_model(csv_data)
        x = torch.cat((x1, x2), dim=1)
        x = self.fc(x)
        return x


model = CombinedModel(resnet_model, csv_model, n_classes, resnet_features)
model.to(device)
print(model)

# Define the loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Convert label data to tensors
y_train_tensor = torch.tensor(y_train_encoded_split, dtype=torch.int64).to(device)
y_val_tensor = torch.tensor(y_val_encoded_split, dtype=torch.int64).to(device)

# Create datasets
train_dataset = TensorDataset(X_train_normalized, csv_X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_normalized, csv_X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_normalized, csv_X_test_tensor)



CombinedModel(
  (resnet_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(

In [8]:
# Training parameters
epochs = 10000
batch_size = 4
patience = 30 # Number of epochs to wait for improvement before stopping

# Define StepLR scheduler. Let's say we decrease the learning rate by 0.1 every 30 epochs
STEP_SIZE = 10
GAMMA = 0.7
scheduler = StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [9]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [10]:
# Extract model details
base_name = "ResNet152"
csv_layers = f"CSV_L{csv_input_dim}-{csv_hidden_dim1}-{csv_hidden_dim2}-{csv_hidden_dim3}"  # Model architecture
dropout_info = f"Dropout{int(dropout_rate*100)}"  # Dropout percentage
lr_info = f"lr{learning_rate}"

# Get the current date and time
from datetime import datetime
current_datetime = datetime.now().strftime('%Y%m%d_%H%M')

# Formulate the name
checkpoint_name = f"{base_name}_{csv_layers}_{dropout_info}_{lr_info}_SS{STEP_SIZE}_bs{batch_size}_{current_datetime}_data_augmentation_100patience"
checkpoint_path = f"model_checkpoints/multi_modal/{checkpoint_name}"

print(f"Model will be saved as: {checkpoint_name}")

# Path to save the best model
checkpoint_path = f"model_checkpoints/{base_name}/{checkpoint_name}.pth"

# Ensure the directory exists
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

Model will be saved as: ResNet152_CSV_L193-1024-768-512_Dropout50_lr0.001_SS10_bs4_20231129_1845_data_augmentation_100patience


In [11]:
best_val_loss = float('inf')  # Initialize with a high value
epochs_without_improvement = 0

# Training loop
for epoch in range(epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for image_inputs, csv_inputs, labels in train_loader:
        # Move inputs and labels to device
        image_inputs, csv_inputs, labels = image_inputs.to(device), csv_inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(image_inputs, csv_inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * image_inputs.size(0)

    train_loss = running_loss / len(train_loader.dataset)
    
    # Compute validation loss
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():
        for image_inputs, csv_inputs, labels in val_loader:
            image_inputs, csv_inputs, labels = image_inputs.to(device), csv_inputs.to(device), labels.to(device)
            outputs = model(image_inputs, csv_inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * image_inputs.size(0)
    
    val_loss /= len(val_loader.dataset)
    
    # Check for early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_without_improvement = 0
        # Save the best model weights
        torch.save(model.state_dict(), checkpoint_path)
    else:
        epochs_without_improvement += 1
        
    # Print statistics
    print(f"Epoch [{epoch+1}/{epochs}] - Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
    scheduler.step()

    if epochs_without_improvement >= patience:
        print(f"Early stopping after {epoch+1} epochs!")
        break

print('Finished Training')


Epoch [1/10000] - Training Loss: 5.2976, Validation Loss: 4.7602
Epoch [2/10000] - Training Loss: 4.6164, Validation Loss: 4.5318
Epoch [3/10000] - Training Loss: 4.1706, Validation Loss: 4.4476
Epoch [4/10000] - Training Loss: 3.7103, Validation Loss: 3.6094
Epoch [5/10000] - Training Loss: 3.3769, Validation Loss: 3.4293
Epoch [6/10000] - Training Loss: 3.0809, Validation Loss: 3.3058
Epoch [7/10000] - Training Loss: 2.7808, Validation Loss: 2.7608
Epoch [8/10000] - Training Loss: 2.4926, Validation Loss: 2.6254
Epoch [9/10000] - Training Loss: 2.2619, Validation Loss: 2.1730
Epoch [10/10000] - Training Loss: 1.9944, Validation Loss: 1.9354
Epoch [11/10000] - Training Loss: 1.5939, Validation Loss: 1.5483
Epoch [12/10000] - Training Loss: 1.4055, Validation Loss: 1.3044
Epoch [13/10000] - Training Loss: 1.2610, Validation Loss: 1.2423
Epoch [14/10000] - Training Loss: 1.1527, Validation Loss: 1.2266
Epoch [15/10000] - Training Loss: 1.0503, Validation Loss: 1.0501
Epoch [16/10000] - 

In [12]:
# load the best model
model.load_state_dict(torch.load(checkpoint_path))

# Update the DataLoader to include both image and CSV tensors
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

correct_predictions = 0
total_predictions = 0

model.eval()  # Set the model to evaluation mode

with torch.no_grad():
    for image_inputs, csv_inputs, labels in val_loader:
        image_inputs, csv_inputs, labels = image_inputs.to(device), csv_inputs.to(device), labels.to(device)
        
        # Forward pass with both image and CSV data
        outputs = model(image_inputs, csv_inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

accuracy = 100 * correct_predictions / total_predictions
print(f'Accuracy on the validation set: {accuracy:.2f}%')


Accuracy on the validation set: 78.79%


In [None]:
# Save model parameters
save_path = f'models/{base_name}/{accuracy:.2f}_{checkpoint_name}'

# Ensure the directory exists
os.makedirs(os.path.dirname(save_path), exist_ok=True)

torch.save(model.state_dict(), save_path)

### outputing predictions to csv

In [None]:
model_path = 'models/ResNet152/94.95_ResNet152_CSV_L193-1024-768-512_Dropout50_lr1e-05_SS40_bs4_20231125_1559_data_augmentation'

In [None]:
model.load_state_dict(torch.load(model_path))


test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

all_probs = []
with torch.no_grad():
    for image_inputs, csv_inputs in test_loader:
        image_inputs, csv_inputs = image_inputs.to(device), csv_inputs.to(device)
        
        # Get the logits from the model
        logits = model(image_inputs, csv_inputs)
        
        # Convert logits to probabilities
        probs = F.softmax(logits, dim=1)
        all_probs.append(probs.cpu().numpy())

# Concatenate all probabilities
all_probs_np = np.concatenate(all_probs, axis=0)

# Read the sample_submission.csv
proba_df = pd.read_csv("CSV/sample_submission.csv")

# Replace the data in the columns (excluding the "id" column) with the computed probabilities
proba_df.iloc[:, 1:] = all_probs_np

In [None]:
proba_df.to_csv('checkpoint2(3).csv', index=False)