# Testing with MLP(Multi-Layer Perceptron) on image data only

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
import torch.optim as optim

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
full_X_train_transformed = pd.read_csv('CSV/pre-processed/full_X_train_transformed')
full_y_train = pd.read_csv('CSV/pre-processed/full_y_train')
full_y_train = full_y_train.drop(columns=['id'])
full_y_train = full_y_train.iloc[:, 0]



full_X_test_transformed = pd.read_csv('CSV/pre-processed/full_X_test_transformed')


In [4]:
# Extracting only the image data from the full_X_train_transformed DataFrame
image_data_train = full_X_train_transformed.iloc[:, -40000:]
image_data_test = full_X_test_transformed.iloc[:, -40000:]

In [5]:
print(image_data_train.shape)
print(image_data_test.shape)
print(full_y_train.shape)

(990, 40000)
(594, 40000)
(990,)


In [6]:
image_data_train.columns = range(40000)
image_data_test.columns = range(40000)

In [7]:
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(full_y_train)

# Splitting 20% of the training data as a validation set
X_train, X_val, y_train_encoded_split, y_val_encoded_split = train_test_split(
    image_data_train, y_train_encoded, test_size=0.2, stratify=y_train_encoded, random_state=42
)

print(f"Training set size: {X_train.shape}")
print(f"Validation set size: {X_val.shape}")


Training set size: (792, 40000)
Validation set size: (198, 40000)


In [8]:
class MLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 1024)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1024, 512)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x


In [12]:
# Convert data to tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train_encoded_split, dtype=torch.int64).to(device)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val_encoded_split, dtype=torch.int64).to(device)

X_test_tensor = torch.tensor(image_data_test.values, dtype=torch.float32).to(device)

# Define the model
n_classes = full_y_train.nunique()
model = MLP(input_dim=40000, output_dim=n_classes)
model.to(device)

print(model)

# Define the loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

MLP(
  (fc1): Linear(in_features=40000, out_features=1024, bias=True)
  (dropout1): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=99, bias=True)
)


In [13]:
# Training parameters
epochs = 100
batch_size = 32

# Training loop
for epoch in range(epochs):
    for i in range(0, len(X_train_tensor), batch_size):
        # Determine end index for the current batch
        end_idx = min(i + batch_size, len(X_train_tensor))
        
        # Get the mini-batch data
        inputs = X_train_tensor[i:end_idx]
        labels = y_train_tensor[i:end_idx]

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Print statistics
    print(f"Epoch [{epoch+1}/{epochs}] - Loss: {loss.item()}")

print('Finished Training')

Epoch [1/100] - Loss: 4.655791759490967
Epoch [2/100] - Loss: 4.598121643066406
Epoch [3/100] - Loss: 4.5069661140441895
Epoch [4/100] - Loss: 4.515850067138672
Epoch [5/100] - Loss: 4.615506172180176
Epoch [6/100] - Loss: 4.664968013763428
Epoch [7/100] - Loss: 4.564562797546387
Epoch [8/100] - Loss: 4.419679641723633
Epoch [9/100] - Loss: 4.383508682250977
Epoch [10/100] - Loss: 4.086696147918701
Epoch [11/100] - Loss: 4.02334451675415
Epoch [12/100] - Loss: 3.7132883071899414
Epoch [13/100] - Loss: 3.72428035736084
Epoch [14/100] - Loss: 3.756972551345825
Epoch [15/100] - Loss: 3.4063806533813477
Epoch [16/100] - Loss: 3.2324485778808594
Epoch [17/100] - Loss: 2.9210243225097656
Epoch [18/100] - Loss: 3.2053515911102295
Epoch [19/100] - Loss: 3.055867910385132
Epoch [20/100] - Loss: 2.6801788806915283
Epoch [21/100] - Loss: 2.6555473804473877
Epoch [22/100] - Loss: 2.5806970596313477
Epoch [23/100] - Loss: 2.439872980117798
Epoch [24/100] - Loss: 2.1843314170837402
Epoch [25/100] - 

In [14]:
# Set the model to evaluation mode
model.eval()

correct_predictions = 0
total_predictions = 0

# Prevent gradient calculations
with torch.no_grad():
    for i in range(0, len(X_val_tensor), batch_size):
        # Determine end index for the current batch
        end_idx = min(i + batch_size, len(X_val_tensor))
        
        # Get the mini-batch data
        inputs = X_val_tensor[i:end_idx]
        labels = y_val_tensor[i:end_idx]

        # Forward pass
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

accuracy = 100 * correct_predictions / total_predictions
print(f'Accuracy on the validation set: {accuracy:.2f}%')


Accuracy on the validation set: 55.56%


In [15]:
# Ensure the model is in evaluation mode
model.eval()

# Placeholder for the probabilities
test_probabilities = []

with torch.no_grad():
    for i in range(0, len(X_test_tensor), batch_size):
        # Get mini-batch
        inputs = X_test_tensor[i:i+batch_size]

        # Forward pass to get output/logits
        outputs = model(inputs)

        # Apply softmax to obtain probabilities
        probabilities = F.softmax(outputs, dim=1)
        
        test_probabilities.extend(probabilities.cpu().numpy())

# Read the sample_submission.csv
proba_df = pd.read_csv("CSV/sample_submission.csv")

# Replace the data in the columns (excluding the "id" column) with the computed probabilities
# Ensure that the columns in proba_df (excluding 'id') match the order and number of your model's output classes
proba_df.iloc[:, 1:] = test_probabilities

# Save to CSV file for submission
proba_df.to_csv('MLP_output.csv', index=False)

56.06% accuracy on the validation set after 100 epoch training with LR = 0.0001