Contents of main.py

In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from model import CNN_GSGD, GSGDOptimizer
from train import train, test
import os
import torch.nn as nn
from torch.utils.data import random_split

# Data loading, model setup, and main training loop code here

In [2]:
# Preprocessing pipeline: convert the image to a tensor, then normalise it
# with mean 0.1307 / std 0.3081 (the same values used by the commented-out
# MNIST cell further down).
# NOTE(review): `transform` is reassigned in the data-loading cell before any
# dataset is built from it, so this definition looks unused -- confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [3]:
# # Define the path where the data should be stored
# data_path = './data'

# # Check if the data directory exists
# download_data = not os.path.exists(os.path.join(data_path, 'MNIST'))

# # Define the transform
# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize((0.1307,), (0.3081,))
# ])

# # Load the datasets, only downloading if necessary
# train_dataset = datasets.MNIST(data_path, train=True, download=download_data, transform=transform)
# test_dataset = datasets.MNIST(data_path, train=False, download=download_data, transform=transform)


In [4]:
# Define device: use the GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the path to the folder where your images are stored.
# ImageFolder expects one sub-directory per class under this root.
data_path = './data/AlgX3_64x64_merged_tensors_with_labels'  # Update this with the path to your images

# Seed for the train/test split. Without a fixed seed the held-out set changes
# on every kernel restart, so the accuracies printed below cannot be reproduced
# or compared between runs.
SPLIT_SEED = 42

# Step 1: Set up transformations
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Collapse every image to a single channel
    # transforms.Resize((28, 28)),  # Resize to 28x28 to match MNIST dimensions, adjust if needed
    transforms.ToTensor(),
    # (x - 0.5) / 0.5: rescales ToTensor's [0, 1] output (for 8-bit images) to
    # [-1, 1]. These are NOT the MNIST statistics (0.1307, 0.3081).
    transforms.Normalize((0.5,), (0.5,))
])

# Step 2: Load the entire dataset using ImageFolder
full_dataset = datasets.ImageFolder(root=data_path, transform=transform)

# Step 3: Get input size dynamically from the first image in the dataset
sample_image, _ = full_dataset[0]  # First (image, label) pair; the label is not needed here
input_size = sample_image.shape  # (C, H, W) of the transformed tensor

# Step 4: Split the dataset into training and testing sets (80% / 20%).
# test_size is computed as the remainder so the two lengths always sum to
# len(full_dataset), which random_split requires for integer lengths.
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),  # deterministic split
)

# Step 5: Create DataLoaders for both sets.
# NOTE: a later cell rebuilds train_loader/test_loader after the validation
# split; these loaders are only valid until that cell runs.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
# Split the train_dataset into training (80%) and validation (20%) datasets.
#
# The split is driven by an explicitly seeded generator so that the same
# samples land in the validation set on every run; an unseeded random_split
# draws from the global RNG and makes validation results non-reproducible.
#
# NOTE: this cell rebinds `train_dataset` (and `train_size`) to the smaller
# training subset, so it is not idempotent -- running it twice carves a
# further 20% out of the already-reduced set. Re-run the data-loading cell
# before re-running this one.
train_size = int(0.8 * len(train_dataset))  # 80% for training
validation_size = len(train_dataset) - train_size  # Remainder, so the lengths sum exactly
train_dataset, validation_dataset = random_split(
    train_dataset,
    [train_size, validation_size],
    generator=torch.Generator().manual_seed(42),  # deterministic split
)

In [6]:
# (Re)build the three loaders from the current dataset objects.
# Only the training loader shuffles; the validation and test loaders keep a
# fixed order. The test loader uses a larger batch of 1000 samples.
validation_loader = DataLoader(dataset=validation_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [7]:
# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network from the sample tensor shape (`input_size`) and the number
# of classes ImageFolder discovered, then move it to the chosen device.
model = CNN_GSGD(
    input_size=input_size,
    num_classes=len(full_dataset.classes),
)
model = model.to(device)

# model = EnhancedCNN_GSGD(num_classes=len(full_dataset.classes)).to(device)


# Optimizer selection -- exactly one of the variants below should be active.

# Standard Guided SGD:
# optimizer = GSGDOptimizer(model.parameters(), lr=0.01, method='sgd')

# Guided SGD with Momentum (active):
optimizer = GSGDOptimizer(
    model.parameters(),
    lr=0.01,
    method='momentum',
    momentum=0.9,
)

# Guided Adam:
# optimizer = GSGDOptimizer(model.parameters(), lr=0.001, method='adam', beta1=0.9, beta2=0.999)



In [9]:
# Loss criterion handed to the training routine.
loss_fn = nn.CrossEntropyLoss()

# Train, then evaluate on the test loader, once per epoch.
# `train` receives the underlying Dataset objects (via `.dataset`) rather than
# the DataLoaders; `test` receives the DataLoader itself.
# NOTE(review): range(1, 20) yields epochs 1..19, i.e. 19 epochs -- confirm
# that 20 was not intended.
for epoch in range(1, 20):
    train(
        model,
        device,
        train_loader.dataset,
        validation_loader.dataset,
        optimizer,
        epoch,
        loss_fn,
        verification_set_num=4,
        rho=10,
        log_interval=9,
    )
    # train(model, device, train_loader.dataset, optimizer, epoch, loss_fn)  # Pass train_loader.dataset
    test(model, device, test_loader)



Epoch: 1, Iteration: 1, Loss: 0.228636
Epoch: 1, Iteration: 10, Loss: 0.138525
Epoch: 1, Iteration: 19, Loss: 0.180092
Epoch: 1, Iteration: 28, Loss: 0.212048
Epoch: 1, Iteration: 37, Loss: 0.204245
Epoch: 1, Iteration: 46, Loss: 0.167363
Epoch: 1, Iteration: 55, Loss: 0.209436
Epoch: 1, Iteration: 64, Loss: 0.164147
Epoch: 1, Iteration: 73, Loss: 0.226425
Epoch 1 completed.

Test set: Average loss: 0.0003, Accuracy: 1414/1547 (91%)

Epoch: 2, Iteration: 1, Loss: 0.082218
Epoch: 2, Iteration: 10, Loss: 0.277550
Epoch: 2, Iteration: 19, Loss: 0.182232
Epoch: 2, Iteration: 28, Loss: 0.280135
Epoch: 2, Iteration: 37, Loss: 0.314680
Epoch: 2, Iteration: 46, Loss: 0.215636
Epoch: 2, Iteration: 55, Loss: 0.312456
Epoch: 2, Iteration: 64, Loss: 0.147495
Epoch: 2, Iteration: 73, Loss: 0.172012
Epoch 2 completed.

Test set: Average loss: 0.0003, Accuracy: 1420/1547 (92%)

Epoch: 3, Iteration: 1, Loss: 0.144128
Epoch: 3, Iteration: 10, Loss: 0.171519
Epoch: 3, Iteration: 19, Loss: 0.233982
Epoch