In [1]:
import os
import random
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import torch

# Normalization values 
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label_idx = torch.tensor(label, dtype=torch.long)
        return image, label_idx

# Prepare and load datasets
def prepare_datasets(paths, labels, sample_size=30000, train_size=29000):
    train_data = []
    test_data = []
    for path, label in zip(paths, labels):
        images = [(os.path.join(path, img), label) for img in os.listdir(path) if img.endswith(('.png', '.jpg', '.jpeg'))]
        if len(images) > sample_size:
            images = random.sample(images, sample_size)
        train_data.extend(images[:train_size])
        test_data.extend(images[train_size:])
    return train_data, test_data

# Paths and labels for datasets
dataset_paths = ["C:\\Users\\ysang\\VOC2012\\JPEGImages", "C:\\Users\\ysang\\flickr30k_images", "C:\\Users\\ysang\\imagenet\\imagenet"]
dataset_labels = [0, 1, 2] 

# Prepare data
train_data, test_data = prepare_datasets(dataset_paths, dataset_labels)

# Transformation pipeline now includes resizing to 50x50
transform = transforms.Compose([
    transforms.Resize((50, 50)),  # Resize the image to 50x50
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Create datasets with the updated transform
train_dataset = CustomDataset(train_data, transform=transform)
test_dataset = CustomDataset(test_data, transform=transform)

# Create dataloaders as before
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

# Custom AlexNet for 32x32 input images
class AlexNetCustom(nn.Module):
    def __init__(self, num_classes=3):
        super(AlexNetCustom, self).__init__()
        self.features = nn.Sequential(
            # The kernel size, number of filters and strides are adjusted to handle 32x32 images
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),  # Output: 64 x 32 x 32
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 64 x 16 x 16
            nn.Conv2d(64, 192, kernel_size=3, padding=1),  # Output: 192 x 16 x 16
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 192 x 8 x 8
            nn.Conv2d(192, 384, kernel_size=3, padding=1),  # Output: 384 x 8 x 8
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),  # Output: 256 x 8 x 8
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),  # Output: 256 x 8 x 8
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output: 256 x 4 x 4
        )
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))  # Adjust to size 4x4 to match the feature map size
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 4 * 4, 1024),  # Adjusted to match the flattened feature map size
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Initialize the model
model = AlexNetCustom(num_classes=3)

In [7]:
import torch
from torch.optim import lr_scheduler

# Placeholder for the training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()  # Set the model to training mode

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

    print('Training complete')

# Placeholder for the testing/evaluation loop
def evaluate_model(model, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    running_corrects = 0

    # Disabling gradient calculation is useful for inference
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

    total_loss = running_loss / len(test_loader.dataset)
    total_acc = running_corrects.double() / len(test_loader.dataset)

    print(f'Test Loss: {total_loss:.4f}, Test Acc: {total_acc:.4f}')

# Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

# Evaluate the model
evaluate_model(model, test_loader, criterion)

Epoch 1/10 - Loss: 1.0985, Acc: 0.3494
Epoch 2/10 - Loss: 1.0985, Acc: 0.3505
Epoch 3/10 - Loss: 1.0985, Acc: 0.3506
Epoch 4/10 - Loss: 1.0985, Acc: 0.3514
Epoch 5/10 - Loss: 1.0985, Acc: 0.3509
Epoch 6/10 - Loss: 1.0985, Acc: 0.3509
Epoch 7/10 - Loss: 1.0985, Acc: 0.3496
Epoch 8/10 - Loss: 1.0985, Acc: 0.3508
Epoch 9/10 - Loss: 1.0985, Acc: 0.3511
Epoch 10/10 - Loss: 1.0985, Acc: 0.3502
Training complete
Test Loss: 1.0979, Test Acc: 0.5000
