In [17]:
import os
import random
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import torch

# Normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label_idx = torch.tensor(label, dtype=torch.long)
        return image, label_idx

# Prepare and load datasets
def prepare_datasets(paths, labels, sample_size=30000, train_size=20000):
    train_data = []
    test_data = []
    for path, label in zip(paths, labels):
        images = [(os.path.join(path, img), label) for img in os.listdir(path) if img.endswith(('.png', '.jpg', '.jpeg'))]
        if len(images) > sample_size:
            images = random.sample(images, sample_size)
        train_data.extend(images[:train_size])
        test_data.extend(images[train_size:])
    return train_data, test_data

# Paths and labels for datasets
dataset_paths = ["C:\\Users\\ysang\\VOC2012\\JPEGImages", "C:\\Users\\ysang\\flickr30k_images", "C:\\Users\\ysang\\imagenet\\imagenet"]
dataset_labels = [0, 1, 2]

# Prepare data
train_data, test_data = prepare_datasets(dataset_paths, dataset_labels)

# Transformation pipeline
transform = transforms.Compose([
    transforms.Resize((227, 227)), #enhanced resolution
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Dataloaders
train_dataset = CustomDataset(train_data, transform=transform)
test_dataset = CustomDataset(test_data, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [18]:
from torchvision.models import alexnet
import torch.nn as nn
import torch.optim as optim

# Define a simplified version of AlexNet for 3-class classification
class SimpleAlexNet(nn.Module):
    def __init__(self, num_classes=3):
        super(SimpleAlexNet, self).__init__()
        original_model = alexnet(pretrained=True)
        self.features = original_model.features
        self.avgpool = original_model.avgpool
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, optimizer, and loss function
model = SimpleAlexNet(num_classes=3).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [19]:
import torch
from torch.optim import lr_scheduler

# Placeholder for the training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()  # Set the model to training mode

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)

        print(f'Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

    print('Training complete')

# Placeholder for the testing/evaluation loop
def evaluate_model(model, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    running_corrects = 0

    # Disabling gradient calculation is useful for inference
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

    total_loss = running_loss / len(test_loader.dataset)
    total_acc = running_corrects.double() / len(test_loader.dataset)

    print(f'Test Loss: {total_loss:.4f}, Test Acc: {total_acc:.4f}')

# Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

# Evaluate the model
evaluate_model(model, test_loader, criterion)


Epoch 1/10 - Loss: 1.1170, Acc: 0.3497
Epoch 2/10 - Loss: 1.0963, Acc: 0.3490
Epoch 3/10 - Loss: 1.0962, Acc: 0.3495
Epoch 4/10 - Loss: 1.0961, Acc: 0.3497
Epoch 5/10 - Loss: 1.0962, Acc: 0.3465
Epoch 6/10 - Loss: 1.0961, Acc: 0.3481
Epoch 7/10 - Loss: 1.0961, Acc: 0.3500
Epoch 8/10 - Loss: 1.0961, Acc: 0.3515
Epoch 9/10 - Loss: 1.0961, Acc: 0.3496
Epoch 10/10 - Loss: 1.0961, Acc: 0.3488
Training complete
Test Loss: 1.0522, Test Acc: 0.5000
