In [1]:
#train.py

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
import time
import os

# ======================
# Simplified CNN Model
# ======================
class MyCNN(nn.Module):
    def __init__(self):
        super(MyCNN, self).__init__()
        
        # First convolutional block - reduced filters
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # Second convolutional block - reduced filters
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # Third convolutional block - reduced filters
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # Simplified fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 10)
        )
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x

# ======================
# Training function
# ======================
def train(model, loader, optimizer, criterion, device, scheduler=None):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for batch_idx, (inputs, labels) in enumerate(loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
    if scheduler:
        scheduler.step()
        
    train_loss = running_loss / len(loader)
    train_acc = 100. * correct / total
    return train_loss, train_acc

# ======================
# Validation function
# ======================
def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
    val_loss = running_loss / len(loader)
    val_acc = 100. * correct / total
    return val_loss, val_acc

def main():
    start_time = time.time()
    
    # Set device - prioritize GPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    # Define transforms - basic transform for faster processing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])

    # Augmentation only for training
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])

    # Load datasets with different transforms
    train_val_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=train_transform)
    
    # Use less validation data for faster evaluation
    val_size = 5000  # Fixed validation size
    train_size = len(train_val_dataset) - val_size
    train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])

    # Increase batch size for faster training
    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, 
                             num_workers=4, pin_memory=True if torch.cuda.is_available() else False)
    val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, 
                           num_workers=2, pin_memory=True if torch.cuda.is_available() else False)

    # Initialize model
    model = MyCNN().to(device)
    
    # Use mixed precision training if available (for newer GPUs)
    scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    
    # Learning rate scheduler - reduce learning rate when plateauing
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    # Reduce number of epochs
    num_epochs = 60
    best_val_acc = 0
    patience = 3
    counter = 0

    for epoch in range(num_epochs):
        epoch_start = time.time()
        
        train_loss, train_acc = train(model, train_loader, optimizer, criterion, device, scheduler)
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        
        epoch_time = time.time() - epoch_start
        
        print(f"Epoch [{epoch+1}/{num_epochs}] - Time: {epoch_time:.2f}s")
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.2f}%")
        
        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print('Saved Best Model!')
            counter = 0
        else:
            counter += 1
            
        # Early stopping
        if counter >= patience:
            print(f"Early stopping after {epoch+1} epochs")
            break
            
        # Check if total training time is approaching 4 hours
        total_time = time.time() - start_time
        hours = total_time / 3600
        if hours > 3.5:  # Stop if approaching 4 hours
            print(f"Training time limit approaching ({hours:.2f} hours). Stopping training.")
            break
            
    total_time = time.time() - start_time
    hours, remainder = divmod(total_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f"Total training time: {int(hours)}h {int(minutes)}m {int(seconds)}s")

if __name__ == "__main__":
    main()

Using device: cpu
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:12<00:00, 13.8MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Epoch [1/60] - Time: 77.00s
Train Loss: 1.5996 | Train Acc: 41.83%
Val   Loss: 1.3667 | Val   Acc: 51.24%
Saved Best Model!
Epoch [2/60] - Time: 77.95s
Train Loss: 1.1842 | Train Acc: 57.57%
Val   Loss: 1.1992 | Val   Acc: 57.00%
Saved Best Model!
Epoch [3/60] - Time: 77.31s
Train Loss: 1.0341 | Train Acc: 63.22%
Val   Loss: 1.2161 | Val   Acc: 59.54%
Saved Best Model!
Epoch [4/60] - Time: 77.53s
Train Loss: 0.9474 | Train Acc: 66.64%
Val   Loss: 0.9155 | Val   Acc: 67.56%
Saved Best Model!
Epoch [5/60] - Time: 79.57s
Train Loss: 0.8769 | Train Acc: 69.12%
Val   Loss: 0.8479 | Val   Acc: 69.68%
Saved Best Model!
Epoch [6/60] - Time: 85.35s
Train Loss: 0.7747 | Train Acc: 73.00%
Val   Loss: 0.7718 | Val   Acc: 72.10%
Saved Best Model!
Epoch [7/60] - Time: 78.88s
Train Loss: 0.7324 | Train Acc: 74.50%
Val   Loss: 0.7579 | Val   Acc: 73.52%
Saved Best Model!
Epoch [8/60] - Time: 77.99s
Train Loss: 0.7115 | Train Acc: 75.24%
Val   Loss: 0.

In [2]:
#test.py

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time

# ======================
# Simplified CNN Model - Same as in train.py
# ======================
class MyCNN(nn.Module):
    def __init__(self):
        super(MyCNN, self).__init__()
        
        # First convolutional block - reduced filters
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # Second convolutional block - reduced filters
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # Third convolutional block - reduced filters
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # Simplified fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, 10)
        )
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x

# ======================
# Test function
# ======================
def test(model, loader, device):
    model.eval()
    correct = 0
    total = 0
    class_correct = [0] * 10
    class_total = [0] * 10
    
    # Time tracking
    start_time = time.time()
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
            # Per-class accuracy
            for i in range(labels.size(0)):
                label = labels[i]
                pred = predicted[i]
                if label == pred:
                    class_correct[label] += 1
                class_total[label] += 1
    
    test_acc = 100. * correct / total
    
    # Compute inference time
    inference_time = time.time() - start_time
    
    return test_acc, class_correct, class_total, inference_time

def main():
    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)

    # Define transforms - same normalization as training but no augmentation
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
    ])

    # Load test dataset
    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform)
    
    # Class names for reporting
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    # DataLoader with larger batch size for faster inference
    test_loader = DataLoader(
        test_dataset, 
        batch_size=512, 
        shuffle=False, 
        num_workers=4, 
        pin_memory=True if torch.cuda.is_available() else False
    )

    # Initialize model
    model = MyCNN().to(device)
    
    # Load best model
    try:
        model.load_state_dict(torch.load('/kaggle/input/model-v4-1/best_model_v4_1.pth'))
        print("Successfully loaded model from 'best_model.pth'")
    except Exception as e:
        print(f"Error loading model: {e}")
        return
    
    # Run test
    test_acc, class_correct, class_total, inference_time = test(model, test_loader, device)
    
    # Print overall accuracy
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Inference Time: {inference_time:.2f} seconds")
    
    # Print per-class accuracy
    print("\nPer-class accuracy:")
    for i in range(10):
        class_acc = 100 * class_correct[i] / class_total[i]
        print(f'{classes[i]}: {class_acc:.2f}%')

if __name__ == "__main__":
    main()

Using device: cpu
Files already downloaded and verified
Successfully loaded model from 'best_model.pth'


  model.load_state_dict(torch.load('/kaggle/input/model-v4-1/best_model_v4_1.pth'))


Test Accuracy: 84.49%
Inference Time: 8.90 seconds

Per-class accuracy:
plane: 87.30%
car: 92.10%
bird: 74.60%
cat: 69.40%
deer: 86.40%
dog: 77.40%
frog: 88.60%
horse: 86.40%
ship: 91.80%
truck: 90.90%
