# Prepare data for processing

In [2]:
# Import necessary libraries
import os
import time
import copy
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import requests
import zipfile
import shutil
from tqdm.notebook import tqdm

# Select the compute device: first CUDA GPU when one is usable, else the CPU
_cuda_ok = torch.cuda.is_available()
device = torch.device("cuda:0") if _cuda_ok else torch.device("cpu")
print(f"Using device: {device}")

# Seed every RNG this notebook draws from so that runs are repeatable
np.random.seed(42)
torch.manual_seed(42)
if _cuda_ok:
    # Explicitly seed all visible GPUs as well
    torch.cuda.manual_seed_all(42)

Using device: cuda:0


In [6]:
def prepare_cifar10(data_root='./data', val_size=5000, batch_size=128,
                    num_workers=4, seed=42):
    """Download CIFAR-10 and build train / validation / test data loaders.

    The official training split is divided into a training subset and a
    validation subset. Only the training subset is augmented (random crop
    and horizontal flip); validation and test images go through the
    deterministic ToTensor + Normalize pipeline so that their metrics are
    comparable.

    Args:
        data_root: Directory where torchvision stores / looks for the dataset.
        val_size: Number of images held out from the training split for
            validation. Must satisfy ``0 < val_size < len(training split)``.
        batch_size: Batch size used by all three loaders.
        num_workers: Number of worker processes used by each loader.
        seed: Seed for the generator that draws the train/validation
            partition, making the split independent of the global RNG state
            (and therefore of notebook cell execution order).

    Returns:
        Tuple ``(trainloader, valloader, testloader, class_names)`` where the
        first three are ``torch.utils.data.DataLoader`` objects and
        ``class_names`` is the list of the ten class labels in index order.

    Raises:
        ValueError: If ``val_size`` does not leave at least one image in both
            the training and the validation subset.
    """
    # Per-channel mean / std handed to Normalize
    # (presumably the CIFAR-10 training-set statistics -- TODO confirm)
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2470, 0.2435, 0.2616)

    # Augmenting pipeline: used for the training subset only
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std)
    ])

    # Deterministic pipeline: used for validation and test
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std)
    ])

    print("Downloading and preparing CIFAR-10 dataset...")
    train_augmented = torchvision.datasets.CIFAR10(
        root=data_root, train=True, download=True, transform=transform_train)
    # Second view of the same training images, but with the evaluation
    # transform. Splitting a single dataset object would make the validation
    # subset inherit the random crop / flip augmentation.
    # The files are already on disk after the call above, so no download here.
    train_plain = torchvision.datasets.CIFAR10(
        root=data_root, train=True, download=False, transform=transform_test)

    total = len(train_augmented)
    if not 0 < val_size < total:
        raise ValueError(
            f"val_size must be between 1 and {total - 1}, got {val_size}")
    train_size = total - val_size

    # Draw one shuffled index list from a dedicated, seeded generator and use
    # disjoint slices of it for the two subsets.
    split_rng = torch.Generator().manual_seed(seed)
    shuffled = torch.randperm(total, generator=split_rng).tolist()
    trainset = torch.utils.data.Subset(train_augmented, shuffled[:train_size])
    valset = torch.utils.data.Subset(train_plain, shuffled[train_size:])

    # Test set
    testset = torchvision.datasets.CIFAR10(
        root=data_root, train=False, download=True, transform=transform_test)

    # Only the training loader shuffles; evaluation order stays fixed
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    valloader = torch.utils.data.DataLoader(
        valset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Class names, listed in label-index order
    class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck']

    print(f"Dataset loaded with {train_size} training, {val_size} validation, and {len(testset)} test images.")
    print(f"Number of classes: {len(class_names)}")

    return trainloader, valloader, testloader, class_names

# Build the train / validation / test loaders and the class-label list
(trainloader, valloader, testloader, class_names) = prepare_cifar10()

Downloading and preparing CIFAR-10 dataset...


100%|██████████| 170M/170M [01:12<00:00, 2.36MB/s] 


Dataset loaded with 45000 training, 5000 validation, and 10000 test images.
Number of classes: 10
