In [1]:
import torch
torch.cuda.is_available()

True

In [3]:
# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available. Using GPU.")
    device = torch.device("cuda")
else:
    print("CUDA is not available. Using CPU.")
    device = torch.device("cpu")

CUDA is available. Using GPU.


In [4]:
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import time

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 300
batch_size = 64
learning_rate = 0.001

# CIFAR10 Dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False)

# CNN Architecture
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(8 * 8 * 64, 1000)
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

model = ConvNet().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
start_time = time.time()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader)}')

end_time = time.time()
training_time = end_time - start_time
print(f'Training completed in: {training_time:.2f} seconds')

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the 10000 test images: {100 * correct / total}%')

# Save the model checkpoint
torch.save(model.state_dict(), 'cifar10_model.pth')


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/300], Loss: 1.4632187949879396
Epoch [2/300], Loss: 1.1980260968055871
Epoch [3/300], Loss: 1.0893741055675175
Epoch [4/300], Loss: 1.0323071856327983
Epoch [5/300], Loss: 0.9888744435042066
Epoch [6/300], Loss: 0.9557599480194814
Epoch [7/300], Loss: 0.9328523074726924
Epoch [8/300], Loss: 0.9057292453849407
Epoch [9/300], Loss: 0.8929975067867952
Epoch [10/300], Loss: 0.8699284452382866
Epoch [11/300], Loss: 0.8552517491914428
Epoch [12/300], Loss: 0.844140020554023
Epoch [13/300], Loss: 0.8275549498284259
Epoch [14/300], Loss: 0.8177754930637376
Epoch [15/300], Loss: 0.8066412126240523
Epoch [16/300], Loss: 0.7945711970939051
Epoch [17/300], Loss: 0.7900855072090388
Epoch [18/300], Loss: 0.7770882659327344
Epoch [19/300], Loss: 0.7747808933029394
Epoch [20/300], Loss: 0.7582680306318775
Epoch [21/300], Loss: 0.7611302339741032
Epoch [22/300], Loss: 0.7455419183463392
Epoch [23/300], Loss: 0.73940995

In [1]:
# part 1b
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import time
from torch.cuda.amp import GradScaler, autocast
from torch.optim.lr_scheduler import StepLR

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 300
batch_size = 128  # Adjust based on GPU capability
learning_rate = 0.001
num_workers = 4   # Based on system's capability
lr_step_size = 50  # Learning rate step size
lr_gamma = 0.1     # Learning rate decay factor

# CIFAR10 Dataset with efficient data loading
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=num_workers)

# Simplified CNN Architecture
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc_layers = nn.Sequential(
            nn.Linear(8 * 8 * 64, 1000),
            nn.ReLU(),
            nn.Linear(1000, 10))

    def forward(self, x):
        out = self.conv_layers(x)
        out = out.view(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc_layers(out)
        return out

model = ConvNet().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = StepLR(optimizer, step_size=lr_step_size, gamma=lr_gamma)

# Initialize the gradient scaler for mixed precision
scaler = GradScaler()

# Train the model
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)

        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    scheduler.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader)}')

training_time = time.time() - start_time
print(f'Training completed in: {training_time:.2f} seconds')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on test images: {accuracy}%')
# Save the model checkpoint
torch.save(model.state_dict(), 'cifar10_extended_model.pth')

Files already downloaded and verified
Files already downloaded and verified
Epoch [1/300], Loss: 1.3799286731673628
Epoch [2/300], Loss: 1.0487423141289245
Epoch [3/300], Loss: 0.8831510139853144
Epoch [4/300], Loss: 0.7698540664694803
Epoch [5/300], Loss: 0.6616150987575121
Epoch [6/300], Loss: 0.580741146031548
Epoch [7/300], Loss: 0.4992279327281601
Epoch [8/300], Loss: 0.4177770873393549
Epoch [9/300], Loss: 0.3511101286810682
Epoch [10/300], Loss: 0.30481810277075416
Epoch [11/300], Loss: 0.26484668653105836
Epoch [12/300], Loss: 0.22321723153828965
Epoch [13/300], Loss: 0.20123543451204323
Epoch [14/300], Loss: 0.18112143330142627
Epoch [15/300], Loss: 0.16103756136220435
Epoch [16/300], Loss: 0.14626412157474272
Epoch [17/300], Loss: 0.13874410177626267
Epoch [18/300], Loss: 0.12919597764073126
Epoch [19/300], Loss: 0.12151677928426687
Epoch [20/300], Loss: 0.12052348753451692
Epoch [21/300], Loss: 0.11355746538399736
Epoch [22/300], Loss: 0.10841676740028212
Epoch [23/300], Los

In [2]:
# 2 a 
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 300
batch_size = 64
learning_rate = 0.001

# CIFAR10 Dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False)

# ResNet Block
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResNetBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# ResNet-10
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

model = ResNet(ResNetBlock, [1, 1, 1, 1]).to(device)  # ResNet-10

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
start_time = time.time()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader)}')

end_time = time.time()
training_time = end_time - start_time
print(f'Training completed in: {training_time:.2f} seconds')

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the 10000 test images: {100 * correct / total}%')

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet10_cifar10.pth')


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/300], Loss: 1.2426466781007663
Epoch [2/300], Loss: 0.7660231165721288
Epoch [3/300], Loss: 0.5638005886503192
Epoch [4/300], Loss: 0.42534488350000527
Epoch [5/300], Loss: 0.3113696592123917
Epoch [6/300], Loss: 0.20641383223349938
Epoch [7/300], Loss: 0.1419284339265331
Epoch [8/300], Loss: 0.09960545654363377
Epoch [9/300], Loss: 0.0766152562835561
Epoch [10/300], Loss: 0.07165640188753843
Epoch [11/300], Loss: 0.05505694715958803
Epoch [12/300], Loss: 0.056522922242915406
Epoch [13/300], Loss: 0.044024212211888535
Epoch [14/300], Loss: 0.04797049951733655
Epoch [15/300], Loss: 0.030160778478640096
Epoch [16/300], Loss: 0.04497442332327442
Epoch [17/300], Loss: 0.02818851375567835
Epoch [18/300], Loss: 0.033255965898499426
Epoch [19/300], Loss: 0.029091637036801836
Epoch [20/300], Loss: 0.03038401165287114
Epoch [21/300], Loss: 0.027612376682352526
Epoch [22/300], Loss: 0.028150657035151076
Epoch [2

In [3]:
#2b
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 300
batch_size = 64
learning_rate = 0.001
weight_decay = 0.001  # For weight decay
dropout_rate = 0.3    # For dropout

# CIFAR10 Dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False)

# ResNet Block
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, use_dropout=False):
        super(ResNetBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)  # Batch Normalization
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)  # Batch Normalization
        self.dropout = nn.Dropout(dropout_rate) if use_dropout else nn.Identity()

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)  # Batch Normalization
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.dropout(out)  # Apply dropout
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# ResNet-10 with regularization options
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, use_dropout=False):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)  # Batch Normalization
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, use_dropout=use_dropout)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, use_dropout=use_dropout)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, use_dropout=use_dropout)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, use_dropout=use_dropout)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride, use_dropout):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride, use_dropout=use_dropout))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Select the regularization method here
use_weight_decay = False
use_dropout = True  # Set to False for experiments without dropout
use_batch_norm = True  # Batch Normalization is integrated in the model

model = ResNet(ResNetBlock, [1, 1, 1, 1], use_dropout=use_dropout).to(device)

# Loss and optimizer
# Adjust weight decay in the optimizer for L2 regularization
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay if use_weight_decay else 0)

# Train the model
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in trainloader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader)}')

training_time = time.time() - start_time
print(f'Training completed in: {training_time:.2f} seconds')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on test images: {accuracy}%')

# Save the model with a different name for each regularization method
model_path = 'C:\\Users\\hanba\\OneDrive\\Documents\\unc c\\introML\\MLHWCore\\HW7\\resnet10_cifar10_regularized.pth'
torch.save(model.state_dict(), model_path)


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/300], Loss: 1.2428373835047188
Epoch [2/300], Loss: 0.7985277343970125
Epoch [3/300], Loss: 0.6199493572077788
Epoch [4/300], Loss: 0.5040087837278081
Epoch [5/300], Loss: 0.4125342353644883
Epoch [6/300], Loss: 0.33750051123749875
Epoch [7/300], Loss: 0.2659018806960729
Epoch [8/300], Loss: 0.20814111295258603
Epoch [9/300], Loss: 0.16958500567080495
Epoch [10/300], Loss: 0.13377332363916022
Epoch [11/300], Loss: 0.11404700608104658
Epoch [12/300], Loss: 0.10014884855748271
Epoch [13/300], Loss: 0.08453160076690337
Epoch [14/300], Loss: 0.07705410788802768
Epoch [15/300], Loss: 0.0784136384738671
Epoch [16/300], Loss: 0.06520066956770332
Epoch [17/300], Loss: 0.0573893343762416
Epoch [18/300], Loss: 0.05841569407322608
Epoch [19/300], Loss: 0.05061913601985044
Epoch [20/300], Loss: 0.051796716434108166
Epoch [21/300], Loss: 0.04868681867124008
Epoch [22/300], Loss: 0.04140785434450049
Epoch [23/300], 