Enable GPU

In [1]:
import torch

# Select the compute device once: CUDA when the runtime exposes a GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print("GPU is available. Using device:", device)
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("GPU is not available. Using CPU.")

GPU is not available. Using CPU.


Data Loading (MNIST from Kaggle - UByte Files)

In [14]:
import numpy as np
import struct
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

def read_idx(filename):
    """Read an uncompressed IDX (ubyte) file into a NumPy array.

    Two header layouts are understood:

    * magic ``2051`` - the item count is followed by a row count and a
      column count; the result has shape ``(size, rows, cols)``.
    * magic ``2049`` - the header only holds the item count; the result is
      a flat array with ``size`` entries.

    Exactly as many bytes as the header announces are read, so trailing
    bytes are ignored and a short payload is reported instead of being
    returned silently.

    Args:
        filename: Path of the IDX file to read.

    Returns:
        A ``uint8`` NumPy array shaped as described above.

    Raises:
        ValueError: If the magic number is neither 2051 nor 2049, or if the
            file holds fewer data bytes than the header announces.
    """
    with open(filename, 'rb') as f:
        # Header fields are big-endian unsigned 32-bit integers.
        magic, size = struct.unpack(">II", f.read(8))

        if magic == 2051:
            rows, cols = struct.unpack(">II", f.read(8))
            shape = (size, rows, cols)
        elif magic == 2049:
            shape = (size,)
        else:
            raise ValueError("Invalid magic number: {}".format(magic))

        expected = 1
        for dim in shape:
            expected *= dim

        # Single-byte items have no byte order, so plain uint8 is sufficient.
        data = np.fromfile(f, dtype=np.uint8, count=expected)

    if data.size != expected:
        raise ValueError(
            "Truncated IDX file {!r}: header announces {} bytes, found {}".format(
                filename, expected, data.size
            )
        )
    return data.reshape(shape)

class MNISTUByteDataset(Dataset):
    """Map-style dataset backed by a pair of IDX (ubyte) image / label files.

    Args:
        images_file: Path of the IDX image file, loaded with ``read_idx``.
        labels_file: Path of the IDX label file, loaded with ``read_idx``.
        transform: Optional callable applied to the PIL image of every sample.

    Raises:
        ValueError: If the two files do not hold the same number of items.
    """

    def __init__(self, images_file, labels_file, transform=None):
        self.images = read_idx(images_file)
        self.labels = read_idx(labels_file)
        if len(self.images) != len(self.labels):
            raise ValueError(
                "Image/label count mismatch: {} images vs {} labels".format(
                    len(self.images), len(self.labels)
                )
            )
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is a PIL image, or whatever ``transform`` returns when a
        transform was supplied. The label is a plain Python ``int``.
        """
        image = Image.fromarray(self.images[idx])

        # Hand back a Python int rather than a NumPy uint8 scalar: the default
        # collate function then builds int64 targets, the index dtype that
        # classification losses expect, and uint8 wrap-around is avoided.
        label = int(self.labels[idx])

        if self.transform is not None:
            image = self.transform(image)

        return image, label



# Turn each PIL image into a float tensor, then standardise it
# (constants are presumably the MNIST pixel mean / std - confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Relative paths: the IDX files are looked up in the working directory.
train_images_file, train_labels_file = 'train-images.idx3-ubyte', 'train-labels.idx1-ubyte'
test_images_file, test_labels_file = 't10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte'

train_dataset = MNISTUByteDataset(
    train_images_file,
    train_labels_file,
    transform=transform,
)
test_dataset = MNISTUByteDataset(
    test_images_file,
    test_labels_file,
    transform=transform,
)

# Only the training split is shuffled; both loaders use two worker processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

print("Data loaded from UByte files.")

Data loaded from UByte files.


In [8]:
!pip install Pillow



In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import time

In [11]:
class CNN(nn.Module):
    """Two-stage convolutional classifier producing 10 logits.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a 2x2 max-pool.
    The flattened features feed a 128-unit hidden layer followed by the
    10-way output layer. ``fc1`` expects ``64 * 7 * 7`` input features.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        stage1 = self.pool1(self.relu1(self.conv1(x)))
        stage2 = self.pool2(self.relu2(self.conv2(stage1)))
        hidden = self.relu3(self.fc1(self.flatten(stage2)))
        return self.fc2(hidden)

In [12]:
# Re-derive the device here so this cell does not depend on the GPU-check cell.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = CNN()
model = model.to(device)

# Cross-entropy on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [13]:
num_epochs = 10

# Wall-clock timing of the whole training run.
start_time = time.time()
for epoch in range(num_epochs):
    # Loss accumulated since the last progress line.
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss of every block of 200 mini-batches.
        if (i + 1) % 200 == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

training_time = time.time() - start_time
print('Finished Training')
print(f'Training time: {training_time:.2f} seconds')

[1,   200] loss: 0.346
[1,   400] loss: 0.094
[1,   600] loss: 0.070
[1,   800] loss: 0.070
[2,   200] loss: 0.043
[2,   400] loss: 0.043
[2,   600] loss: 0.043
[2,   800] loss: 0.039
[3,   200] loss: 0.026
[3,   400] loss: 0.028
[3,   600] loss: 0.031
[3,   800] loss: 0.029
[4,   200] loss: 0.020
[4,   400] loss: 0.019
[4,   600] loss: 0.019
[4,   800] loss: 0.023
[5,   200] loss: 0.011
[5,   400] loss: 0.017
[5,   600] loss: 0.016
[5,   800] loss: 0.017
[6,   200] loss: 0.012
[6,   400] loss: 0.013
[6,   600] loss: 0.010
[6,   800] loss: 0.010
[7,   200] loss: 0.008
[7,   400] loss: 0.006
[7,   600] loss: 0.014
[7,   800] loss: 0.010
[8,   200] loss: 0.009
[8,   400] loss: 0.008
[8,   600] loss: 0.008
[8,   800] loss: 0.011
[9,   200] loss: 0.008
[9,   400] loss: 0.008
[9,   600] loss: 0.007
[9,   800] loss: 0.007
[10,   200] loss: 0.005
[10,   400] loss: 0.004
[10,   600] loss: 0.007
[10,   800] loss: 0.011
Finished Training
Training time: 973.72 seconds


In [None]:
# Switch to inference mode and collect predictions over the whole test split.
model.eval()
all_predictions, all_labels = [], []

with torch.no_grad():
    for data in test_loader:
        images = data[0].to(device)
        labels = data[1].to(device)

        outputs = model(images)
        # Index of the largest logit is the predicted class.
        predicted = torch.max(outputs, dim=1).indices

        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Overall accuracy plus support-weighted F1 across the classes.
accuracy = accuracy_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions, average='weighted')

print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')

Faster R-CNN

In [None]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import time
from sklearn.metrics import accuracy_score, f1_score



class MNISTObjectDataset(Dataset):
    """Expose digit images as single-object detection samples.

    Every sample is an ``(image, target)`` pair in the format consumed by
    torchvision detection models: the image is a float tensor and the target
    holds one bounding box spanning the whole image.

    Args:
        images: Indexable collection of ``H x W`` (or ``H x W x C``) arrays.
        labels: Indexable collection of integer class labels, one per image.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """

    def __init__(self, images, labels):
        if len(images) != len(labels):
            raise ValueError(
                "Image/label count mismatch: {} images vs {} labels".format(
                    len(images), len(labels)
                )
            )
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` has the keys ``boxes``, ``labels``, ``image_id``, ``area``
        and ``iscrowd``. Labels are shifted by one because torchvision
        detection models reserve class 0 for the background.
        """
        pixels = np.asarray(self.images[idx])
        if pixels.ndim == 2:
            # Grayscale: add the channel axis so the layout is H x W x C.
            pixels = pixels[:, :, None]

        # Channels-first float tensor; 8-bit data is rescaled to [0, 1].
        image = torch.from_numpy(np.ascontiguousarray(pixels.transpose(2, 0, 1)))
        if image.dtype == torch.uint8:
            image = image.to(torch.float32).div(255)
        else:
            image = image.to(torch.float32)

        # One box covering the full image in (x1, y1, x2, y2) form; box and
        # area are both derived from the real image size so they always agree.
        height, width = image.shape[-2], image.shape[-1]
        boxes = torch.tensor([[0, 0, width, height]], dtype=torch.float32)

        # Cast to a Python int before shifting so uint8 labels cannot wrap.
        labels = torch.tensor([int(self.labels[idx]) + 1], dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": torch.tensor([width * height]),
            "iscrowd": torch.tensor([0], dtype=torch.int64),
        }

        return image, target



# Re-wrap the raw image / label arrays of the existing datasets as detection samples.
# The names `train_dataset` / `test_dataset` are rebound to the wrapped versions.
train_dataset = MNISTObjectDataset(
    images=train_dataset.images,
    labels=train_dataset.labels,
)
test_dataset = MNISTObjectDataset(
    images=test_dataset.images,
    labels=test_dataset.labels,
)

def collate_fn(batch):
    """Transpose a list of samples into one tuple per sample field.

    A batch ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``;
    nothing is stacked, so the items may differ in size.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Detection loaders: small batches, custom collate so targets stay as per-image dicts.
data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn,
)
data_loader_test = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_fn,
)

# Start from the pretrained detector and swap in a box predictor sized for this task.
model = fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 11
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features,
    num_classes,
)
model.to(device)

# Optimise only the parameters that still require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)



num_epochs = 2

# Wall-clock timing of the whole Faster R-CNN training run.
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    # 1-based iteration counter for the progress lines.
    for i, (images, targets) in enumerate(data_loader, start=1):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Called with targets, the model returns a dict of loss terms; their sum is optimised.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Convert once to a Python float so the log shows a number rather than
        # the tensor repr (which would include its grad_fn).
        batch_loss = losses.item()
        epoch_loss += batch_loss

        if i % 100 == 0:
            print(f"Epoch: {epoch}, Iteration: {i}, Loss: {batch_loss:.4f}")

    # The scheduler is stepped once per epoch.
    lr_scheduler.step()
    print(f"Epoch {epoch} Loss: {epoch_loss/len(data_loader)}")
training_time_frcnn = time.time() - start_time
print(f"Finished training FRCNN. Total training time: {training_time_frcnn:.2f} seconds")


# Evaluate the detector as a classifier: one predicted digit per test image.
model.eval()
all_predictions = []
all_labels = []

with torch.no_grad():
    for images, targets in data_loader_test:
        images = [image.to(device) for image in images]
        predictions = model(images)

        for prediction, target in zip(predictions, targets):
            # Every sample carries exactly one ground-truth label; undo the +1 shift.
            true_label = int(target['labels'][0]) - 1

            # The detector may return zero or many detections per image. Keep
            # only the highest-scoring one so predictions and labels stay
            # aligned one-to-one; -1 marks "nothing detected" and therefore
            # always counts as an error.
            if len(prediction['labels']) > 0:
                best = int(prediction['scores'].argmax())
                predicted_label = int(prediction['labels'][best]) - 1
            else:
                predicted_label = -1

            all_predictions.append(predicted_label)
            all_labels.append(true_label)


# NOTE(review): `accuracy` and `f1` are also assigned by the CNN evaluation cell;
# assigning them here replaces those values. The names are kept because the
# comparison cell reads them for its Faster R-CNN lines.
accuracy = accuracy_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions, average='weighted')

print(f"Faster R-CNN Accuracy: {accuracy:.4f}")
print(f"Faster R-CNN F1 Score: {f1:.4f}")

Comparison & Fine-tuning

In [None]:
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import time
from sklearn.metrics import accuracy_score, f1_score


vgg16 = models.vgg16(pretrained=True)

# Freeze the convolutional feature extractor; only the classifier is trained.
for param in vgg16.features.parameters():
    param.requires_grad = False

# Replace the final classifier layer with a 10-way output.
num_features = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(num_features, 10)
vgg16 = vgg16.to(device)


def prepare_vgg_inputs(batch, size=224):
    """Adapt a batch of images to the input layout VGG16 expects.

    The loaders yield single-channel 28x28 tensors, while the pretrained
    network's first convolution takes 3-channel input and its pooling stack
    needs a larger image. The batch is resized to ``size`` x ``size`` and a
    single channel is replicated three times.

    Args:
        batch: Float tensor of shape ``(N, C, H, W)``.
        size: Target height and width in pixels.

    Returns:
        Float tensor of shape ``(N, 3, size, size)`` when ``C`` is 1;
        otherwise the resized batch with its channels unchanged.
    """
    batch = nn.functional.interpolate(
        batch, size=(size, size), mode="bilinear", align_corners=False
    )
    if batch.shape[1] == 1:
        batch = batch.repeat(1, 3, 1, 1)
    return batch


criterion_vgg = nn.CrossEntropyLoss()
# Hand the optimiser only the parameters that are still trainable.
optimizer_vgg = optim.Adam(
    [p for p in vgg16.parameters() if p.requires_grad], lr=0.001
)


num_epochs = 2
start_time_vgg = time.time()
vgg16.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = prepare_vgg_inputs(inputs)

        optimizer_vgg.zero_grad()
        outputs = vgg16(inputs)
        loss = criterion_vgg(outputs, labels)
        loss.backward()
        optimizer_vgg.step()

        running_loss += loss.item()
        # Report the mean loss of every block of 200 mini-batches.
        if i % 200 == 199:
            print(f'Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 200:.3f}')
            running_loss = 0.0
training_time_vgg = time.time() - start_time_vgg
print(f"Finished training VGG16. Total training time: {training_time_vgg:.2f} seconds")

vgg16.eval()
all_predictions_vgg = []
all_labels_vgg = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # Same input adaptation as during training.
        outputs = vgg16(prepare_vgg_inputs(inputs))
        _, predicted = torch.max(outputs, 1)
        all_predictions_vgg.extend(predicted.cpu().numpy())
        all_labels_vgg.extend(labels.cpu().numpy())

accuracy_vgg = accuracy_score(all_labels_vgg, all_predictions_vgg)
f1_vgg = f1_score(all_labels_vgg, all_predictions_vgg, average='weighted')

print(f'VGG16 Accuracy: {accuracy_vgg:.4f}')
print(f'VGG16 F1 Score: {f1_vgg:.4f}')


alexnet = models.alexnet(pretrained=True)

# Freeze the convolutional feature extractor; only the classifier is trained.
for param in alexnet.features.parameters():
    param.requires_grad = False

# Replace the final classifier layer with a 10-way output.
num_features = alexnet.classifier[6].in_features
alexnet.classifier[6] = nn.Linear(num_features, 10)
alexnet = alexnet.to(device)


def prepare_alexnet_inputs(batch, size=224):
    """Adapt a batch of images to the input layout AlexNet expects.

    The loaders yield single-channel 28x28 tensors, while the pretrained
    network's first convolution takes 3-channel input and its strided
    convolution / pooling stack needs a larger image. The batch is resized
    to ``size`` x ``size`` and a single channel is replicated three times.

    Args:
        batch: Float tensor of shape ``(N, C, H, W)``.
        size: Target height and width in pixels.

    Returns:
        Float tensor of shape ``(N, 3, size, size)`` when ``C`` is 1;
        otherwise the resized batch with its channels unchanged.
    """
    batch = nn.functional.interpolate(
        batch, size=(size, size), mode="bilinear", align_corners=False
    )
    if batch.shape[1] == 1:
        batch = batch.repeat(1, 3, 1, 1)
    return batch


criterion_alexnet = nn.CrossEntropyLoss()
# Hand the optimiser only the parameters that are still trainable.
optimizer_alexnet = optim.Adam(
    [p for p in alexnet.parameters() if p.requires_grad], lr=0.001
)


start_time_alexnet = time.time()
alexnet.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)
        inputs = prepare_alexnet_inputs(inputs)

        optimizer_alexnet.zero_grad()
        outputs = alexnet(inputs)
        loss = criterion_alexnet(outputs, labels)
        loss.backward()
        optimizer_alexnet.step()

        running_loss += loss.item()
        # Report the mean loss of every block of 200 mini-batches.
        if i % 200 == 199:
            print(f'Epoch: {epoch + 1}, Batch: {i + 1}, Loss: {running_loss / 200:.3f}')
            running_loss = 0.0

training_time_alexnet = time.time() - start_time_alexnet
print(f"Finished training AlexNet. Total training time: {training_time_alexnet:.2f} seconds")


alexnet.eval()
all_predictions_alexnet = []
all_labels_alexnet = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # Same input adaptation as during training.
        outputs = alexnet(prepare_alexnet_inputs(inputs))
        _, predicted = torch.max(outputs, 1)
        all_predictions_alexnet.extend(predicted.cpu().numpy())
        all_labels_alexnet.extend(labels.cpu().numpy())

accuracy_alexnet = accuracy_score(all_labels_alexnet, all_predictions_alexnet)
f1_alexnet = f1_score(all_labels_alexnet, all_predictions_alexnet, average='weighted')

print(f'AlexNet Accuracy: {accuracy_alexnet:.4f}')
print(f'AlexNet F1 Score: {f1_alexnet:.4f}')


# Summary: training time, accuracy and weighted F1 for each of the four models.
print("\nModel Comparison:")
print(f"CNN Training Time: {training_time:.2f} seconds")
# NOTE(review): `accuracy` and `f1` are assigned by the CNN evaluation cell and
# then reassigned by the Faster R-CNN evaluation cell, so the CNN lines here and
# the Faster R-CNN lines below print the same values. Keeping the CNN metrics
# under their own names before the Faster R-CNN cell runs would fix this.
print(f"CNN Accuracy: {accuracy:.4f}")
print(f"CNN F1 Score: {f1:.4f}")

print(f"Faster R-CNN Training Time: {training_time_frcnn:.2f} seconds")
print(f"Faster R-CNN Accuracy: {accuracy:.4f}")
print(f"Faster R-CNN F1 Score: {f1:.4f}")


# VGG16 and AlexNet keep their metrics in dedicated, model-suffixed variables.
print(f"VGG16 Training Time: {training_time_vgg:.2f} seconds")
print(f"VGG16 Accuracy: {accuracy_vgg:.4f}")
print(f"VGG16 F1 Score: {f1_vgg:.4f}")

print(f"AlexNet Training Time: {training_time_alexnet:.2f} seconds")
print(f"AlexNet Accuracy: {accuracy_alexnet:.4f}")
print(f"AlexNet F1 Score: {f1_alexnet:.4f}")