1. Establish a CNN architecture (based on the PyTorch library) to classify the MNIST dataset



In [None]:


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two blocks of (3x3 same-padded convolution, ReLU, 2x2 max-pool) reduce a
    28x28 input to 64 feature maps of 7x7, which two fully connected layers
    turn into 10 class scores.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10).

        Args:
            x: batch of images shaped (batch, 1, 28, 28).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to 7x7,
                because the flattened features then do not fit ``fc1``.
        """
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed. Inferring the
        # batch size with view(-1, 64 * 7 * 7) would silently regroup features
        # of differently sized inputs into a larger fake batch.
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [None]:
# Training hyper-parameters shared by the cells below.
batch_size = 64
learning_rate = 0.001
num_epochs = 10

# Convert every image to a tensor, then normalise it with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Training split (stored under ./data), reshuffled on every pass.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=batch_size, shuffle=True
)

# Test split, iterated in its stored order.
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=batch_size, shuffle=False
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 144978785.32it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 26681870.89it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 47360360.73it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 12042053.58it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Model, loss and optimiser consumed by the training loop.
model = CNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)


In [None]:
# Train for num_epochs passes over trainloader and report the mean loss of
# every block of 100 mini-batches.
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            # Start accumulating the next block of 100 batches.
            running_loss = 0.0

print('Finished Training')


[1,   100] loss: 0.688
[1,   200] loss: 0.199
[1,   300] loss: 0.128
[1,   400] loss: 0.106
[1,   500] loss: 0.089
[1,   600] loss: 0.081
[1,   700] loss: 0.071
[1,   800] loss: 0.060
[1,   900] loss: 0.066
[2,   100] loss: 0.053
[2,   200] loss: 0.052
[2,   300] loss: 0.047
[2,   400] loss: 0.050
[2,   500] loss: 0.051
[2,   600] loss: 0.037
[2,   700] loss: 0.041
[2,   800] loss: 0.047
[2,   900] loss: 0.037
[3,   100] loss: 0.037
[3,   200] loss: 0.031
[3,   300] loss: 0.028
[3,   400] loss: 0.029
[3,   500] loss: 0.033
[3,   600] loss: 0.032
[3,   700] loss: 0.026
[3,   800] loss: 0.037
[3,   900] loss: 0.028
[4,   100] loss: 0.020
[4,   200] loss: 0.022
[4,   300] loss: 0.024
[4,   400] loss: 0.025
[4,   500] loss: 0.028
[4,   600] loss: 0.023
[4,   700] loss: 0.022
[4,   800] loss: 0.029
[4,   900] loss: 0.024
[5,   100] loss: 0.013
[5,   200] loss: 0.017
[5,   300] loss: 0.014
[5,   400] loss: 0.013
[5,   500] loss: 0.021
[5,   600] loss: 0.029
[5,   700] loss: 0.015
[5,   800] 

In [None]:
# Count correct top-1 predictions over the whole test loader.
correct = 0
total = 0
with torch.no_grad():  # inference only, no gradients needed
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # The index of the largest score along dim 1 is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

# %d truncates the percentage to an integer.
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


Accuracy of the network on the 10000 test images: 99 %


2. Do the same thing with Faster R-CNN

In [1]:


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn
import torchvision.transforms as transforms
import numpy as np
import struct
from array import array
class FasterRCNNNet(torch.nn.Module):
    """Faster R-CNN (ResNet-50 FPN) from torchvision with a new box head.

    The pretrained detector is created in ``__init__`` and its box predictor
    is replaced by a fresh one sized for ``num_classes``.

    NOTE(review): torchvision detection models reserve label 0 for the
    background, so ``num_classes`` presumably has to count it and the dataset
    labels should start at 1 — confirm against the labels fed to this model.
    """

    def __init__(self, num_classes):
        """Build the detector and attach a box predictor for ``num_classes`` classes."""
        super(FasterRCNNNet, self).__init__()
        # Imported locally under an alias so the torchvision predictor is used
        # even though this notebook defines its own class named
        # FastRCNNPredictor.
        from torchvision.models.detection.faster_rcnn import (
            FastRCNNPredictor as TorchvisionBoxPredictor,
        )

        self.faster_rcnn_model = fasterrcnn_resnet50_fpn(pretrained=True)
        in_features = self.faster_rcnn_model.roi_heads.box_predictor.cls_score.in_features
        # The RoI heads expect the predictor to return a pair
        # (class_logits, box_regression). A bare nn.Linear returns a single
        # tensor and breaks that contract, so use the two-headed predictor.
        self.faster_rcnn_model.roi_heads.box_predictor = TorchvisionBoxPredictor(
            in_features, num_classes
        )

    def load_pretrained_weights(self, model_path):
        """Copy selected tensors from the checkpoint at ``model_path`` into the detector.

        Only checkpoint keys listed in ``mapping_dict`` are loaded (renamed to
        the detector's backbone keys); every other entry is ignored, and
        ``strict=False`` tolerates the keys that are therefore missing.
        """
        # map_location="cpu" lets a checkpoint saved on a GPU load on a
        # CPU-only machine; load_state_dict copies into the existing
        # parameters wherever they live.
        state_dict = torch.load(model_path, map_location="cpu")
        mapping_dict = {
            "conv1.weight": "backbone.body.conv1.weight",
            "bn1.weight": "backbone.body.bn1.weight",
        }
        adjusted_state_dict = {
            mapping_dict[key]: value
            for key, value in state_dict.items()
            if key in mapping_dict
        }
        self.faster_rcnn_model.load_state_dict(adjusted_state_dict, strict=False)

    def forward(self, images, targets=None):
        """Return the summed loss when training with targets, else the detections.

        Args:
            images: images passed straight to the wrapped detector.
            targets: optional list of per-image target dicts.

        Returns:
            In training mode with ``targets`` given: the sum of all values in
            the loss dict returned by the detector. Otherwise: whatever the
            wrapped detector returns for ``images`` alone.
        """
        if self.training and targets is not None:
            # Shallow-copy each target dict before handing it to the detector.
            targets = [dict(target.items()) for target in targets]
            loss_dict = self.faster_rcnn_model(images, targets)
            return sum(loss for loss in loss_dict.values())
        return self.faster_rcnn_model(images)

In [2]:
class FastRCNNPredictor(nn.Module):
    """Single linear layer that maps feature vectors to per-class scores.

    NOTE(review): the name matches torchvision's Faster R-CNN box predictor,
    but this class only produces class scores and has no box-regression
    output — confirm that is intended before using it as a detection head.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_features=in_channels, out_features=num_classes)

    def forward(self, x):
        """Return the class scores computed by ``cls_score`` for ``x``."""
        scores = self.cls_score(x)
        return scores

In [3]:
import PIL

class MnistDataset(Dataset):
    """Detection-style view of MNIST: each image with one full-image box.

    Every sample is a dict with the keys ``"images"``, ``"boxes"`` and
    ``"labels"``.
    """

    def __init__(self, images, labels, transform=None):
        """Store the image sequence, the label sequence and an optional transform."""
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the sample dict for position ``index``.

        The ``"images"`` entry is the transformed image, or the raw uint8
        array when no transform is set. ``"boxes"`` is ``[0, 0, width,
        height]`` as float32 and ``"labels"`` is an int64 scalar tensor.
        """
        label = self.labels[index]

        # Hand the transform a uint8 array rather than a PIL image: ToTensor
        # and ToPILImage both accept arrays, whereas ToPILImage rejects input
        # that is already a PIL image. An array also exposes ``.shape`` when
        # no transform is configured.
        image = np.array(self.images[index], dtype=np.uint8)

        if self.transform:
            image = self.transform(image)

        # The last two dimensions are (height, width) for both an (H, W)
        # array and a (C, H, W) tensor.
        height, width = image.shape[-2:]
        box = torch.tensor([0, 0, width, height], dtype=torch.float32)

        sample = {
            "images": image,
            "boxes": box,
            "labels": torch.tensor(label, dtype=torch.int64)
        }
        return sample

In [None]:
class MnistDataloader(object):
    """Reads the raw MNIST IDX files and wraps them in ``MnistDataset`` objects."""

    def __init__(self, training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath, transform=None):
        """Remember the four file paths and the transform passed to both datasets."""
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
        self.transform = transform

    def read_images_labels(self, images_filepath, labels_filepath):
        """Parse one image file and one label file.

        Returns:
            ``(images, labels)`` where ``images`` is a list holding one uint8
            array of shape ``(rows, cols)`` per image and ``labels`` is an
            ``array("B")`` of the label bytes.

        Raises:
            ValueError: if a magic number is wrong (2049 for labels, 2051 for
                images), if the image file is shorter than its header
                announces, or if there are fewer labels than images.
        """
        with open(labels_filepath, 'rb') as file:
            magic, _label_count = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            pixels = np.frombuffer(file.read(), dtype=np.uint8)

        expected = size * rows * cols
        if pixels.size < expected:
            raise ValueError('Image file truncated, expected {} pixel bytes, got {}'.format(expected, pixels.size))
        if len(labels) < size:
            raise ValueError('Label count mismatch, expected at least {}, got {}'.format(size, len(labels)))

        # Use the dimensions from the header instead of a hard-coded 28x28.
        # copy() is needed because frombuffer returns a read-only view.
        stacked = pixels[:expected].reshape(size, rows, cols).copy()
        images = list(stacked)
        return images, labels

    def load_data(self):
        """Return ``(train_dataset, test_dataset)`` built from the stored paths."""
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        train_dataset = MnistDataset(x_train, y_train, transform=self.transform)
        test_dataset = MnistDataset(x_test, y_test, transform=self.transform)
        return train_dataset, test_dataset

In [None]:
# ToTensor accepts PIL images as well as uint8 arrays, so it works with
# whichever of the two the dataset hands over. A leading ToPILImage would
# reject samples that are already PIL images, so it is left out.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
mnist_dataloader = MnistDataloader(
    '/kaggle/input/mnist-dataset/train-images-idx3-ubyte/train-images-idx3-ubyte',
    '/kaggle/input/mnist-dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte',
    '/kaggle/input/mnist-dataset/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte',
    '/kaggle/input/mnist-dataset/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte',
    transform=transform
)

# Small batches for the detection model; only the training split is shuffled.
batch_size = 2
train_dataset, test_dataset = mnist_dataloader.load_data()
mnist_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
mnist_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Settings for the detector: number of output classes and local checkpoint.
num_classes = 10
model_path = "/kaggle/input/cocodataset/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network, load the checkpoint tensors, then move it to the device.
faster_rcnn_net = FasterRCNNNet(num_classes)
faster_rcnn_net.load_pretrained_weights(model_path)
faster_rcnn_net.to(device)

# Optimiser over all network parameters, plus a cross-entropy criterion.
optimizer = optim.Adam(params=faster_rcnn_net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
criterion.to(device)

In [None]:
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: called as ``model(images, targets)`` in training mode; may
            return a scalar loss tensor or a dict of loss tensors.
        dataloader: iterable of dicts with the batched keys ``"images"``,
            ``"boxes"`` and ``"labels"``.
        optimizer: optimiser stepping the model parameters.
        criterion: unused; kept so existing callers keep working.
        device: device the batch tensors are moved to.

    Returns:
        The average loss per batch as a float, or ``0.0`` when the dataloader
        yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for batch in dataloader:
        # Detection models take one image and one target dict per sample,
        # so the batched tensors are split along their first dimension.
        images = [image.to(device) for image in batch["images"]]
        boxes = batch["boxes"].to(device)
        labels = batch["labels"].to(device)
        targets = [
            # reshape gives boxes (N, 4) and labels (N,) per image, whether
            # the dataset stored a single box as (4,) or as (1, 4).
            {"boxes": sample_boxes.reshape(-1, 4), "labels": sample_labels.reshape(-1)}
            for sample_boxes, sample_labels in zip(boxes, labels)
        ]
        optimizer.zero_grad()
        outputs = model(images, targets)
        # Accept both a ready-made scalar loss and a dict of partial losses.
        loss = sum(outputs.values()) if isinstance(outputs, dict) else outputs
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

In [None]:
def evaluate(model, dataloader, criterion, device):
    """Return ``(average loss per batch, accuracy)`` of ``model`` on ``dataloader``.

    The model is switched to eval mode and run without gradient tracking.
    Each batch must unpack into ``(images, labels)`` and the model output is
    treated as per-class scores whose largest entry is the prediction.

    NOTE(review): a loader that yields dict batches, or a model that returns
    detection results instead of class scores, does not fit this function —
    confirm which loader and model it is meant for.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0
    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_images)
            loss_sum += criterion(scores, batch_labels).item()
            # Index of the highest score along dim 1 is the predicted class.
            predictions = torch.max(scores.data, 1)[1]
            num_seen += batch_labels.size(0)
            num_correct += (predictions == batch_labels).sum().item()
    mean_loss = loss_sum / len(dataloader)
    hit_rate = num_correct / num_seen
    return mean_loss, hit_rate

In [None]:

# Recreate the detector from scratch with the same settings as before.
num_classes = 10
model_path = "/kaggle/input/cocodataset/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fresh network: load the checkpoint tensors, then move it to the device.
faster_rcnn_net = FasterRCNNNet(num_classes)
faster_rcnn_net.load_pretrained_weights(model_path)
faster_rcnn_net.to(device)

# New optimiser bound to the new parameters, and the loss criterion.
optimizer = optim.Adam(faster_rcnn_net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
criterion.to(device)

4. Using pretrained models (VGG16 and AlexNet), fine-tune your model on the new dataset.

In [4]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn.functional as F

In [5]:
class MnistDataset(Dataset):
    """Classification view of MNIST that yields ``(image, label)`` pairs."""

    def __init__(self, images, labels, transform=None):
        """Keep the image sequence, the label sequence and an optional transform."""
        self.transform = transform
        self.labels = labels
        self.images = images

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the image at ``index`` as a 28x28x1 uint8 array with its label.

        When a transform is set, its result replaces the array.
        """
        pixels = np.array(self.images[index], dtype=np.uint8)
        sample = pixels.reshape(28, 28, 1)  # height, width, single channel
        target = self.labels[index]

        if not self.transform:
            return sample, target
        return self.transform(sample), target

In [6]:
class MnistDataloader(object):
    """Loads the MNIST IDX files from disk and builds the train/test datasets."""

    def __init__(self, training_images_filepath, training_labels_filepath, test_images_filepath, test_labels_filepath, transform=None):
        """Store the four file paths and the transform handed to both datasets."""
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath
        self.transform = transform

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one pair of image and label files.

        Returns:
            ``(images, labels)``: ``images`` is a list of uint8 arrays, one of
            shape ``(rows, cols)`` per image; ``labels`` is an ``array("B")``
            with the label bytes.

        Raises:
            ValueError: on a wrong magic number (2049 for labels, 2051 for
                images), on an image file shorter than its header announces,
                or when there are fewer labels than images.
        """
        with open(labels_filepath, 'rb') as file:
            magic, _label_count = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())
        with open(images_filepath, 'rb') as file:
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            pixels = np.frombuffer(file.read(), dtype=np.uint8)

        expected = size * rows * cols
        if pixels.size < expected:
            raise ValueError('Image file truncated, expected {} pixel bytes, got {}'.format(expected, pixels.size))
        if len(labels) < size:
            raise ValueError('Label count mismatch, expected at least {}, got {}'.format(size, len(labels)))

        # Shape every image from the header's rows/cols rather than assuming
        # 28x28. copy() makes the data writable, because frombuffer returns a
        # read-only view.
        stacked = pixels[:expected].reshape(size, rows, cols).copy()
        images = list(stacked)
        return images, labels

    def load_data(self):
        """Return ``(train_dataset, test_dataset)`` read from the stored paths."""
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        train_dataset = MnistDataset(x_train, y_train, transform=self.transform)
        test_dataset = MnistDataset(x_test, y_test, transform=self.transform)
        return train_dataset, test_dataset

In [None]:
# Pick the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Reader for the four raw MNIST files; reuses the notebook's current `transform`.
mnist_dataloader = MnistDataloader(
    training_images_filepath='/kaggle/input/mnist-dataset/train-images-idx3-ubyte/train-images-idx3-ubyte',
    training_labels_filepath='/kaggle/input/mnist-dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte',
    test_images_filepath='/kaggle/input/mnist-dataset/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte',
    test_labels_filepath='/kaggle/input/mnist-dataset/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte',
    transform=transform,
)

train_dataset, test_dataset = mnist_dataloader.load_data()

# Batches of 64 from two worker processes; only the training data is shuffled.
batch_size = 64
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=2
)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=2
)

In [None]:
class CNN(nn.Module):
    """Two unpadded 3x3 conv layers with 2x2 max-pooling, then two linear layers.

    ``fc1`` takes 64 * 5 * 5 flattened features and ``fc2`` produces 10
    class scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        first = self.pool(F.relu(self.conv1(x)))
        second = self.pool(F.relu(self.conv2(first)))
        # One row of features per sample.
        flat = second.view(second.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Create the CNN, then move its parameters to the selected device.
model_cnn = CNN()
model_cnn = model_cnn.to(device)

In [None]:
class VGG16OneChannel(nn.Module):
    """Single-channel image classifier with a ``features``/``classifier`` split.

    ``features`` holds two (conv 3x3, ReLU, max-pool 2x2) stages and
    ``classifier`` two linear layers ending in 10 scores.

    NOTE(review): despite the name, all layers here are created from scratch;
    this class loads no VGG16 weights.
    """

    def __init__(self):
        super().__init__()
        conv_stack = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 10),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        feature_maps = self.features(x)
        # Collapse everything but the batch dimension before the linear head.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)

# Create the network, then move its parameters to the selected device.
model_vgg16_one_channel = VGG16OneChannel()
model_vgg16_one_channel = model_vgg16_one_channel.to(device)

In [None]:
class alexnetChannel(nn.Module):
    """Single-channel image classifier: a conv ``features`` stack and a linear ``classifier``.

    NOTE(review): despite the name, every layer is created from scratch here;
    this class loads no AlexNet weights.
    """

    def __init__(self):
        super().__init__()
        feature_layers = (
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Register layers under "0", "1", ... — the names nn.Sequential
        # assigns to positional modules.
        self.features = nn.Sequential()
        for position, layer in enumerate(feature_layers):
            self.features.add_module(str(position), layer)

        classifier_layers = (
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 10),
        )
        self.classifier = nn.Sequential()
        for position, layer in enumerate(classifier_layers):
            self.classifier.add_module(str(position), layer)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        maps = self.features(x)
        batch = maps.size(0)
        return self.classifier(maps.view(batch, -1))

# Create the network, then move its parameters to the selected device.
model_alexnet = alexnetChannel()
model_alexnet = model_alexnet.to(device)

In [None]:
def fine_tune(model, train_loader, test_loader, num_epochs=5, learning_rate=0.001):
    """Train ``model`` with Adam and cross-entropy, printing test accuracy per epoch.

    Args:
        model: classifier returning per-class scores for a batch of images.
        train_loader: iterable of ``(images, labels)`` training batches.
        test_loader: iterable of ``(images, labels)`` evaluation batches.
        num_epochs: number of passes over ``train_loader``.
        learning_rate: learning rate given to the Adam optimiser.

    Returns:
        None. The accuracy on ``test_loader`` is printed after every epoch
        (``nan`` when the loader yields no samples).
    """
    # Follow the model's own device instead of a notebook-level ``device``
    # variable, which may be missing or point to a different device.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Evaluate without gradient tracking and with eval-mode layers.
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Avoid a ZeroDivisionError when the test loader is empty.
        accuracy = correct / total if total else float("nan")
        print(f"Epoch {epoch + 1}/{num_epochs}, Accuracy: {accuracy}")