In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize((0.5,), (0.5,))  # Normalize the pixel values
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128) 
        self.fc2 = nn.Linear(128, 10)  

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7) 
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


model = CNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(2):  
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

# Evaluate the model
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


[1,   100] loss: 0.711
[1,   200] loss: 0.168
[1,   300] loss: 0.120
[1,   400] loss: 0.096
[1,   500] loss: 0.085
[1,   600] loss: 0.068
[1,   700] loss: 0.075
[1,   800] loss: 0.066
[1,   900] loss: 0.064
[2,   100] loss: 0.046
[2,   200] loss: 0.044
[2,   300] loss: 0.053
[2,   400] loss: 0.039
[2,   500] loss: 0.051
[2,   600] loss: 0.046
[2,   700] loss: 0.046
[2,   800] loss: 0.039
[2,   900] loss: 0.042
Finished Training
Accuracy of the network on the 10000 test images: 98 %


In [37]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class MNISTObjectDetectionDataset(Dataset):
    def __init__(self, mnist_dataset):
        self.mnist_dataset = mnist_dataset
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((64, 64)),  # Resize images to fit the expected input size of Faster R-CNN
            transforms.ToTensor(),
        ])

    def __len__(self):
        return len(self.mnist_dataset)

    def __getitem__(self, idx):
        image, label = self.mnist_dataset[idx]
        image = self.transform(image)
        target = {
            "boxes": torch.tensor([[0, 0, 64, 64]], dtype=torch.float32),  # Assuming single bounding box for entire image
            "labels": torch.tensor([label], dtype=torch.int64),
        }
        return image, target

# Load MNIST dataset
mnist_train = MNIST(root='./data', train=True, download=True, transform=ToTensor())
mnist_test = MNIST(root='./data', train=False, download=True, transform=ToTensor())

# Create Faster R-CNN model
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280  # Adjusting backbone output channels
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                 output_size=7,
                                                 sampling_ratio=2)
model = FasterRCNN(backbone,
                   num_classes=10,  # 10 classes for digits 0-9
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

# Convert MNIST dataset to object detection dataset
train_dataset = MNISTObjectDetectionDataset(mnist_train)
test_dataset = MNISTObjectDetectionDataset(mnist_test)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=4)

# Define device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move model to device
model.to(device)

# Define optimizer and loss function
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [11]:
import time
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# Define function to train and evaluate a model
def train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, num_epochs=2):
    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Print average loss after each epoch
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

    end_time = time.time()
    print(f"Training time: {end_time - start_time} seconds")

    # Evaluate model on test set
    model.eval()
    test_predictions = []
    true_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    # Calculate accuracy and F1 score
    accuracy = accuracy_score(true_labels, test_predictions)
    f1 = f1_score(true_labels, test_predictions, average='weighted')

    return accuracy, f1

# Train and evaluate CNN model
cnn_model = CNN().to(device)
cnn_optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)
cnn_criterion = nn.CrossEntropyLoss()

cnn_accuracy, cnn_f1 = train_and_evaluate(cnn_model, trainloader, testloader, cnn_criterion, cnn_optimizer)

# Train and evaluate Faster R-CNN model (For illustration purposes)
faster_rcnn_model = model
faster_rcnn_optimizer = optimizer
faster_rcnn_criterion = nn.CrossEntropyLoss()

faster_rcnn_accuracy, faster_rcnn_f1 = train_and_evaluate(faster_rcnn_model, train_loader, test_loader, faster_rcnn_criterion, faster_rcnn_optimizer)

# Print results
print("CNN Model:")
print("Accuracy:", cnn_accuracy)
print("F1 Score:", cnn_f1)

print("Faster R-CNN Model:")
print("Accuracy:", faster_rcnn_accuracy)
print("F1 Score:", faster_rcnn_f1)


Epoch 1, Loss: 0.16699306400212796
Epoch 2, Loss: 0.046605426322808785
Training time: 30.359601497650146 seconds




AttributeError: 'dict' object has no attribute 'to'

In [None]:
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import torch
from sklearn.metrics import accuracy_score, f1_score
import time

# Define function to fine-tune pre-trained model
def fine_tune(model, train_loader, test_loader, optimizer, criterion, num_epochs=2):
    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Print average loss after each epoch
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

    end_time = time.time()
    print(f"Training time: {end_time - start_time} seconds")

    # Evaluate model on test set
    model.eval()
    test_predictions = []
    true_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    # Calculate accuracy and F1 score
    accuracy = accuracy_score(true_labels, test_predictions)
    f1 = f1_score(true_labels, test_predictions, average='weighted')

    return accuracy, f1

# Load pre-trained VGG16 model
vgg16_model = models.vgg16(pretrained=True)

# Modify the first convolutional layer to accept single-channel images
vgg16_model.features[0] = nn.Conv2d(1, 64, kernel_size=3, padding=1)

# Modify the classifier part of VGG16 to fit the MNIST dataset (10 output classes)
num_features = vgg16_model.classifier[0].in_features
vgg16_model.classifier = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(256, 128),
    nn.ReLU(True),
    nn.Dropout(),
    nn.Linear(128, 10)
)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg16_model.to(device)

# Define optimizer and criterion
vgg16_optimizer = optim.SGD(vgg16_model.parameters(), lr=0.001, momentum=0.9)
vgg16_criterion = nn.CrossEntropyLoss()

# Fine-tune VGG16 model
vgg16_accuracy, vgg16_f1 = fine_tune(vgg16_model, trainloader, testloader, vgg16_optimizer, vgg16_criterion)

# Print results
print("VGG16 Model:")
print("Accuracy:", vgg16_accuracy)
print("F1 Score:", vgg16_f1)
