In [1]:
import torch
from torch import nn, optim

import torchvision.transforms as transforms
import torchvision.datasets as datasets


  from .autonotebook import tqdm as notebook_tqdm
2024-05-28 21:56:18.219840: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [7]:
class Vgg_16(nn.Module):

    '''
    Truncated VGG-style convolutional classifier for single-channel images.

    Only the first VGG-16 convolution stage is instantiated (two 3x3
    convolutions with 64 filters followed by 2x2 max pooling). The deeper
    128/256/512-filter stages and the 4096-unit dense layers of the full
    architecture are not part of this model. The feature map is adaptively
    average-pooled to 7x7 and classified by a 512-unit hidden layer.

    Parameters:
    - num_classes (int, optional): Number of classes in the classification task. Defaults to 1000.

    Attributes:
    - loss_log (list): Mean training loss of every epoch run by fit().
    - accuracy_log (list): Training accuracy (0..1) of every epoch run by fit().
    '''

    def __init__(self, num_classes=1000):

        super().__init__()

        # Training history; fit() appends one entry per epoch to each list.
        self.loss_log = []
        self.accuracy_log = []

        # First VGG stage only. The input is expected to have one channel.
        self.convolution_layers = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Adaptive pooling fixes the spatial size at 7x7, so the dense head
        # works for any input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.dense_layers = nn.Sequential(
            nn.Linear(64 * 7 * 7, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):

        '''
        Forward pass of the Vgg_16 model.

        Parameters:
        - x (torch.Tensor): Input batch with one channel, i.e. (N, 1, H, W).

        Returns:
        - torch.Tensor: Unnormalised class scores of shape (N, num_classes).
        '''

        x = self.convolution_layers(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial positions.
        x = torch.flatten(x, 1)
        return self.dense_layers(x)

    def fit(self, data, loss_func=nn.CrossEntropyLoss, optimizer=optim.Adam, epochs=10, device="cpu", lr=0.001):

        '''
        Trains the Vgg_16 model.

        After every epoch the mean of the per-batch losses and the training
        accuracy are appended to loss_log / accuracy_log and printed.

        Parameters:
        - data (torch.utils.data.DataLoader): Re-iterable source of (images, labels) batches.
        - loss_func (type, optional): Loss class, instantiated with no arguments. Defaults to nn.CrossEntropyLoss.
        - optimizer (type, optional): Optimizer class, called as optimizer(params, lr=lr). Defaults to optim.Adam.
        - epochs (int, optional): Number of epochs for training. Defaults to 10.
        - device (str, optional): Device to run the training on ('cpu' or 'cuda'). Defaults to "cpu".
        - lr (float, optional): Learning rate handed to the optimizer. Defaults to 0.001.

        Raises:
        - ValueError: If an epoch yields no training samples.
        '''

        self.to(device=device)

        criterion = loss_func()
        opt = optimizer(self.parameters(), lr=lr)

        # Training loop
        for epoch in range(epochs):
            self.train()
            running_loss = 0.0
            correct_predictions = 0
            total_samples = 0
            num_batches = 0

            for images, labels in data:
                images, labels = images.to(device), labels.to(device)

                # Clear gradients BEFORE the backward pass. Zeroing only after
                # step() lets gradients left over from an interrupted run or an
                # earlier backward() leak into the first update.
                opt.zero_grad()

                outputs = self(images)
                loss = criterion(outputs, labels)
                loss.backward()
                opt.step()

                running_loss += loss.item()
                predicted = outputs.detach().argmax(dim=1)
                total_samples += labels.size(0)
                correct_predictions += (predicted == labels).sum().item()
                num_batches += 1

            if total_samples == 0:
                # Fail with a clear message instead of a ZeroDivisionError below.
                raise ValueError("fit() received no training samples: `data` is empty")

            epoch_loss = running_loss / num_batches
            epoch_accuracy = correct_predictions / total_samples

            self.loss_log.append(epoch_loss)
            self.accuracy_log.append(epoch_accuracy)

            print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {100 * epoch_accuracy:.2f}%')

    def evaluate(self, dataloader, device=None):

        '''
        Evaluates the Vgg_16 model.

        The model is moved to the evaluation device and left in eval mode.

        Parameters:
        - dataloader (torch.utils.data.DataLoader): Source of (images, labels) evaluation batches.
        - device (str, optional): Device to evaluate on. Defaults to 'cuda' when available, otherwise 'cpu'.

        Returns:
        - float: Accuracy (0..1) of the model on the evaluation data.

        Raises:
        - ValueError: If the dataloader yields no samples.
        '''

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        correct_predictions = 0
        total_samples = 0

        self.to(device=device)
        self.eval()  # disables dropout so predictions are deterministic

        with torch.no_grad():
            for images, labels in dataloader:
                images, labels = images.to(device), labels.to(device)

                predicted = self(images).argmax(dim=1)

                total_samples += labels.shape[0]
                correct_predictions += (predicted == labels).sum().item()

        if total_samples == 0:
            raise ValueError("evaluate() received no samples: `dataloader` is empty")

        return correct_predictions / total_samples

In [8]:
# Define data transformations
transform = transforms.Compose([
    transforms.Resize(224),               # Resize so the shorter image side is 224 px
    transforms.ToTensor(),                # Convert PIL Image to tensor
    transforms.Normalize((0.5,), (0.5,)), # Normalize the pixel values to the range [-1, 1]
])

# Load FashionMNIST dataset (downloaded into ./data when not already present)
fashion_mnist_train = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
fashion_mnist_test = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Dedicated, seeded generator: the random subsets are identical on every run
# and the global RNG state (used for weight init and shuffling) is left alone.
subset_rng = torch.Generator().manual_seed(0)

# Limit the size of the train dataset to 5,000 samples
train_indices = torch.randperm(len(fashion_mnist_train), generator=subset_rng)[:5000].tolist()
train_subset = torch.utils.data.Subset(fashion_mnist_train, train_indices)

# Limit the size of the test dataset to 1,000 samples
test_indices = torch.randperm(len(fashion_mnist_test), generator=subset_rng)[:1000].tolist()
test_subset = torch.utils.data.Subset(fashion_mnist_test, test_indices)

# Create data loaders (only the training batches are reshuffled every epoch)
train_loader = torch.utils.data.DataLoader(dataset=train_subset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_subset, batch_size=64, shuffle=False)



In [9]:
# Ten output classes, one per FashionMNIST category.
model = Vgg_16(num_classes=10)

# Total number of scalar parameters held by the network.
sum(p.numel() for p in model.parameters())

1648842

In [None]:
# Train on the training subset, on the device selected at the top of the notebook.
model.fit(data=train_loader, device=device)

In [16]:
# torch.save(model, "./model_vgg.pth")
# NOTE: the checkpoint is a fully pickled module, so loading it executes pickle
# code. Only load files you created yourself. `weights_only=False` is stated
# explicitly because newer PyTorch releases default to True, which rejects
# whole-module checkpoints. `map_location` lets a checkpoint saved on a GPU be
# loaded on a CPU-only machine.
model = torch.load("./model_vgg.pth", map_location=device, weights_only=False)

In [18]:
# Accuracy on the training subset.
model.evaluate(dataloader=train_loader)

0.8264

In [19]:
# Accuracy on the held-out test subset.
model.evaluate(dataloader=test_loader)

0.813