Official Documentation: https://pytorch.org/hub/pytorch_vision_alexnet/

In [57]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
from google.colab import drive
import os
import torch.nn as nn
from PIL import Image
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
drive.mount('/content/drive')
dataset_path = "/content/drive/MyDrive/Trade_finance_small_dataset"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [58]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [59]:
for param in model.parameters():
    param.requires_grad = False
num_classes = 5
model.classifier[6] = torch.nn.Linear(4096, num_classes)

In [60]:
from torchsummary import summary

summary(model, (3, 224, 224))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 55, 55]          23,296
              ReLU-2           [-1, 64, 55, 55]               0
         MaxPool2d-3           [-1, 64, 27, 27]               0
            Conv2d-4          [-1, 192, 27, 27]         307,392
              ReLU-5          [-1, 192, 27, 27]               0
         MaxPool2d-6          [-1, 192, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         663,936
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 256, 13, 13]         884,992
             ReLU-10          [-1, 256, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         590,080
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
AdaptiveAvgPool2d-14            [-1, 25

In [61]:
classes = os.listdir(dataset_path)
classes
label_dict = {value: idx for idx, value in enumerate(classes)}
id_label_dict = {idx: value for idx, value in enumerate(classes)}

In [62]:
id_label_dict

{0: 'packing_list',
 1: 'bill_of_lading_first_page',
 2: 'certificate_of_origin_first_page',
 3: 'Insurance_Certificate_pngs_first_page',
 4: 'covering_schedule'}

In [63]:
train_images_dict = {
            'images': [],  # List of train images as PIL images
            'labels': []  # List of corresponding labels (strings) for train images
        }

test_images_dict = {
            'images': [],
            'labels': []
        }



In [64]:
ls

[0m[01;34mAlexnetModels[0m/


In [47]:
%cd drive/MyDrive/AlexNet

/content/drive/MyDrive/AlexNet


In [65]:
os.mkdir("train_images")
os.mkdir("test_images")

In [66]:
import random
import shutil
from PIL import Image
random.seed(10)
train_files = []
test_files = []
for document in os.listdir(dataset_path):
  document_path = os.path.join(dataset_path,document)
  files= os.listdir(os.path.join(dataset_path,document))
  random.shuffle(files)
  for train_file in files[:-int(0.2 * len(files))]:
    if train_file not in os.listdir("train_images"):
      shutil.copy(os.path.join(document_path, train_file), "train_images")
      img = Image.open(os.path.join(document_path, train_file)).convert("RGB")
      train_images_dict["images"].append(img)
      train_images_dict["labels"].append(label_dict[document])
  for test_file in files[-int(0.2 * len(files)):]:
    if test_file not in os.listdir("test_images"):
      shutil.copy(os.path.join(document_path, test_file), "test_images")
      img = Image.open(os.path.join(document_path, test_file)).convert("RGB")
      test_images_dict["images"].append(img)
      test_images_dict["labels"].append(label_dict[document])


In [67]:
class ImageDataset(Dataset):
    '''
        Sample Input:
        train_images = {
            'images': [PIL_image1, PIL_image2, PIL_image3, ...],  # List of train images as PIL images
            'labels': ['cat', 'dog', 'bird', ...]  # List of corresponding labels (strings) for train images
        }

        test_images = {
            'images': [PIL_image4, PIL_image5, PIL_image6, ...],
            'labels': ['cat', 'dog', 'bird', ...]
        }

        val_images = {
            'images': [PIL_image7, PIL_image8, PIL_image9, ...],
            'labels': ['cat', 'dog', 'bird', ...]
        }
    '''
    def __init__(self, image_dict, transform=None):
        self.image_dict = image_dict
        self.transform = transform

    def __len__(self):
        return len(self.image_dict['images'])

    def __getitem__(self, idx):
        image = self.image_dict['images'][idx]
        label = self.image_dict['labels'][idx]

        if self.transform:
            image = self.transform(image)

        # Convert label to tensor
        label = torch.tensor(int(label))

        return image, label

The mean and standard deviation are calculated on the training set of the ImageNet dataset, which consists of a large number of RGB images. By normalizing the input image using the same mean and standard deviation values, you are aligning the input data distribution with the data distribution seen during training.

The mean values represent the **average intensity of the red, green, and blue channels across the ImageNet training set**. The standard deviation values indicate the variation or spread of the pixel values within each channel.

When you normalize an input image using these mean and standard deviation values, each channel's pixel values will be adjusted to have a mean of approximately 0 and a standard deviation of approximately 1. This normalization step helps the model to handle the input data more effectively and can improve training convergence and performance.

It's important to note that these mean and standard deviation values are specific to the ImageNet dataset and models trained on it. If you're working with a different dataset or a model trained on a different dataset, you may need to use different mean and standard deviation values for normalization based on the statistics of your specific dataset.

```python
input_normalized = (input_tensor - mean) / std
```
- `input_tensor` values range [0,1]
- `input_tensor` shape -> (Channels, Height, Width) ; (3, 224, 224) in AlexNet

In [68]:
class ImageTransformer:
    def __init__(self):
        self._img_size = 256
        self._img_crop_size = 224
        self._mean = [0.485, 0.456, 0.406]
        self._std = [0.229, 0.224, 0.225]
        self._required_channels = "RGB"

        self.transform = transforms.Compose([
            transforms.Resize(self._img_size),
            transforms.CenterCrop(self._img_crop_size),
            transforms.Lambda(lambda x: x.convert(self._required_channels) if x.mode != self._required_channels else x),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._mean, std=self._std)
        ])

# Loading Data

In [69]:
# Create the data loaders for train, test, and validation
train_dataset = ImageDataset(train_images_dict, transform=ImageTransformer().transform)
test_dataset = ImageDataset(test_images_dict, transform=ImageTransformer().transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [70]:
import torch.nn as nn
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [71]:
def train_one_epoch(epoch_index):
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(train_loader):
      #print(data)
      # Every data instance is an input + label pair
      inputs, labels = data
      print("input is ", inputs[0].shape)

      # Zero your gradients for every batch!
      optimizer.zero_grad()

      # Make predictions for this batch
      output = model(inputs)
      output[0].shape
      # Compute the loss and its gradients
      loss = loss_fn(output, labels)
      loss.backward()

      # Adjust learning weights
      optimizer.step()

      # Gather data and report
      running_loss += loss.item()
    last_loss = running_loss /10    # loss per batch

    return last_loss

In [72]:
# Initializing in a separate cell so we can easily add more epochs to the same run
from datetime import datetime
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
#writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))


In [73]:
EPOCHS = 10
epoch_number = 0
best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number)

    # We don't need gradients on to do reporting
    model.train(False)

    running_test_loss = 0.0
    for i, test_data in enumerate(test_loader):
        test_inputs, test_labels = test_data
        test_outputs = model(test_inputs)
        test_loss = loss_fn(test_outputs, test_labels)
        running_test_loss += test_loss

    avg_test_loss = running_test_loss / (i + 1)
    print('LOSS train {} test {}'.format(avg_loss, avg_test_loss))

    # Log the running loss averaged per batch
    # for both training and validation

    # Track best performance, and save the model's state
    # if avg_test_loss < best_vloss:
    #     best_vloss = avg_test_loss
    #     model_path = 'model_{}_{}'.format(timestamp, epoch_number)
    #     torch.save(model.state_dict(), model_path)

    epoch_number += 1
    if epoch_number == 5 or epoch_number == 10:
      path = "AlexnetModels/model_new_{}epochs".format(epoch_number)
      torch.save(model, path)

EPOCH 1:
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
LOSS train 0.4718326926231384 test 1.2501769065856934
EPOCH 2:
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
LOSS train 0.3501650333404541 test 0.9933612942695618
EPOCH 3:
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
LOSS train 0.27775235176086427 test 0.9115647077560425
EPOCH 4:
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
LOSS train 0.2501747727394104 test 0.8654217720031738
EPOCH 5:
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
LOSS train 0.23672982454299926 test 0.8386901021003723
EPOCH 6:
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
input is  torch.Size([3, 224, 224])
LOSS train 0.1798469126224