In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import json
from PIL import Image
import os
from torch.utils.data import random_split
import torch.nn as nn
import torch.optim as optim
import torchsummary
import torch.nn.init as init

In [2]:
# Download Fashion-MNIST through torchvision and prepare the export folders

# One folder per split; each will receive one PNG per sample.
os.makedirs(name="images_train", exist_ok=True)
os.makedirs(name="images_test", exist_ok=True)

# Training split, stored under ./data; samples are converted to tensors on access.
train_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())

# Held-out test split, same storage root and the same tensor conversion.
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

# Export a dataset as PNG files plus a JSON-lines index file
def create_json(dataset, json_filename):
    """Save every image of ``dataset`` as a PNG and write a JSON-lines index.

    The images go to the folder ``images_<prefix>`` (relative to the current
    working directory), where ``<prefix>`` is the part of the index file's base
    name before the first underscore, e.g. ``train_data.json`` ->
    ``images_train``. The folder is created if it does not exist yet.

    Arguments:
        dataset (iterable): Yields ``(image, label)`` pairs. ``image`` must be
            accepted by ``torchvision.utils.save_image`` and ``label`` must be
            JSON serialisable.
        json_filename (string): Path of the index file to write. One JSON
            object per line: ``{"img_path": "<i>.png", "labels": <label>}``.
    """
    # The target folder depends only on the file name, so resolve it once
    # instead of once per sample.
    type_data = os.path.basename(json_filename).split('_')[0]  # e.g. train or test
    directory = "images_" + type_data
    # Do not rely on another cell having created the folder beforehand.
    os.makedirs(directory, exist_ok=True)

    data_list = []
    for i, (image, label) in enumerate(dataset):
        file_name = f"{i}.png"
        data_list.append({"img_path": file_name, "labels": label})
        torchvision.utils.save_image(image, os.path.join(directory, file_name))

    # The index is written only after every image has been saved.
    with open(json_filename, 'w') as json_file:
        for data in data_list:
            json_file.write(json.dumps(data) + '\n')

# Export both splits: PNGs go to images_train / images_test, indexes to the JSON files.
create_json(dataset=train_data, json_filename="train_data.json")
create_json(dataset=test_data, json_filename="test_data.json")

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 16555115.05it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 299659.14it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5558579.48it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 15015491.65it/s]


Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [3]:
# Custom dataset backed by a JSON-lines index and a folder of image files

class CustomImageDataset(Dataset):
    """Dataset that reads images listed in a JSON-lines index file.

    Every non-empty line of the index is a JSON object with the keys
    ``img_path`` (file name inside the image folder) and ``labels``.
    """

    def __init__(self, json_filename, transform=None, image_dir=None):
        """
        Arguments:
            json_filename (string): Json file with images paths and labels.
            transform (callable, optional): Optional transform to be applied.
            image_dir (string, optional): Folder that contains the images.
                Defaults to ``images_<prefix>``, where ``<prefix>`` is the part
                of the index file's base name before the first underscore
                (``train_data.json`` -> ``images_train``).
        """
        self.data_list = []
        with open(json_filename, 'r') as file:
            for line in file:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline) instead of
                    # failing inside json.loads.
                    continue
                self.data_list.append(json.loads(line))
        self.transform = transform
        self.json_filename = json_filename

        # Resolve the image folder once here rather than on every item access.
        if image_dir is None:
            type_data = os.path.basename(json_filename).split('_')[0]
            image_dir = "images_" + type_data
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of entries read from the index file."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for entry ``idx``.

        The image is passed through ``self.transform`` when one was given;
        otherwise the PIL image is returned as is.
        """
        record = self.data_list[idx]
        img_path = os.path.join(self.image_dir, record['img_path'])
        image = Image.open(img_path)
        # Image.open is lazy; read the pixels now so the underlying file does
        # not have to stay open after this call returns.
        image.load()
        if self.transform:
            image = self.transform(image)

        label = record['labels']
        return image, label


# Preprocessing applied to every image inside CustomImageDataset.__getitem__
transform = transforms.Compose([
    transforms.Resize(size=(28, 28)),              # force a 28x28 spatial size
    transforms.Grayscale(num_output_channels=1),   # single channel: 28x28x1
    transforms.ToTensor(),                         # PIL image -> tensor
])

# Both splits share the same preprocessing pipeline.
train_dataset = CustomImageDataset(json_filename="train_data.json", transform=transform)
test_dataset = CustomImageDataset(json_filename="test_data.json", transform=transform)

In [4]:
# Split the training data into a training and a validation subset

# 80 % of the training samples are kept for training, the remaining 20 % for validation
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

print("Train size: ", train_size, "\nValidation size: ", val_size, "\nTest size: ", len(test_dataset))

# Seed the split so that re-running the notebook assigns the same samples to
# each subset (otherwise validation results are not comparable between runs).
split_generator = torch.Generator().manual_seed(42)
new_train_dataset, new_val_dataset = random_split(
    train_dataset, [train_size, val_size], generator=split_generator
)

batch_size = 32
# Only the training loader is shuffled; validation and test keep a fixed order.
train_loader = DataLoader(new_train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(new_val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

Train size:  48000 
Validation size:  12000 
Test size:  10000


In [16]:
# BASELINE: LeNet-5 architecture
class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv/ReLU/avg-pool stages followed by three linear layers.

    Takes 1x28x28 inputs and produces 10 output scores per sample.
    """

    def __init__(self):
        super().__init__()
        conv_stage = [
            # 5x5 convolution, 1 -> 6 channels; padding 2 keeps 28x28 because
            # the inputs are 28x28 (not 32x32)
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            # average pooling halves the resolution: 6@14x14
            nn.AvgPool2d(kernel_size=2, stride=2),
            # 5x5 convolution without padding, 6 -> 16 channels: 16@10x10
            nn.Conv2d(6, 16, kernel_size=5, stride=1),
            nn.ReLU(),
            # second pooling: 16@5x5
            nn.AvgPool2d(kernel_size=2, stride=2),
        ]
        dense_stage = [
            nn.Flatten(),                # 16*5*5 = 400 features
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),           # one score per class
        ]
        self.feature = nn.Sequential(*conv_stage)
        self.classifier = nn.Sequential(*dense_stage)

    def forward(self, x):
        """Run the convolutional stage, then the classifier, and return its output."""
        feature_maps = self.feature(x)
        return self.classifier(feature_maps)


# Kaiming (He) uniform initialisation, meant to be used with Module.apply
def init_weights(m):
    """Re-initialise ``m`` in place if it is a Conv2d or Linear layer.

    Weights are drawn with Kaiming uniform initialisation for ReLU and the
    bias, when the layer has one, is set to zero. Any other module type is
    left untouched.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    init.kaiming_uniform_(m.weight, nonlinearity='relu')
    if m.bias is None:
        return
    init.constant_(m.bias, 0)

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the baseline on the chosen device, then re-initialise its layers.
baseline_model = LeNet5().to(device)
baseline_model.apply(init_weights)

# Adam over the baseline's parameters; cross-entropy as the training criterion.
optimizer = optim.Adam(params=baseline_model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

# Print the layer-by-layer summary for one 1x28x28 input.
torchsummary.summary(baseline_model, input_size=(1, 28, 28))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
              ReLU-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              ReLU-5           [-1, 16, 10, 10]               0
         AvgPool2d-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
              ReLU-9                  [-1, 120]               0
           Linear-10                   [-1, 84]          10,164
             ReLU-11                   [-1, 84]               0
           Linear-12                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
---------------------------------

In [30]:
# Alternative 1: same layers as the baseline plus Dropout, to reduce overfitting
class CNN1(nn.Module):
    """LeNet-5 style CNN with Dropout(0.5) after each hidden linear layer.

    Takes 1x28x28 inputs and produces 10 output scores per sample.
    """

    def __init__(self):
        super().__init__()
        conv_stage = [
            # 5x5 convolution, 1 -> 6 channels; padding 2 keeps 28x28 because
            # the inputs are 28x28 (not 32x32)
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            # average pooling halves the resolution: 6@14x14
            nn.AvgPool2d(kernel_size=2, stride=2),
            # 5x5 convolution without padding, 6 -> 16 channels: 16@10x10
            nn.Conv2d(6, 16, kernel_size=5, stride=1),
            nn.ReLU(),
            # second pooling: 16@5x5
            nn.AvgPool2d(kernel_size=2, stride=2),
        ]
        dense_stage = [
            nn.Flatten(),                # 16*5*5 = 400 features
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(84, 10),           # one score per class
        ]
        self.feature = nn.Sequential(*conv_stage)
        self.classifier = nn.Sequential(*dense_stage)

    def forward(self, x):
        """Run the convolutional stage, then the classifier, and return its output."""
        feature_maps = self.feature(x)
        return self.classifier(feature_maps)


# Build the dropout variant on the same device and re-initialise its layers.
cnn1 = CNN1().to(device)
cnn1.apply(init_weights)

# NOTE(review): this rebinds the notebook-level name `optimizer`; from here on
# it refers to an Adam optimizer over cnn1's parameters.
optimizer = optim.Adam(params=cnn1.parameters(), lr=0.001)

# Print the layer-by-layer summary for one 1x28x28 input.
torchsummary.summary(cnn1, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
              ReLU-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              ReLU-5           [-1, 16, 10, 10]               0
         AvgPool2d-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
              ReLU-9                  [-1, 120]               0
          Dropout-10                  [-1, 120]               0
           Linear-11                   [-1, 84]          10,164
             ReLU-12                   [-1, 84]               0
          Dropout-13                   [-1, 84]               0
           Linear-14                   

In [40]:
def train(model, device, train_loader, optimizer, loss_fn):
  """Train ``model`` for one pass over ``train_loader``.

  Prints the mean loss of every completed group of 100 batches.

  Arguments:
      model: Module to optimise; it is switched to training mode.
      device: Device the batches are moved to before the forward pass.
      train_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
      optimizer: Optimizer that updates the parameters of ``model``.
      loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar tensor.

  Returns:
      tuple: ``(accuracy, mean_loss)`` where ``accuracy`` is the percentage of
      correctly classified samples (measured on each batch before its
      parameter update, with the model in training mode) and ``mean_loss`` is
      the average loss per batch. Both are ``0.0`` for an empty loader.
  """
  model.train()
  running_loss = 0.0        # loss accumulated since the last progress print
  epoch_loss = 0.0          # loss accumulated over the whole pass
  correct, total, num_batches = 0, 0, 0
  for batch_idx, (images, labels) in enumerate(train_loader):
      images, labels = images.to(device), labels.to(device)

      optimizer.zero_grad()

      # Forward Pass
      output = model(images)
      loss = loss_fn(output, labels)

      # Backward Pass
      loss.backward()
      optimizer.step() # applies the update computed from the back-propagated gradients

      batch_loss = loss.item()
      running_loss += batch_loss
      epoch_loss += batch_loss
      num_batches += 1

      # The predicted class is the index of the largest score.
      correct += (output.argmax(dim=1) == labels).sum().item()
      total += labels.size(0)

      if batch_idx % 100 == 99:
          print(f'Batch {batch_idx+1}/{len(train_loader)}, Loss: {running_loss/100:.4f}')
          running_loss = 0.0

  # Guard the divisions so an empty loader yields zeros instead of an error.
  accuracy = 100 * correct / total if total else 0.0
  mean_loss = epoch_loss / num_batches if num_batches else 0.0
  return accuracy, mean_loss

def test(data_loader, model, loss_fn, type_test):
  with torch.no_grad():
      correct, total , loss = 0, 0, 0
      num_batches = len(data_loader)
      model.eval()
      for images, labels in data_loader:
          images, labels = images.to(device), labels.to(device)
          outputs = model(images)
          loss += loss_fn(labels.long(), outputs.float()).item()
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()
  loss /= num_batches

  print(f"{type_test} accuracy: {100 * correct / total} %, Loss: {loss}")


In [42]:
def run(models, epochs=5, lr=0.001):
  """Train and validate every model in ``models`` for ``epochs`` epochs.

  Uses the notebook-level ``device``, ``train_loader``, ``val_loader`` and
  ``loss_fn``, and expects ``train`` and ``test`` to each return an
  ``(accuracy, loss)`` pair.

  Arguments:
      models: Iterable of modules to train, one after the other.
      epochs (int): Number of passes over the training data per model.
      lr (float): Learning rate of the Adam optimizer created for each model.

  Returns:
      tuple: ``(training_results, validation_results)``. Each is a flat list of
      ``(accuracy, loss)`` pairs, one per epoch, in the order the models were
      given (all epochs of the first model, then all epochs of the second, ...).
  """
  training_results = []
  validation_results = []
  for model in models:
    print("Model: ", model)
    # Every model gets an optimizer bound to its own parameters; an optimizer
    # built for a different model would leave this one untrained.
    model_optimizer = optim.Adam(model.parameters(), lr=lr)
    for t in range(epochs):
      print(f"Epoch {t+1}\n---------------------")
      # Train and evaluate the model of the current iteration.
      training_accuracy, training_loss = train(model, device, train_loader, model_optimizer, loss_fn)
      validation_accuracy, validation_loss = test(val_loader, model, loss_fn, "Validation")
      training_results.append((training_accuracy, training_loss))
      validation_results.append((validation_accuracy, validation_loss))
  return training_results, validation_results




In [41]:
# Models to compare: the LeNet-5 baseline and the dropout variant.
models = [baseline_model, cnn1]

# A single epoch per model; the per-epoch (accuracy, loss) pairs are kept for later use.
training_results, validation_results = run(models=models, epochs=1)

Model:  LeNet5(
  (feature): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=400, out_features=120, bias=True)
    (2): ReLU()
    (3): Linear(in_features=120, out_features=84, bias=True)
    (4): ReLU()
    (5): Linear(in_features=84, out_features=10, bias=True)
  )
)
Epoch 1
---------------------
Batch 100/1500, Loss: 0.4142
Batch 200/1500, Loss: 0.4051
Batch 300/1500, Loss: 0.3973
Batch 400/1500, Loss: 0.4073
Batch 500/1500, Loss: 0.4068
Batch 600/1500, Loss: 0.4074
Batch 700/1500, Loss: 0.3843
Batch 800/1500, Loss: 0.4132
Batch 900/1500, Loss: 0.4116
Batch 1000/1500, Loss: 0.3843
Batch 1100/1500, Loss: 0.4022
Batch 1200/1500, Loss: 0.3954
Batch 1300

RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Long'