In [46]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import json
from PIL import Image
import os
from torch.utils.data import random_split
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

In [3]:
# Download data in json format

os.makedirs("images_train", exist_ok=True)
os.makedirs("images_test", exist_ok=True)


train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)


test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

# Create JSON files for training and test datasets
def create_json(dataset, json_filename):
    data_list = []
    for i, (image, label) in enumerate(dataset):
        data_list.append({"img_path": f"{i}.png", "labels": label})
        base_name = os.path.basename(json_filename)
        type_data = base_name.split('_')[0] # returns train or test
        directory = "images_" + type_data
        image_path = os.path.join(directory, f"{i}.png")
        torchvision.utils.save_image(image, image_path)
    with open(json_filename, 'w') as json_file:
      for data in data_list:
        json.dump(data, json_file)
        json_file.write('\n')

create_json(train_data, "train_data.json")
create_json(test_data, "test_data.json")

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 14535732.30it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 251562.95it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 4889624.14it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 11187708.29it/s]


Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [35]:
# Dataset customed

class CustomImageDataset(Dataset):

    def __init__(self, json_filename, transform=None):
        """
        Arguments:
            json_filename (string): Json file with images paths and labels.
            transform (callable, optional): Optional transform to be applied.
        """
        self.data_list = []
        with open(json_filename, 'r') as file:
          for line in file:
              loaded_dict = json.loads(line)
              self.data_list.append(loaded_dict)
        self.transform = transform
        self.json_filename = json_filename

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
      img_path_aux = self.data_list[idx]['img_path']
      base_name = os.path.basename(self.json_filename)
      type_data = base_name.split('_')[0]
      directory = "images_" + type_data
      img_path = os.path.join(directory, img_path_aux)
      image = Image.open(img_path)
      if self.transform:
            image = self.transform(image)

      label = self.data_list[idx]['labels']
      return image, label


# Transforming the image in getitem
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    #transforms.RandomCrop((224,224))
    transforms.ToTensor()
    #transforms.ToTensor(),
    #transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dataset = CustomImageDataset("train_data.json", transform)
test_dataset = CustomImageDataset("test_data.json", transform)

In [47]:
# This code divides the training in train-val data

# 70 % of 60.000 training and 30% validation
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

print("Train size: ", train_size, "\nValidation size: ", val_size, "\nTest size: ", len(test_dataset))
new_train_dataset, new_val_dataset = random_split(train_dataset, [train_size, val_size])

batch_size = 32
train_loader = DataLoader(new_train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(new_val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

Train size:  48000 
Validation size:  12000 
Test size:  10000


In [48]:
# BASELINE: LeNet-5 architecture
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)  # Padding to match input size
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5x5 image dimension after max pooling
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = torch.relu(self.conv2(x))
        x = self.maxpool2(x)
        x = x.view(-1, 16 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.log_softmax(x, dim=1)

# Initialize the weights using Kaiming Uniform initialization
def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
        if m.bias is not None:
            init.constant_(m.bias, 0)

# Model, optimizer, and loss function initialization
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # in Alex computer no cuda, already checked, Alimo you could check
model = LeNet5().to(device)
model.apply(init_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
def train(model, device, train_loader, optimizer, criterion, epochs):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if batch_idx % 100 == 99:
                print(f'Epoch {epoch+1}, Batch {batch_idx+1}/{len(train_loader)}, Loss: {running_loss/100:.4f}')
                running_loss = 0.0

# Train the model
train(model, device, train_loader, optimizer, criterion, epochs=5)


RuntimeError: Given groups=1, weight of size [6, 1, 5, 5], expected input[32, 3, 256, 256] to have 1 channels, but got 3 channels instead