## Image classification with deep learning methods.

-- Description --

When you train the network, it is recommended to use the GPU resources of your computer.
This will help you to learn the "know how" of setting up a working Python environment on your computer.
If you do not have Nvidia hardware available, or you run into problems with your Python environment, you can use Google Colab instead.
Please go to the menu, Runtime - Change runtime type, and select **GPU** as the hardware accelerator.
Even if you managed to use your own computer successfully, it is highly recommended that you also give the Google Colab environment a try.


In [3]:
# Import libraries
# These libraries should be sufficient for this Practice.
# However, if any other library is needed, please install it by yourself.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torch.utils.data as data
import numpy as np
import time
import os
import random
import matplotlib.pyplot as plt
from matplotlib import colors
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import tensorflow as tf

!pip install medmnist
import medmnist
from medmnist import *

Collecting medmnist
  Downloading medmnist-3.0.1-py3-none-any.whl (25 kB)
Collecting fire (from medmnist)
  Downloading fire-0.6.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.4/88.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->medmnist)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->medmnist)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->medmnist)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->medmnist)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->medmnist

In [39]:
# Hyper-parameters and fixed settings for the experiment
NUM_EPOCHS = 20
BATCH_SIZE = 28
lr = 0.0001
DOWNLOAD_OK = True
N_IMAGES = 1000
im_size = 3*28*28  # number of values per image (presumably channels * height * width)

# Dataset description looked up in INFO under the selected data flag
data_flag = 'bloodmnist'
info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
data_labels = info['label']
num_classes = len(data_labels)

# Dictionary gathering every value defined above under a single name,
# so that functions can receive the whole configuration as one argument.
parameters = dict(
    num_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    lr=lr,
    download_ok=DOWNLOAD_OK,
    data_flag=data_flag,
    im_size=im_size,
    info_task=task,
    n_channels=n_channels,
    num_classes=num_classes,
    n_images=N_IMAGES,
    data_labels=data_labels,
)

# Data preprocessing: define the transform, draw the random subsets and build the loaders
def preprocessing_data(parameters, n_train=700, n_valid=100, n_test=200):
  """Build train/validation/test loaders over random subsets of BloodMNIST.

  Args:
    parameters: configuration dictionary. Reads ``"batch_size"`` and, when
      present, ``"download_ok"`` (defaults to ``True``).
    n_train: number of training samples to draw (default 700).
    n_valid: number of validation samples to draw (default 100).
    n_test: number of test samples to draw (default 200).

  Returns:
    Tuple ``(train_loader, valid_loader, test_loader)``.
  """
  data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])
  ])

  # Honour the configured download flag instead of always forcing a download.
  download = parameters.get("download_ok", True)

  full_train_dataset = BloodMNIST(split="train", transform=data_transform, download=download)
  full_valid_dataset = BloodMNIST(split="val", transform=data_transform, download=download)
  full_test_dataset = BloodMNIST(split="test", transform=data_transform, download=download)

  def _random_subset(full_dataset, size):
    # Draw `size` distinct samples; never ask for more than the split contains.
    size = min(size, len(full_dataset))
    indices = np.random.choice(len(full_dataset), size=size, replace=False)
    return [full_dataset[int(i)] for i in indices]

  train_dataset = _random_subset(full_train_dataset, n_train)
  print(len(train_dataset))

  valid_dataset = _random_subset(full_valid_dataset, n_valid)
  print(len(valid_dataset))

  test_dataset = _random_subset(full_test_dataset, n_test)
  print(len(test_dataset))

  batch_size = parameters["batch_size"]
  train_loader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
  valid_loader = data.DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False)
  # Evaluation data is not shuffled so that repeated evaluations see the same order.
  test_loader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

  return train_loader, valid_loader, test_loader

# Build the three loaders once and bind them to module-level names.
train_loader,valid_loader,test_loader = preprocessing_data(parameters)

Using downloaded and verified file: /root/.medmnist/bloodmnist.npz
Using downloaded and verified file: /root/.medmnist/bloodmnist.npz
Using downloaded and verified file: /root/.medmnist/bloodmnist.npz
700
100
200


In [None]:
# Your code

# Shows the first images of a dataset, each titled with its class name.
def dataset_visualizer(dataset,length_montage):
  """Plot the first ``length_montage ** 2`` samples of ``dataset``.

  Args:
    dataset: indexable collection of ``(image, label)`` pairs; the image must
      support ``permute(1, 2, 0)`` and the label must hold one class index.
    length_montage: side of the montage; at most ``length_montage ** 2``
      images are drawn (fewer if the dataset is smaller).

  The class name is looked up in the module-level ``data_labels`` mapping,
  whose keys are the class indices written as strings.
  """
  n_images = min(length_montage*length_montage, len(dataset))

  for i in range(n_images):
    # Index once so image and label always come from the same access.
    img, label = dataset[i]
    # Read the class index numerically instead of parsing the label's repr,
    # which only worked for one specific array formatting.
    class_key = str(int(np.asarray(label).reshape(-1)[0]))
    plt.figure(figsize=(2,2))
    plt.imshow(img.permute(1,2,0))
    plt.title(data_labels[class_key])
    plt.axis("off")
  plt.show()

# Image viewer that goes through a DataLoader pipeline
def dataloader_visualizer(dataset,num_batches,batch_size=28):
  """Plot every image of the first ``num_batches`` shuffled batches.

  Args:
    dataset: dataset of ``(image, label)`` pairs accepted by ``DataLoader``.
    num_batches: number of batches to display.
    batch_size: samples per batch (default 28, the previously fixed value).

  Each image is shown in its own figure, titled with its numeric class index.
  """
  data_loader = data.DataLoader(dataset,batch_size=batch_size,shuffle=True)
  for batch_idx, (features, labels) in enumerate(data_loader):
      if batch_idx >= num_batches:
        break
      for img, target in zip(features, labels):
        img = img.squeeze()
        # Read the class index from the tensor value; parsing str(tensor)
        # leaves dtype/device suffixes in the title for non-default tensors.
        label = str(int(target.reshape(-1)[0]))
        plt.figure(figsize=(2,2))
        plt.title(label)
        # Three-dimensional images are moved to channel-last for imshow;
        # after squeeze a single-channel image is already two-dimensional.
        plt.imshow(img.permute(1,2,0) if img.dim() == 3 else img)
        plt.axis('off')
        plt.show()


# Display a single batch of images.
# NOTE(review): `blood_train_dataset` is not defined in the visible cells — confirm where it is created.
dataloader_visualizer(blood_train_dataset,1)

In [2]:


# Wraps the creation of the three DataLoaders in a single call
def dataset_loading(train_dataset,valid_dataset,test_dataset,parameters):
  """Return ``(train_loader, valid_loader, test_loader)`` for the given datasets.

  All loaders use ``parameters["batch_size"]``; the train and test loaders
  shuffle their samples, the validation loader does not.
  """
  batch = parameters["batch_size"]
  # (dataset, shuffle flag) pairs, in the order the loaders are returned
  plan = (
    (train_dataset, True),
    (valid_dataset, False),
    (test_dataset, True),
  )
  loaders = tuple(
    data.DataLoader(dataset=subset, batch_size=batch, shuffle=shuffled)
    for subset, shuffled in plan
  )
  return loaders

# Loss criterion associated with the predictive model
def loss_function(task):
  """Return a new ``CrossEntropyLoss`` instance; ``task`` is accepted but not consulted."""
  criterion = torch.nn.CrossEntropyLoss()
  return criterion

# Optimizer used to train the predictive model
def model_optimizer(model, learning_rate=None):
  """Return an Adam optimizer over the parameters of ``model``.

  Args:
    model: module whose ``parameters()`` are optimized.
    learning_rate: step size; when ``None`` (the default) the module-level
      ``lr`` value is used, which is the previous behaviour.
  """
  if learning_rate is None:
    learning_rate = lr
  return torch.optim.Adam(model.parameters(), lr=learning_rate)

# Build the loaders from the blood_* datasets and bind them to module-level names.
# NOTE(review): `blood_train_dataset`, `blood_valid_dataset` and `blood_test_dataset`
# are not defined in the visible cells — confirm where they are created.
train_loader,valid_loader,test_loader = dataset_loading(blood_train_dataset,
                                                        blood_valid_dataset,
                                                        blood_test_dataset,
                                                        parameters)

# len() of a DataLoader is its number of batches, not its number of samples.
print(len(train_loader))
print(len(valid_loader))
print(len(test_loader))


NameError: name 'INFO' is not defined

#Create a deep learning model

In [2]:
# Define a simple CNN model

class Net(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, one 2x2 max-pool, one linear head.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output logits (one per class).
        im_size: number of values in one flattened image, i.e.
            ``in_channels * side * side`` for a square image.

    Raises:
        ValueError: if ``im_size`` is not ``in_channels`` times a perfect square.
    """

    def __init__(self, in_channels, num_classes, im_size):
        super().__init__()
        #Define the desired deep learning model
        #Your code

        # Recover the image side from the flattened size so the linear layer
        # matches the input instead of assuming 28x28 images.
        side = int(round((im_size / in_channels) ** 0.5))
        if in_channels * side * side != im_size:
            raise ValueError(
                f"im_size={im_size} is not in_channels={in_channels} times a square side"
            )
        # Both convolutions keep the spatial size (3x3 kernel, padding 1);
        # only the 2x2 max-pool halves it (rounding down).
        pooled_side = side // 2

        # First convolutional layer
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels,10,kernel_size=3,padding=1),
            nn.ReLU()
        )

        # Second convolutional layer, followed by pooling and flattening
        self.layer2 = nn.Sequential(
            nn.Conv2d(10,50,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Flatten()
        )

        # Fully-connected head producing one logit per class
        self.fc = nn.Sequential(
            nn.Linear(pooled_side*pooled_side*50,num_classes)
        )

        #End your code

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of images."""
        #Your code

        x = self.layer1(x)
        x = self.layer2(x)  # already flattened by nn.Flatten
        x = self.fc(x)
        return x

        #End your code

def net_model_visualizer(net_model):
  """Print the model followed by its total and trainable parameter counts."""
  print(net_model)

  # Count every parameter element once, tracking the trainable share separately.
  total_count = 0
  trainable_count = 0
  for param in net_model.parameters():
    n_elements = param.numel()
    total_count += n_elements
    if param.requires_grad:
      trainable_count += n_elements

  print('Total Parameters:', total_count)
  print('Trainable Parameters:', trainable_count)

#model = Net(in_channels=n_channels, num_classes=num_classes, im_size = im_size)

In [None]:
# Train the model
def train_epoch(model, train_loader, optimizer, criterion, task):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network to train; called as ``model(X)`` and expected to return
            one score per class for each sample.
        train_loader: iterable of ``(X, y)`` batches.
        optimizer: optimizer bound to the parameters of ``model``.
        criterion: loss called as ``criterion(pred, y)``.
        task: accepted for interface compatibility; not consulted.

    Returns:
        Dict with ``"train_acc"`` (fraction of samples whose arg-max prediction
        equals the label) and ``"train_loss"`` (sample-weighted mean loss).
        Both are ``0.0`` when the loader yields no samples.
    """
    correct = 0
    total_loss = 0.0
    total_samples = 0

    # Training mode only needs to be set once, not on every batch.
    model.train()

    for X, y in train_loader:
        pred = model(X)

        # Labels stored as a column vector (N, 1) must become (N,): class-index
        # losses reject 2-D targets, and comparing (N,) predictions with (N, 1)
        # labels would broadcast to (N, N) and corrupt the accuracy count.
        if y.dim() == pred.dim() and y.size(-1) == 1 and pred.size(-1) > 1:
            y = y.squeeze(-1)
        # Class-index targets must be int64 for losses such as CrossEntropyLoss.
        if not torch.is_floating_point(y):
            y = y.long()

        loss = criterion(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(pred, 1)
        total_samples += y.size(0)
        correct += (predicted == y).sum().item()
        total_loss += loss.item() * y.size(0)

    if total_samples == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return {"train_acc": 0.0, "train_loss": 0.0}

    return {
        "train_acc": correct / total_samples,
        "train_loss": total_loss / total_samples,
    }

def centralized_training(train_loader,test_loader,parameters):
  """Create a ``Net``, train it on ``train_loader`` and return the trained model.

  Args:
    train_loader: loader of ``(images, labels)`` training batches.
    test_loader: accepted for interface compatibility; not used yet.
    parameters: configuration dictionary. Reads ``"n_channels"``,
      ``"num_classes"``, ``"im_size"``, ``"lr"``, ``"num_epochs"`` and
      ``"info_task"``.

  Returns:
    The trained model (previously nothing was returned, so the trained
    weights were lost when the function ended).
  """
  centralized_model = Net(in_channels=parameters["n_channels"],
                            num_classes=parameters["num_classes"],
                            im_size=parameters["im_size"])

  net_model_visualizer(centralized_model)
  # Take the learning rate from the configuration passed in, not from a global.
  optimizer = torch.optim.Adam(centralized_model.parameters(),lr=parameters["lr"])
  # Labels are class indices, so a classification loss is needed; MSELoss would
  # compare the per-class scores against the raw index values.
  criterion = torch.nn.CrossEntropyLoss()

  for epoch in range(parameters["num_epochs"]):
    train_history = train_epoch(centralized_model,train_loader,optimizer,criterion,
                                parameters["info_task"])
    # Report the per-epoch metrics, which were computed but never shown.
    print(f'Epoch {epoch}: {train_history}')
    # TODO: also report validation metrics once a validate_epoch helper exists.

  return centralized_model



# Run the training with the module-level loaders and configuration.
centralized_training(train_loader,test_loader,parameters)

#Evaluation

Finally, implement the evaluation of the object classification task. You can implement any metric you want, though the most common are accuracy and AUC (one class against all for the multiclass task). You can use torch.no_grad() to speed up predictions when no gradients are needed.

How do you compare with the MedMNIST benchmarks?

In [None]:
# Evaluation

# Your code
def model_evaluation_dataset(split):
  """Print AUC and accuracy of the module-level ``model`` on one data split.

  Args:
    split: ``'train'`` selects ``train_loader_at_eval``; any other value
      selects ``test_loader``.

  Raises:
    ValueError: if the selected loader yields no batches.

  Relies on module-level names: ``model``, ``task``, ``test_loader`` and
  ``train_loader_at_eval``.
  NOTE(review): ``model`` and ``train_loader_at_eval`` are not defined in the
  visible cells — confirm they exist before calling.

  The metrics are computed from the labels actually served by the loader.
  ``Evaluator(data_flag, split).evaluate`` scores against the labels of the
  complete official split, which does not match a loader built from a random
  subset of it.
  """
  # Local import: these helpers are not guaranteed to be exposed by the
  # star-import of medmnist used at the top of the file.
  from medmnist.evaluator import getACC, getAUC

  model.eval()

  data_loader = train_loader_at_eval if split == 'train' else test_loader

  true_batches = []
  score_batches = []

  with torch.no_grad():
    for inputs, targets in data_loader:
      outputs = model(inputs).softmax(dim=-1)

      if task == 'multi-label, binary-class':
        targets = targets.to(torch.float32)
      else:
        # One label per row; reshape also works for a batch of one sample,
        # where squeeze() would collapse the tensor to zero dimensions.
        targets = targets.reshape(-1, 1).float()

      true_batches.append(targets)
      score_batches.append(outputs)

  if not true_batches:
    raise ValueError('no batches available for split %r' % (split,))

  y_true = torch.cat(true_batches, 0).numpy()
  y_score = torch.cat(score_batches, 0).numpy()

  metrics = (getAUC(y_true, y_score, task), getACC(y_true, y_score, task))

  print('%s  auc: %.3f  acc:%.3f' % (split, *metrics))

# Announce the evaluation, then print the metrics for the 'test' split.
print('==> Evaluating ...')
model_evaluation_dataset('test')

==> Evaluating ...


NameError: name 'test' is not defined