In [2]:
import medmnist
from medmnist import INFO, Evaluator

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

import numpy as np

In [6]:
DATASET = "bloodmnist"
NUM_EPOCHS = 3
BATCH_SIZE = 128
lr = 0.001

dataset_info = INFO[DATASET]
task = dataset_info['task']
num_channels = dataset_info['n_channels']
num_classes = len(dataset_info['label'])

DatasetClass = getattr(medmnist, dataset_info['python_class'])

task: multi-class
num_channels: 3
num_classes: 8


In [8]:
data_transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize(mean=[.5], std=[0.5])
])

Compose(
    ToTensor()
    Normalize(mean=[0.5], std=[0.5])
)


In [9]:
dset_train = DatasetClass(split='train', transform=data_transform, download=True)
dset_test = DatasetClass(split='test', transform=data_transform, download=True)

Using downloaded and verified file: /Users/carlosgil/.medmnist/bloodmnist.npz
Using downloaded and verified file: /Users/carlosgil/.medmnist/bloodmnist.npz
Dataset BloodMNIST (bloodmnist)
    Number of datapoints: 11959
    Root location: /Users/carlosgil/.medmnist
    Split: train
    Task: multi-class
    Number of channels: 3
    Meaning of labels: {'0': 'basophil', '1': 'eosinophil', '2': 'erythroblast', '3': 'immature granulocytes(myelocytes, metamyelocytes and promyelocytes)', '4': 'lymphocyte', '5': 'monocyte', '6': 'neutrophil', '7': 'platelet'}
    Number of samples: {'train': 11959, 'val': 1712, 'test': 3421}
    Description: The BloodMNIST is based on a dataset of individual normal cells, captured from individuals without infection, hematologic or oncologic disease and free of any pharmacologic treatment at the moment of blood collection. It contains a total of 17,092 images and is organized into 8 classes. We split the source dataset with a ratio of 7:1:2 into training, val

In [14]:
dload_train = data.DataLoader(dataset=dset_train, batch_size=BATCH_SIZE, shuffle=True)
dload_val = data.DataLoader(dataset=dset_train, batch_size=2*BATCH_SIZE, shuffle=False)
dload_test = data.DataLoader(dataset=dset_test, batch_size=2*BATCH_SIZE, shuffle=True)

In [15]:
class Net(nn.Module):
  def __init__(self, in_channels, num_classes):
    super(Net, self).__init__()

    self.layer1 = nn.Sequential(
      nn.Conv2d(in_channels, 16, kernel_size=3),
      nn.BatchNorm2d(16),
      nn.ReLU()
    )

    self.layer2 = nn.Sequential(
      nn.Conv2d(16, 16, kernel_size=3),
      nn.BatchNorm2d(16),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2)
    )

    self.layer3 = nn.Sequential(
      nn.Conv2d(16, 64, kernel_size=3),
      nn.BatchNorm2d(64),
      nn.ReLU()
    )

    self.layer4 = nn.Sequential(
      nn.Conv2d(64, 64, kernel_size=3),
      nn.BatchNorm2d(64),
      nn.ReLU()
    )

    self.layer5 = nn.Sequential(
      nn.Conv2d(64, 64, kernel_size=3, padding=1),
      nn.BatchNorm2d(64),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2)
    )

    self.fc = nn.Sequential(
      nn.Linear(64 * 4 * 4, 128),
      nn.ReLU(),
      nn.Linear(128, 128),
      nn.ReLU(),
      nn.Linear(128, num_classes)
    )

  def forward(self, x):
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    x = self.layer5(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return x

In [23]:
model = Net(in_channels=num_channels, num_classes=num_classes)

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

In [26]:
for _ in range(NUM_EPOCHS):
  model.train()

  for inputs, targets in dload_train:
    optimizer.zero_grad()
    outputs = model(inputs)

    targets = targets.squeeze().long()
    loss = criterion(outputs, targets)

    loss.backward()
    optimizer.step()

In [41]:
#Data visualization


  dset_bloodMNIST.extend(np.array([input.numpy(), target.numpy()]))


ValueError: could not broadcast input array from shape (3,28,28) into shape (1,)