In [1]:
from tqdm import tqdm
import numpy as np
import random
import torch
from torch import nn, optim
from torch.utils import data
from torchvision import datasets, transforms, models

import medmnist
from medmnist import INFO, Evaluator
from numpy.random import RandomState
from torch.utils.data import Subset
import re

In [2]:
def train(model, device, train_loader, optimizer, epoch, display=True):
    """Run one training epoch of a single-logit binary classifier.

    Each batch is moved to ``device``, passed through ``model`` and scored
    with binary cross-entropy on the raw logits; the optimizer takes one step
    per batch.

    Args:
        model: Module whose output is compared against ``target.float()`` by
            ``F.binary_cross_entropy_with_logits`` (shapes must match).
        device: Device the batches are moved to.
        train_loader: Iterable of ``(inputs, target)`` batches. It must expose
            ``.dataset`` and ``len()`` when a progress line is printed.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress line.
        display: When true, print one progress line after the last batch.
    """
    model.train()
    num_batches = 0
    # `inputs` (not `data`) so the imported `torch.utils.data` alias is not shadowed.
    for batch_idx, (inputs, target) in enumerate(train_loader):
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(inputs)
        loss = F.binary_cross_entropy_with_logits(output, target.float())
        loss.backward()
        optimizer.step()
        num_batches = batch_idx + 1
    # The report reads values from the last batch, so it is skipped when the
    # loader yielded nothing (previously this raised UnboundLocalError).
    if display and num_batches:
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(inputs), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item()))


def test(model, device, test_loader, name="\nVal"):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.binary_cross_entropy_with_logits(
                output, target.float(), size_average=False).item()  # sum up batch loss
            pred = output >= 0.5
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        name, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(torch.nn.Module):
    """Small CNN: four 3x3 conv + ReLU stages followed by one linear unit.

    The first convolution takes a single input channel. The flatten step
    assumes the conv stack ends in 32 feature maps of 4x4, which is what a
    28x28 input produces (28 -> 26 -> 12 -> 10 -> 4). The output is one raw
    value per sample.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels, stride) for each 3x3 convolution.
        conv_specs = [(1, 16, 1), (16, 16, 2), (16, 32, 1), (32, 32, 2)]

        self.layers = nn.ModuleList()
        for in_ch, out_ch, stride in conv_specs:
            self.layers.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride))
            self.layers.append(nn.ReLU(inplace=True))

        self.fc = nn.Linear(32*4*4, 1)

    def forward(self, x):
        """Apply the conv stack, flatten to 512 features, return the linear output."""
        for layer in self.layers:
            x = layer(x)
        flat = x.view(-1, 32*4*4)
        return self.fc(flat)

In [4]:
from transformers import ViTForImageClassification, ViTConfig, ViTFeatureExtractor

In [5]:
# Reproducibility: seed every RNG the notebook touches and force cuDNN onto
# deterministic kernels (no auto-tuned algorithm selection).
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Dataset selection: the MedMNIST flag picks the metadata entry, from which
# the dataset class and the number of labels are looked up.
data_flag = 'pneumoniamnist'
download = True

info = INFO[data_flag]
DataClass = getattr(medmnist, info['python_class'])
n_classes = len(info['label'])

In [6]:
# Per-image preprocessing, applied in order:
#   1. resize to 224 (applied to the image before tensor conversion),
#   2. convert to a tensor,
#   3. normalise the single channel with mean 0.5 / std 0.5,
#   4. tile that channel three times along dim 0 to get a 3-channel tensor.
data_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5]),
    transforms.Lambda(lambda img: img.repeat(3, 1, 1)),
])

In [7]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Load the data. Both handles read the 'train' split; the training and
# validation subsets below are disjoint slices of it.
train_dataset = DataClass(split='train', transform=data_transform, download=download)
val_dataset = DataClass(split='train', transform=data_transform, download=download)

accs_val = []

# Per-class sample budgets (loop-invariant): 10 training and 1000 validation
# samples in total, split evenly across the classes.
train_top = 10//n_classes
val_top = 1000//n_classes

for seed in range(0, 3):
  # A seed-specific permutation of positions 0..999 selects samples inside
  # each class. Only the first 1000 samples of a class are eligible, and a
  # class with fewer samples than a drawn position raises IndexError.
  prng = RandomState(seed)
  random_permute = prng.permutation(np.arange(0, 1000))
  indx_train = np.concatenate([np.where(train_dataset.labels == label)[0][random_permute[0:train_top]] for label in range(0, n_classes)])
  indx_val = np.concatenate([np.where(train_dataset.labels == label)[0][random_permute[train_top:train_top + val_top]] for label in range(0, n_classes)])

  train_data = Subset(train_dataset, indx_train)
  val_data = Subset(val_dataset, indx_val)

  print('Num Samples For Training %d Num Samples For Val %d'%(train_data.indices.shape[0],val_data.indices.shape[0]))

  train_loader = torch.utils.data.DataLoader(train_data,
                                             batch_size=32,
                                             shuffle=True)

  val_loader = torch.utils.data.DataLoader(val_data,
                                           batch_size=128,
                                           shuffle=False)

  # Pretrained VGG16 whose classifier is replaced by a single-logit linear head.
  model = models.vgg16(pretrained=True)
  model.classifier = nn.Linear(512 * 7 * 7, 1)

  model.to(device).train()

  # NOTE(review): `criterion` is not used by train()/test(), which compute
  # binary cross-entropy on the logits directly. Kept in case later cells read it.
  criterion = nn.CrossEntropyLoss()

  # Only the new classifier head is handed to the optimizer; the pretrained
  # feature extractor is never updated.
  # Alternative: optimizer = optim.Adam(model.classifier.parameters(), lr=1e-3)
  optimizer = optim.SGD(model.classifier.parameters(), lr=0.001, momentum=0.9)

  for epoch in range(10):
    train(model, device, train_loader, optimizer, epoch, display=epoch%5==0)
  accs_val.append(test(model, device, val_loader))

accs_val = np.array(accs_val)

# Report the real number of runs (the message used to hard-code 5 while the
# loop runs 3 seeds).
print('Val acc over %d instances on dataset: %s %.2f +- %.2f'%(len(accs_val), data_flag, accs_val.mean(), accs_val.std()))

Using downloaded and verified file: /home/z_yuxian/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /home/z_yuxian/.medmnist/pneumoniamnist.npz
Num Samples For Training 10 Num Samples For Val 1000





Val set: Average loss: 0.3712, Accuracy: 879/1000 (87.90%)

Num Samples For Training 10 Num Samples For Val 1000

Val set: Average loss: 0.4339, Accuracy: 869/1000 (86.90%)

Num Samples For Training 10 Num Samples For Val 1000

Val set: Average loss: 0.3748, Accuracy: 873/1000 (87.30%)

Val acc over 5 instances on dataset: pneumoniamnist 87.37 +- 0.41
