In [19]:
import matplotlib.pyplot as plt
import numpy as np
import os
import torch
base_dir = "C://DATA//train//49_kaggle//KaggleFun//ChestXRay//DATA//chest_xray"

In [20]:
from torchvision import transforms
image_transforms = {
    # Train uses data augmentation
    'train':
    transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  
    ]),
    # Validation does not use augmentation
    'test':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [21]:
from torchvision.datasets import ImageFolder

train_ds = ImageFolder(root=os.path.join(base_dir,"train"), transform=image_transforms['train'])
test_ds = ImageFolder(root=os.path.join(base_dir,"test"), transform=image_transforms['test'])
print(len(train_ds), len(test_ds))
class_names = train_ds.classes
print(class_names)

5216 640
['NORMAL', 'PNEUMONIA']


In [23]:
from torch.utils.data.dataloader import DataLoader

batch_size = 64

train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=3, pin_memory=True)
test_dl = DataLoader(test_ds, batch_size, num_workers=3, pin_memory=True)

device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda")

In [24]:
images, labels = iter(train_dl).next()
images.shape

torch.Size([64, 3, 224, 224])

In [25]:
import torchvision.models as models
net = models.resnet18(pretrained=True)
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [26]:
for param in net.parameters():
    param.requires_grad = True

In [27]:
import torch.nn as nn

num_ftrs = net.fc.in_features
net.fc = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256,2),
)

net = net.to(device)

In [29]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)
# checking if GPU is available
if torch.cuda.is_available():
    net = net.cuda()
    criterion = criterion.cuda()
EPOCHS = 5
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [30]:
def train():
    for epoch in range(EPOCHS):  # loop over the dataset multiple times
        net.train()
        running_loss = 0.0
        total, correct = 0, 0
        for i, data in enumerate(train_dl, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
        print("Epoch: {}  Train Acc: {:.2f}".format(epoch, correct/total*100))

        net.eval()
        total, correct = 0, 0
        for i, data in enumerate(test_dl, 0):
            inputs, labels = data
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print("Epoch: {}  Test Acc: {:.2f}".format(epoch, correct/total*100))
    print('Finished Training')

In [31]:
train()

Epoch: 0  Train Acc: 64.61
Epoch: 0  Test Acc: 64.22
Epoch: 1  Train Acc: 65.28
Epoch: 1  Test Acc: 64.69
Epoch: 2  Train Acc: 65.87
Epoch: 2  Test Acc: 64.69
Epoch: 3  Train Acc: 65.20
Epoch: 3  Test Acc: 64.84
Epoch: 4  Train Acc: 65.86
Epoch: 4  Test Acc: 65.47
Finished Training
