In [1]:
 from google.colab import files
 files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"neomatriciel","key":"a575922acb114f5e887ffd9480a434e8"}'}

In [2]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download pranavraikokte/covid19-image-dataset
!unzip covid19-image-dataset.zip
!pip3 install pytorch_lightning 

Downloading covid19-image-dataset.zip to /content
 94% 149M/158M [00:01<00:00, 106MB/s]
100% 158M/158M [00:01<00:00, 99.3MB/s]
Archive:  covid19-image-dataset.zip
  inflating: Covid19-dataset/test/Covid/0100.jpeg  
  inflating: Covid19-dataset/test/Covid/0102.jpeg  
  inflating: Covid19-dataset/test/Covid/0105.png  
  inflating: Covid19-dataset/test/Covid/0106.jpeg  
  inflating: Covid19-dataset/test/Covid/0108.jpeg  
  inflating: Covid19-dataset/test/Covid/0111.jpg  
  inflating: Covid19-dataset/test/Covid/0112.jpg  
  inflating: Covid19-dataset/test/Covid/0113.jpg  
  inflating: Covid19-dataset/test/Covid/0115.jpeg  
  inflating: Covid19-dataset/test/Covid/0118.jpeg  
  inflating: Covid19-dataset/test/Covid/0119.jpeg  
  inflating: Covid19-dataset/test/Covid/0120.jpg  
  inflating: Covid19-dataset/test/Covid/094.png  
  inflating: Covid19-dataset/test/Covid/096.png  
  inflating: Covid19-dataset/test/Covid/098.jpeg  
  inflating: Covid19-dataset/test/Covid/COVID-00003b.jpg  
  inflat

In [3]:
import glob, os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.nn import functional as F
from torch import nn
import torch
from pytorch_lightning import Trainer
from PIL import Image
from sklearn.metrics import accuracy_score as accuracy
from sklearn.metrics import classification_report
from torchsummary import summary
import torchvision.models as models

In [4]:
class_dic = {'Covid' : 0, 'Normal': 1, 'Viral Pneumonia': 2}

In [5]:
def load_dataset(data_part):
    list_file, list_class = [], []
    for case in ['Covid', 'Normal', 'Viral Pneumonia']:
        path = f"/content/Covid19-dataset/{data_part}/{case}"
        os.chdir(path)
        for file in glob.glob("*"):
            list_file.append(path+'/'+file)
            list_class.append(class_dic[case])
    
    return list_file, list_class

train_list_file, train_list_class = load_dataset(data_part='train')
test_x, test_y = load_dataset(data_part='test')

train_x, val_x, train_y, val_y = train_test_split(train_list_file, train_list_class, stratify=train_list_class, test_size = .3)

In [6]:
def get_frequency(y):
    classes, freq = np.unique(y, return_counts=True)
    return np.array([classes, freq/len(y)]).T

In [7]:
print(get_frequency(train_y))
print(get_frequency(val_y))
print(get_frequency(test_y))

[[0.   0.44]
 [1.   0.28]
 [2.   0.28]]
[[0.         0.44736842]
 [1.         0.27631579]
 [2.         0.27631579]]
[[0.         0.39393939]
 [1.         0.3030303 ]
 [2.         0.3030303 ]]


In [8]:
class Covid19(Dataset):
    def __init__(self, x, y, transforms):
        self.x = x
        self.y = y
        self.transforms = transforms

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = self.x[idx]
        image = Image.open(image).convert('RGB')
        image = self.transforms(image)

        label = self.y[idx]
        return image, label

In [9]:
class Covid19DataModule(pl.LightningDataModule):
    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size

        # Augmentation policy for training set
        self.augmentation = transforms.Compose([
              transforms.Resize((224, 224)),
              transforms.RandomRotation(degrees=15),
              transforms.RandomHorizontalFlip(),
              transforms.ToTensor(),
              transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])
        # Preprocessing steps applied to validation and test set.
        self.transform = transforms.Compose([
              transforms.Resize((224, 224)),
              transforms.ToTensor(),
              transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])
        ])
        
        self.num_classes = 3

    def prepare_data(self):
        self.train = Covid19(x=train_x, y=train_y, transforms=self.augmentation)
        self.valid = Covid19(x=val_x, y=val_y, transforms=self.transform)
        self.test = Covid19(x=test_x, y=test_y, transforms=self.transform)
        
    def train_dataloader(self):
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.valid, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size)

In [10]:
class LitModel(pl.LightningModule):
    def __init__(self, input_shape, num_classes, learning_rate=1e-4):
        super().__init__()

        # log hyperparameters
        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.dim = input_shape
        self.num_classes = num_classes

        # transfer learning if pretrained=True
        self.feature_extractor = models.resnet152(pretrained=True)
        # layers are frozen by using eval()
        self.feature_extractor.eval()
        # freeze params
        for param in self.feature_extractor.parameters():
            param.requires_grad = False

        n_sizes = self._get_conv_output(input_shape)

        self.fc1 = nn.Linear(n_sizes, 256)
        self.fc2 = nn.Linear(256, 64)
        self.classifier = nn.Linear(64, num_classes)

        self.preds = []

    # returns the size of the output tensor going into the Linear layer from the conv block.
    def _get_conv_output(self, shape):
        batch_size = 1
        tmp_input = torch.autograd.Variable(torch.rand(batch_size, *shape))

        output_feat = self._forward_features(tmp_input)
        n_size = output_feat.data.view(batch_size, -1).size(1)
        return n_size

    # returns the feature tensor from the conv block
    def _forward_features(self, x):
        x = self.feature_extractor(x)
        return x

    # will be used during inference
    def forward(self, x):
        x = self._forward_features(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = nn.Dropout(p=0.1)(x)
        x = F.relu(self.fc2(x))
        x = nn.Dropout(p=0.1)(x)
        x = F.log_softmax(self.classifier(x), dim=1)

        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)

        # training metrics
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(y.cpu().numpy(), preds.cpu().numpy())
        self.log("train_loss", loss, on_step=True, on_epoch=True, logger=True)
        self.log("train_acc", acc, on_step=True, on_epoch=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)

        # validation metrics
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(y.cpu().numpy(), preds.cpu().numpy())
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)

        # validation metrics
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(y.cpu().numpy(), preds.cpu().numpy())

        self.preds = self.preds + list(preds.cpu().numpy())

        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer


In [11]:
module = Covid19DataModule(batch_size=16)
model = LitModel(input_shape=(3, 224, 224), num_classes=3)
summary(model, input_size=(3, 224, 224), device='cpu')

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth


  0%|          | 0.00/230M [00:00<?, ?B/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [12]:
trainer = Trainer(max_epochs=20, gpus=torch.cuda.device_count(), log_every_n_steps=5)

trainer.fit(model, module)
trainer.test()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type   | Params
---------------------------------------------
0 | feature_extractor | ResNet | 60.2 M
1 | fc1               | Linear | 256 K 
2 | fc2               | Linear | 16.4 K
3 | classifier        | Linear | 195   
---------------------------------------------
272 K     Trainable params
60.2 M    Non-trainable params
60.5 M    Total params
241.863   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.9242424368858337, 'test_loss': 0.2540096044540405}
--------------------------------------------------------------------------------


[{'test_acc': 0.9242424368858337, 'test_loss': 0.2540096044540405}]

In [13]:
report = classification_report(test_y, model.preds)
print(report)

              precision    recall  f1-score   support

           0       1.00      0.96      0.98        26
           1       0.86      0.90      0.88        20
           2       0.90      0.90      0.90        20

    accuracy                           0.92        66
   macro avg       0.92      0.92      0.92        66
weighted avg       0.93      0.92      0.93        66

