In [1]:
import numpy as np
import pandas as pd 
import os
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset,DataLoader
import torchvision 
from torchvision import transforms, models
from tqdm.notebook import tqdm
from PIL import Image
import seaborn as ss


In [106]:
from glob import glob
import torch.nn.functional as F
from torch import nn, optim

In [28]:
data_dir = {
    "train" : "/media/brats/DRIVE1/akansh/DeepXrays/Data/raw/Chest XR covid/train/",
    "valid": "/media/brats/DRIVE1/akansh/DeepXrays/Data/raw/Chest XR covid/validation/",
    "test" : "/media/brats/DRIVE1/akansh/DeepXrays/Data/raw/Chest XR covid/test/"
}

In [29]:
label_csv = {"train": '/media/brats/DRIVE1/akansh/DeepXrays/Data/processed/train_labels.csv',
            "valid": "/media/brats/DRIVE1/akansh/DeepXrays/Data/processed/validation_labels.csv",
            "test": "/media/brats/DRIVE1/akansh/DeepXrays/Data/raw/submission.csv"}

In [30]:
train = pd.read_csv(label_csv['train'])
valid = pd.read_csv(label_csv['valid'])
test = pd.read_csv(label_csv['test'])

In [49]:
filenames = []
for root, dirs, files in os.walk(data_dir['train']):
    for file in files:
        if file.endswith('.jpg') or file.endswith('.png') :
            filenames.append(file)
    

In [131]:
### Dataloader
class covid_dataset(Dataset):
    def __init__(self, image_loc, label_loc, transform, data_type = 'train'):
        filenames = []
        for root, dirs, files in os.walk(image_loc):
            for file in files:
                if file.endswith('.jpg') or file.endswith('.png') :
                    filenames.append(file)
        if data_type == 'train' or data_type == 'valid':
            self.full_filenames = glob(image_loc+'*/*.jpg')
        else:
            self.full_filenames = glob(image_loc+'*.jpg')
            
            
        label_df = pd.read_csv(label_loc)
        label_df.set_index("case", inplace = True)
        self.labels = [label_df.loc[filename].values[0] for filename in filenames]
        
        self.transform = transform
        
    def __len__(self):
        return len(self.full_filenames)
    
    def __getitem__(self,idx):
        image = Image.open(self.full_filenames[idx])
#         if len(image.size) != 3:
#             image = image.convert('RGB')
        image = self.transform(image)
        return image, self.labels[idx]
        

In [132]:
sample_transform = transforms.Compose([transforms.ToTensor()])

In [133]:
train_data = covid_dataset(image_loc = data_dir['train'],
                         label_loc = label_csv['train'],
                         transform = sample_transform)

In [134]:
for x,y in train_data:
    print(x.shape)
    print(y)
    break

torch.Size([1, 512, 512])
0


In [135]:
data_transforms = {
    
    'train': transforms.Compose([
        transforms.Resize((256,256)),
        transforms.CenterCrop((254,254)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation((-30,30)),
        transforms.ToTensor(),
        transforms.Normalize([0.5027,0.5027,0.5027],[0.2466, 0.2466, 0.2466])
        
    ]),
    
    'valid' : transforms.Compose([
        transforms.Resize((224,224)),
#         transforms.CenterCrop((254,254)),
        transforms.ToTensor(),
        transforms.Normalize([0.5027,0.5027,0.5027],[0.2466, 0.2466, 0.2466])
    ])
    
    
}

In [136]:
valid_data = covid_dataset(image_loc = data_dir['valid'],
                          label_loc = label_csv['valid'],
                          transform = data_transforms['valid'],
                          data_type = 'valid')

In [137]:
validloader = DataLoader(valid_data,batch_size = 32,shuffle = False)

In [138]:
saved_model = torch.load("./baseline_2_model.pt")

In [139]:
resnet50 = models.resnet50(pretrained=True)
resnet50.fc = nn.Sequential(
    nn.Linear(2048, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 3),
    nn.LogSoftmax(dim=1) # For using NLLLoss()
)

for param in resnet50.parameters():
    param.require_grad = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = resnet50
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [140]:
def accuracy(y_pred,y_true):
    y_pred = torch.exp(y_pred)
    top_p,top_class = y_pred.topk(1,dim = 1)
    equals = top_class == y_true.view(*top_class.shape)
    return torch.mean(equals.type(torch.FloatTensor))

In [142]:
model.eval()

test_loss = 0
test_acc = 0

for images,labels in tqdm(validloader):
    
    images = images.to(device)
    labels = labels.to(device)
    
    pred = model(images)
#     loss = criterion(pred,labels)
    
#     test_loss += loss.item()
    test_acc += accuracy(pred,labels)
    
# avg_test_loss = test_loss/len(validloader)
avg_test_acc = test_acc/len(validloader)


  0%|          | 0/6 [00:00<?, ?it/s]

RuntimeError: output with shape [1, 224, 224] doesn't match the broadcast shape [3, 224, 224]

In [130]:
avg_test_acc

tensor(0.8351)

In [141]:
model.load_state_dict(saved_model['model_state_dict'])

<All keys matched successfully>

### Exploaring Transforms

In [None]:
transform = transforms.Compose