In [None]:
from kaggle.api.kaggle_api_extended import KaggleApi 
import os 
api = KaggleApi()
os.environ['KAGGLE_WORKING_DIR'] = '.'
api.authenticate()
api.dataset_download_files('paultimothymooney/chest-xray-pneumonia', path='dataset', unzip=True)

In [33]:
import os
data_dir = 'dataset/chest_xray'
train_dir = os.path.join(data_dir,'train')
test_dir  = os.path.join(data_dir,'test')
val_dir   = os.path.join(data_dir,'val')


In [34]:
train_normal_dir =os.path.join(train_dir,'NORMAL')
train_pneu_dir = os.path.join(train_dir,'PNEUMONIA')
train_normal_images , train_pneu_images = os.listdir(train_normal_dir),os.listdir(train_pneu_dir)
# len(train_normal_images),len(train_Pneu_images)


In [35]:
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms
import torch 
from PIL import Image

class ChestXrayDataset(Dataset):
    def __init__(self,normal_dir,pneu_dir,transform):
        
        self.transform = transform
        self.normal_dir = normal_dir 
        self.pneu_dir = pneu_dir
        self.normal_images = os.listdir(normal_dir)
        self.pneu_images = os.listdir(pneu_dir)

        self.images  = [(0,normal_image) for normal_image in self.normal_images] + [(1,pneu_image) for pneu_image in self.pneu_images]
    
    def __len__(self):
        return len(self.images)
    def __getitem__(self,idx):
        label,img = self.images[idx] 
        img_path = os.path.join(self.pneu_dir if label  else self.normal_dir,img)
        image = Image.open(img_path)
        image = image.convert('RGB')
        image = self.transform(image)
        return image,label

In [36]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

dataset = ChestXrayDataset(train_normal_dir,train_pneu_dir,transform)

In [37]:

batch_size = 200
dataloader = DataLoader(dataset,batch_size=batch_size)

In [38]:
import torchvision.models as models
import torch.nn as nn
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [39]:
model = models.resnet50(pretrained=True)

for param in model.parameters():
    param.requires_grad = False 

last_layer = model.fc
model.fc = nn.Linear(model.fc.in_features,1)
model = model.to(device) 



In [40]:
weight = torch.tensor([len(train_normal_images)/len(train_pneu_images)],dtype=torch.float32).to(device)


criterion  = nn.BCEWithLogitsLoss(pos_weight=weight)
lr = 1e-3
optimizer = torch.optim.Adam(model.parameters(),lr=lr,weight_decay=1e-5)
epochs = 100



In [41]:
for epoch in range(epochs):
    batch_count = 0
    epoch_loss = 0 
    for X,y in dataloader:
        batch_count += 1
        optimizer.zero_grad()
        X = X.to(device)
        y = y.float().to(device)
        y = y.unsqueeze(1)
        y_pred = model(X).to(device)
        loss = criterion(y_pred,y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        if batch_count % 100 != 0:
            print(f'Epoch : {epoch}/{epochs} , Batch: {batch_count} , Loss : {loss.item()}')
    print(f'Epoch : {epoch} , Average loss: {epoch_loss/batch_count}')

Epoch : 0/100 , Batch: 1 , Loss : 0.9046885967254639
Epoch : 0/100 , Batch: 2 , Loss : 0.45448118448257446
Epoch : 0/100 , Batch: 3 , Loss : 0.21124869585037231
Epoch : 0/100 , Batch: 4 , Loss : 0.0993746742606163
Epoch : 0/100 , Batch: 5 , Loss : 0.04884343221783638
Epoch : 0/100 , Batch: 6 , Loss : 0.026598354801535606


KeyboardInterrupt: 