In [11]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
import tqdm

# Read image
# files  = sorted(glob.glob("./chexpert/train/"))
# print(len(files))
# for f in tqdm.tqdm(files):
#     img = cv2.imread(f, cv2.IMREAD_COLOR)
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#     # resize to 224x224
#     img = cv2.resize(img, (224, 224),cv2.INTER_AREA )
#     # save over original
#     cv2.imwrite(f, img)


In [12]:
import torch
import torch.nn as nn
import torchvision
from torchvision.models import ResNet101_Weights, resnet101

# Load pretrained model
# ResNet-101 initialised with torchvision's IMAGENET1K_V2 weights. Its final
# `fc` layer is swapped for a new head in a later cell of this notebook.
model = resnet101(weights=ResNet101_Weights.IMAGENET1K_V2)

# Use the GPU when CUDA is available, otherwise fall back to the CPU. The
# training and evaluation cells move the model and every batch to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
import pandas as pd
import glob
# Collect every JPEG that matches the patient*/study*/ directory layout, only
# to report how many image files are present on disk.
my_glob = glob.glob('./CheXpert-v1.0-small/train/patient*/study*/*.jpg')
print('Number of Observations: ', len(my_glob)) 

# Load the training label table. In the recorded output its row count equals
# the number of image files found above.
train_df = pd.read_csv('./CheXpert-v1.0-small/train.csv')
print(f'the shape of the training dataset is : {train_df.shape}')
# Last expression of the cell: rendered as a preview of the first rows.
train_df.head()

Number of Observations:  223414
the shape of the training dataset is : (223414, 19)


Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,CheXpert-v1.0-small/train/patient00001/study1/...,Female,68,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0
1,CheXpert-v1.0-small/train/patient00002/study2/...,Female,87,Frontal,AP,,,-1.0,1.0,,-1.0,-1.0,,-1.0,,-1.0,,1.0,
2,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Frontal,AP,,,,1.0,,,-1.0,,,,,,1.0,
3,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Lateral,,,,,1.0,,,-1.0,,,,,,1.0,
4,CheXpert-v1.0-small/train/patient00003/study1/...,Male,41,Frontal,AP,,,,,,1.0,,,,0.0,,,,


In [9]:
def prepare_dataset(dataframe, policy, class_names, class_ones=('Atelectasis', 'Cardiomegaly')):
    """Build shuffled image paths and binary label vectors from a label table.

    Only rows whose 'Frontal/Lateral' column equals 'Frontal' are kept. The
    kept rows are shuffled with a fixed seed (random_state=1), so repeated
    calls on the same frame return the same order. The input frame is not
    modified.

    Each label cell is mapped to 0 or 1:
      * 1.0           -> 1
      * missing (NaN) -> 0
      * -1.0          -> decided by ``policy`` (see below)
      * anything else -> 0

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'Frontal/Lateral', 'Path' and every name in
        ``class_names``.
    policy : str
        How a label value of -1 is resolved:
        "ones" maps it to 1, "zeroes" maps it to 0, and "mixed" maps it to 1
        for the columns listed in ``class_ones`` and to 0 for all others.
        Any other value behaves like "zeroes".
    class_names : sequence of str
        Label columns to extract; their order defines the column order of
        the returned label array.
    class_ones : collection of str, optional
        Columns whose -1 entries become 1 under the "mixed" policy.

    Returns
    -------
    x_path : numpy.ndarray
        Values of the 'Path' column for the kept rows, in shuffled order.
    y : numpy.ndarray
        Integer array of shape (n_kept_rows, len(class_names)) holding 0/1.
    """
    class_names = list(class_names)

    frontal_rows = dataframe[dataframe['Frontal/Lateral'] == 'Frontal']  # take frontal pics only
    shuffled = frontal_rows.sample(frac=1., random_state=1)
    x_path = shuffled["Path"].to_numpy()

    # Fill missing values in the label columns only; the remaining columns
    # (including non-numeric ones) are not needed for the outputs.
    raw = shuffled[class_names].fillna(0).to_numpy(dtype=float)

    # Confirmed positives are always 1; everything else starts at 0.
    y = (raw == 1).astype(int)

    # Resolve the -1 entries according to the requested policy.
    uncertain = raw == -1
    if policy == "ones":
        y[uncertain] = 1
    elif policy == "mixed":
        # Boolean mask over columns, broadcast across all rows.
        promote = np.array([cls in class_ones for cls in class_names], dtype=bool)
        y[uncertain & promote] = 1
    # "zeroes" and any unrecognised policy leave the -1 entries at 0.

    return x_path, y

In [13]:
# Target findings to extract; their order fixes the column order of `labels`.
class_names = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Pleural Effusion']
# Strategies prepare_dataset understands for label values of -1.
policy = ['ones','zeroes','mixed']
# policy[-1] selects 'mixed'.
x_path,labels = prepare_dataset(train_df,policy[-1],class_names)

In [None]:
from sklearn.model_selection import train_test_split
import os

def load_chexpert():
    """Read the training CSV and iterate over its 'Path' column (unfinished).

    NOTE(review): this function is a stub. As written it prints the table
    shape, walks the paths without loading anything, never fills
    `raw_data_train`, and implicitly returns None.
    NOTE(review): it reads './chexpert/train.csv', whereas another cell of
    this notebook reads './CheXpert-v1.0-small/train.csv' -- confirm which
    location is intended.
    """

    train_df = pd.read_csv('./chexpert/train.csv')
    print(f'the shape of the training dataset is : {train_df.shape}')
    paths = train_df['Path'].values

    # Created but never appended to or returned in the code shown.
    raw_data_train = []
    for file in paths:
        # Placeholder: the path is only bound to a local name; no image is read.
        image_path = file
        
        

In [18]:
# NOTE(review): `load_isic_2019` is not defined in any cell visible here, so on
# a fresh kernel this line raises NameError unless it is defined elsewhere.
# `train` and `test` are later wrapped by ISICDataset, which indexes them and
# unpacks each item as an (image, label) pair.
train, test = load_isic_2019()

In [27]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# dataloader
class ISICDataset(Dataset):
    """Map-style dataset over an indexable collection of (image, label) pairs.

    Attributes set by the constructor:
      data      -- the collection passed in; item `i` must unpack into two values.
      transform -- optional callable applied to the image part of each item.
    """

    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data

    def __len__(self):
        """Number of items in the wrapped collection."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return `(image, label)` for `idx`, transforming the image if a transform is set."""
        sample = self.data[idx]
        img, label = sample
        # A falsy transform (e.g. None) leaves the image untouched.
        img = self.transform(img) if self.transform else img
        return img, label
    
# transforms
# Per-channel mean / standard deviation handed to Normalize. These are the
# values commonly paired with torchvision's ImageNet-pretrained weights.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Applied in order: convert the image to a tensor, then normalise each channel.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# datasets
# Both splits share the same preprocessing pipeline.
train_dataset = ISICDataset(train, transform=transform)
test_dataset = ISICDataset(test, transform=transform)

# dataloaders
BATCH_SIZE = 32
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps evaluation
# runs reproducible batch-for-batch. The reported accuracy is unaffected.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [21]:
# Number of outputs of the new classification head.
# NOTE(review): another cell lists five CheXpert findings in `class_names`,
# while this head has eight outputs and is trained with a single-label loss --
# confirm which dataset this head is meant for.
NUM_CLASSES = 8

# Make the cell safe to re-run: if `model` was already wrapped by a previous
# execution, unwrap it first so the new head is attached to the network itself
# (not to the DataParallel wrapper) and the wrapper is not stacked twice.
base_model = model.module if isinstance(model, nn.DataParallel) else model
# Take the input width from the layer being replaced instead of hard-coding it.
base_model.fc = nn.Linear(base_model.fc.in_features, NUM_CLASSES)
model = nn.DataParallel(base_model)
model = model.to(device)

import torch.optim as optim

# Loss and optimizer are created after the head swap so that the optimizer
# tracks the parameters of the new `fc` layer as well.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

def test_model(model, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)

            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    print('\nTest set: Accuracy: {:.0f}%\n'.format(
            100. * correct / len(test_loader.dataset)))
    
    return correct / len(test_loader.dataset)

def train_model(model, train_loader, test_loader, optimizer, epochs):
    """Train `model` for `epochs` epochs, evaluating after every epoch.

    Relies on three notebook-level names: `criterion` (the loss function),
    `device` (where batches are moved) and `test_model` (the evaluation
    routine).

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; updated in place.
    train_loader : iterable
        Yields `(data, target)` training batches.
    test_loader : iterable
        Passed unchanged to `test_model` after each epoch.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to the model's parameters.
    epochs : int
        Number of passes over `train_loader`.

    Returns
    -------
    acc_list : list of float
        Value returned by `test_model` after each epoch.
    loss_list : list of float
        Mean per-batch training loss of each epoch (NaN for an epoch in which
        the loader yielded no batch).
    """
    acc_list = []
    loss_list = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0   # window for the progress print, reset every 100 batches
        epoch_loss = 0.0     # accumulated over the whole epoch, never reset mid-epoch
        num_batches = 0
        for i, (data, target) in enumerate(train_loader):
            data = data.to(device)
            target = target.to(device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            epoch_loss += batch_loss
            num_batches += 1
            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/100))
                running_loss = 0.0

        acc = test_model(model, test_loader)
        acc_list.append(acc)
        # Previously the windowed `running_loss` was recorded here, i.e. only the
        # sum of the batches left over since the last progress print. Record
        # the epoch's mean batch loss instead.
        loss_list.append(epoch_loss / num_batches if num_batches else float("nan"))

    return acc_list, loss_list



In [28]:
# Train for 10 epochs. The returned (acc_list, loss_list) pair is not bound to
# a name here. The recorded run below was stopped by a KeyboardInterrupt
# during epoch 6, so no return value was produced.
train_model(model, train_dataloader, test_dataloader, optimizer, 10)

[1,   100] loss: 0.803
[1,   200] loss: 0.819
[1,   300] loss: 0.801
[1,   400] loss: 0.804
[1,   500] loss: 0.785
[1,   600] loss: 0.798
[1,   700] loss: 0.781

Test set: Accuracy: 74%

[2,   100] loss: 0.700
[2,   200] loss: 0.712
[2,   300] loss: 0.716
[2,   400] loss: 0.709
[2,   500] loss: 0.702
[2,   600] loss: 0.688
[2,   700] loss: 0.662

Test set: Accuracy: 74%

[3,   100] loss: 0.626
[3,   200] loss: 0.625
[3,   300] loss: 0.619
[3,   400] loss: 0.618
[3,   500] loss: 0.584
[3,   600] loss: 0.620
[3,   700] loss: 0.607

Test set: Accuracy: 75%

[4,   100] loss: 0.534
[4,   200] loss: 0.542
[4,   300] loss: 0.518
[4,   400] loss: 0.522
[4,   500] loss: 0.537
[4,   600] loss: 0.516
[4,   700] loss: 0.502

Test set: Accuracy: 76%

[5,   100] loss: 0.448
[5,   200] loss: 0.442
[5,   300] loss: 0.432
[5,   400] loss: 0.425
[5,   500] loss: 0.451
[5,   600] loss: 0.448
[5,   700] loss: 0.443

Test set: Accuracy: 77%

[6,   100] loss: 0.383
[6,   200] loss: 0.352
[6,   300] loss: 0.

KeyboardInterrupt: 

In [29]:
# Evaluate the model in its current state (after the interrupted training run
# above); the returned accuracy fraction is shown as the cell's output.
test_model(model, test_dataloader)


Test set: Accuracy: 78%



0.7770323599052881

In [30]:
# Save the weights of the network inside the DataParallel wrapper
# (`model.module`), so the checkpoint keys carry no wrapper prefix.
# NOTE(review): the training data in this notebook comes from
# `load_isic_2019()` and is wrapped in `ISICDataset`, yet the file is named
# "chexpert_feature_extractor" -- confirm the name matches what was trained.
# NOTE(review): the target is a hard-coded relative path into another checkout.
torch.save(model.module.state_dict(), "../project-in-medical-image-computing-anirudhkaushik2003/src/feature_extractor/chexpert_feature_extractor.pth")