In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import os
import zipfile
from PIL import Image
from sklearn.model_selection import train_test_split
from PIL import Image
from skimage.io import imread
import pandas as pd
import time


In [2]:
# To extract the zip file. Can be skipped if already extracted
needs_extract = False
if needs_extract:
    local_zip = 'chest-xray-pneumonia.zip'
    zip_ref = zipfile.ZipFile(local_zip, 'r')
    zip_ref.extractall('demo')
    zip_ref.close()
    base_path = "demo/chest_xray/test"
    folder = os.listdir(base_path)
    !rm -r 'demo/chest_xray/chest_xray/'
    !rm -r 'demo/chest_xray/__MACOSX'

In [3]:
# Build one (path, target) DataFrame per training class.
# target: 0 = NORMAL, 1 = PNEUMONIA.
#
# The frames are created in a single call from lists instead of writing
# `df.iloc[i]['col'] = value` row by row: that chained assignment writes to a
# temporary Series and is silently a no-op under pandas Copy-on-Write.
# The directory listing is sorted because os.listdir returns entries in
# arbitrary order, which would make the seeded shuffle non-reproducible.
train_normal_path = 'demo/chest_xray/train/NORMAL/'
folder = sorted(os.listdir(train_normal_path))
print("Normal Train Images ", len(folder))
total_images = len(folder)
df_normal_train = pd.DataFrame({
    "path": [train_normal_path + file_name for file_name in folder],
    "target": [0] * total_images,
})

train_pne_path = 'demo/chest_xray/train/PNEUMONIA/'
folder = sorted(os.listdir(train_pne_path))
print("Pneumonia Train Images ", len(folder))

total_images = len(folder)
df_pne_train = pd.DataFrame({
    "path": [train_pne_path + file_name for file_name in folder],
    "target": [1] * total_images,
})

# Seed used for every DataFrame shuffle in the notebook.
RAND_STATE = 4545


Normal Train Images  1341
Pneumonia Train Images  3875


In [4]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Merge both training classes and shuffle the rows with the shared seed.
df_train = pd.concat([df_normal_train.copy(), df_pne_train.copy()])
df_train = df_train.sample(frac=1, random_state=RAND_STATE)

# Build one (path, target) DataFrame per test class.
# target: 0 = NORMAL, 1 = PNEUMONIA.
#
# The frames are created in a single call from lists instead of writing
# `df.iloc[i]['col'] = value` row by row: that chained assignment writes to a
# temporary Series and is silently a no-op under pandas Copy-on-Write.
# The directory listing is sorted because os.listdir returns entries in
# arbitrary order, which would make the seeded shuffle non-reproducible.
test_normal_path = 'demo/chest_xray/test/NORMAL/'
folder = sorted(os.listdir(test_normal_path))
print("Normal Test Images ", len(folder))
total_images = len(folder)
df_normal_test = pd.DataFrame({
    "path": [test_normal_path + file_name for file_name in folder],
    "target": [0] * total_images,
})

test_pne_path = 'demo/chest_xray/test/PNEUMONIA/'
folder = sorted(os.listdir(test_pne_path))
print("Pneumonia Test Images ", len(folder))
total_images = len(folder)
df_pne_test = pd.DataFrame({
    "path": [test_pne_path + file_name for file_name in folder],
    "target": [1] * total_images,
})

Normal Test Images  234
Pneumonia Test Images  390


In [5]:
# Stack the NORMAL and PNEUMONIA test frames, then shuffle every row
# (frac=1) with the notebook-wide seed.
df_test = pd.concat(
    [df_normal_test.copy(), df_pne_test.copy()]
).sample(frac=1, random_state=RAND_STATE)

In [6]:
def my_transform(key="train"):
    """Return the torchvision preprocessing pipeline for a dataset split.

    Both splits currently use the same steps: resize to 224x224, then
    convert the PIL image to a tensor.

    Args:
        key: Either ``'train'`` or ``'test'``.

    Returns:
        A ``transforms.Compose`` for the requested split.

    Raises:
        KeyError: If ``key`` is not one of the two known splits.
    """
    def _resize_and_tensorize():
        # A fresh Compose per split so the two pipelines stay independent.
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
        ])

    pipelines = {
        'train': _resize_and_tensorize(),
        'test': _resize_and_tensorize(),
    }
    return pipelines[key]


class PNEDataset(Dataset):
    """Image dataset backed by a DataFrame with ``path`` and ``target`` columns.

    Each item is ``(image, target)``: the image at ``path`` converted to
    single-channel greyscale (and passed through ``transform`` when one is
    given), and ``target`` as a plain Python ``int``.
    """

    def __init__(self, df, transform=None):
        """Store the DataFrame and the optional per-image transform."""
        self.states = df
        self.transform = transform

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.states)

    def __getitem__(self, idx):
        """Load, convert and transform the image at positional index ``idx``."""
        image_path = self.states.path.values[idx]
        # Image.open is lazy and keeps the file handle open; convert() loads
        # the pixels into a new image, so the handle can be closed right away.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('L')

        if self.transform:
            image = self.transform(image)

        # Built-in int: the `np.int` alias was removed in NumPy 1.24.
        target = int(self.states.target.values[idx])
        return image, target

# Wrap the shuffled DataFrames in datasets, each with its split's transform.
train_dataset = PNEDataset(df_train, transform=my_transform("train"))
test_dataset = PNEDataset(df_test, transform=my_transform("test"))

# Training: shuffled mini-batches of 32. Evaluation: one image at a time, in
# DataFrame order. Partial final batches are kept (drop_last defaults to False).
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

In [7]:
def cumLaplaceDistribution(y_pred,mean,standard_deviation,all_qs):
    """Evaluate the two-branch exponential CDF used by the quantile loss.

    With ``d = (y_pred - mean) / standard_deviation`` and ``q = all_qs``:

    * ``y_pred <  mean``: ``q * exp((1 - q) * d)``
    * ``y_pred >= mean``: ``1 - (1 - q) * exp(-q * d)``

    Both exponents are clamped to be <= 0 so that the branch which is not
    selected for an element cannot overflow.

    Args:
        y_pred: Point at which the CDF is evaluated.
        mean: Location parameter (tensor).
        standard_deviation: Scale parameter.
        all_qs: Quantile level ``q``.

    Returns:
        Tensor of CDF values, one per element of ``mean``.
    """
    centered = y_pred - mean

    # Exponent of each branch, capped at 0.
    left_exponent = torch.clamp(((1 - all_qs) * centered) / standard_deviation, max=0)
    right_exponent = torch.clamp((-1.0 * all_qs * centered) / standard_deviation, max=0)

    left_branch = all_qs * torch.exp(left_exponent)
    right_branch = 1 - ((1 - all_qs) * torch.exp(right_exponent))

    # Element-wise pick: right branch at or above the location, left below it.
    return torch.where(y_pred >= mean, right_branch, left_branch)


def logLikelihoodLoss(y_true,y_pred,mean,standard_deviation,all_qs):
    """Binary negative log-likelihood for a single quantile output.

    The network output ``y_pred`` is used as the location of the
    distribution and the CDF is evaluated at 0; ``1 - CDF(0)`` is treated as
    the probability of class 1. The ``mean`` argument is accepted for
    signature compatibility but is not used.

    Args:
        y_true: Binary labels.
        y_pred: Network outputs for this quantile.
        mean: Unused.
        standard_deviation: Scale passed to ``cumLaplaceDistribution``.
        all_qs: Quantile level passed to ``cumLaplaceDistribution``.

    Returns:
        Scalar tensor: the mean negative log-likelihood.
    """
    cdf_at_zero = cumLaplaceDistribution(
        0.0,
        mean=y_pred,
        standard_deviation=standard_deviation,
        all_qs=all_qs,
    )
    # Keep probabilities away from 0 and 1 so both logs stay finite.
    cdf_at_zero = torch.clamp(cdf_at_zero, min=1e-7, max=1 - 1e-7)

    positive_part = y_true * torch.log(1 - cdf_at_zero)
    negative_part = (1 - y_true) * torch.log(cdf_at_zero)
    return - 1 * torch.mean(positive_part + negative_part)

def customLoss(y_true, y_pred, mean, standard_deviation, all_qs, penalty):
    """Average per-quantile likelihood loss plus a quantile-crossing penalty.

    Args:
        y_true: Labels of shape (batch, 1); only column 0 is read.
        y_pred: Network outputs with one column per entry of ``all_qs``.
        mean: Forwarded to ``logLikelihoodLoss``.
        standard_deviation: Forwarded to ``logLikelihoodLoss``.
        all_qs: Iterable of quantile levels, one per column of ``y_pred``.
        penalty: Weight of the crossing penalty.

    Returns:
        Scalar tensor: mean of the per-quantile losses plus
        ``penalty * mean(max(0, y_pred[:, k] - y_pred[:, k + 1]))``.
        The penalty is 0 when ``y_pred`` has fewer than two columns.
    """
    quantile_losses = [
        logLikelihoodLoss(y_true[:, 0], y_pred[:, idx], mean, standard_deviation, q)
        for idx, q in enumerate(all_qs)
    ]

    # Positive entries mark adjacent columns that are in decreasing order.
    crossing = -1.0 * (y_pred[:, 1:] - y_pred[:, :-1])
    if crossing.numel() == 0:
        # No adjacent pair to compare; the mean of an empty tensor would be NaN.
        crossing_penalty = y_pred.new_zeros(())
    else:
        # Zero tensor created from y_pred so it always shares its device and
        # dtype, rather than relying on a module-level `device`.
        zero = y_pred.new_zeros(1)
        crossing_penalty = penalty * torch.mean(torch.max(zero, crossing))

    return torch.mean(torch.stack(quantile_losses)) + crossing_penalty

def _as_cpu_double(value):
    """Return ``value`` (number, array or tensor) as a detached float64 CPU tensor."""
    return torch.as_tensor(value, dtype=torch.float64).detach().cpu()


def customTestPred(y_pred,mean,standard_deviation,all_qs,batch_size = 1):
    """Turn predicted locations into class-1 probabilities and hard labels.

    With ``d = (y_pred - mean) / standard_deviation`` and ``q = all_qs``,
    the class-1 probability of each sample is:

    * ``y_pred <  mean``: ``1 - q * exp((1 - q) * d)``
    * ``y_pred >= mean``: ``1 - (1 - (1 - q) * exp(-q * d))``

    A sample is labelled 1 when that probability is >= 0.5.

    All samples go through one vectorised float64 path, so ``batch_size``
    values of 0, 1 and N behave consistently, and NaN locations yield label 0
    instead of being dropped (which would misalign predictions and labels).

    Args:
        y_pred: Scalar evaluation point.
        mean: Per-sample locations; flattened, and only the first
            ``batch_size`` entries are used.
        standard_deviation: Scale (scalar, or one value per sample).
        all_qs: A single quantile level (number or one-element tensor).
        batch_size: Number of samples to score.

    Returns:
        ``(labels, probabilities)``: two float32 tensors of shape
        ``(batch_size, 1)`` placed on the module-level ``device``.

    Raises:
        ValueError: If ``all_qs`` does not hold exactly one value.
    """
    n_samples = max(int(batch_size), 0)

    locations = _as_cpu_double(mean).reshape(-1)[:n_samples]
    point = _as_cpu_double(y_pred).reshape(())
    scale = _as_cpu_double(standard_deviation).reshape(-1)
    if scale.numel() > 1:
        scale = scale[:n_samples]

    q = _as_cpu_double(all_qs).reshape(-1)
    if q.numel() != 1:
        raise ValueError("customTestPred expects exactly one quantile in all_qs")

    val = (point - locations) / scale
    below_location = point < locations

    # Each branch is only meaningful where it is selected; torch.where
    # discards the other one, so overflow there is harmless.
    # `1 - (1 - x)` is kept as written rather than simplified to `x` so the
    # rounding behaviour right at the 0.5 threshold stays the same.
    prob_positive = torch.where(
        below_location,
        1 - q * torch.exp((1 - q) * val),
        1 - (1 - ((1 - q) * torch.exp(-1 * q * val))),
    )
    labels = (prob_positive >= 0.5).to(torch.float32)

    return (
        labels.reshape(-1, 1).to(device),
        prob_positive.to(torch.float32).reshape(-1, 1).to(device),
    )

def _median_quantile_accuracy(preds, labels):
    """Shared implementation of ``acc_Q`` and ``acc_tests``."""
    preds = np.array(preds).reshape(-1,1)
    labels = np.array(labels).reshape(-1,1)
    if preds.shape[0] == 0 or labels.shape[0] == 0:
        raise ValueError("accuracy needs at least one prediction and one label")

    predicted,_ = customTestPred(0,preds,standard_deviation = 1,all_qs = torch.Tensor([0.5]),
                                 batch_size = preds.shape[0])
    predicted = predicted.detach().cpu().numpy().reshape(-1)

    # Compare position by position over the common length, as zip() would.
    n_compared = min(predicted.shape[0], labels.shape[0])
    hits = int(np.count_nonzero(predicted[:n_compared] == labels[:n_compared, 0]))
    return hits/labels.shape[0]


def acc_Q(train_preds,train_labels):
    """Accuracy of the q=0.5 output against binary training labels.

    Predictions are turned into hard labels by
    ``customTestPred(0, preds, standard_deviation=1, all_qs=[0.5])``.

    Args:
        train_preds: Sequence of scalar network outputs.
        train_labels: Sequence of labels, aligned with ``train_preds``.

    Returns:
        Fraction of matching labels, as a Python float.

    Raises:
        ValueError: If either sequence is empty.
    """
    return _median_quantile_accuracy(train_preds, train_labels)


def acc_tests(test_preds,test_labels):
    """Accuracy of the q=0.5 output against binary test labels.

    Same computation as ``acc_Q``; kept as a separate name for callers.

    Args:
        test_preds: Sequence of scalar network outputs.
        test_labels: Sequence of labels, aligned with ``test_preds``.

    Returns:
        Fraction of matching labels, as a Python float.

    Raises:
        ValueError: If either sequence is empty.
    """
    return _median_quantile_accuracy(test_preds, test_labels)

In [8]:
def lr_schedule_combined_sgd(model, loader, batch_size):
    """Derive a learning rate from the largest penultimate-activation norm.

    Every batch of ``loader`` is pushed through the ResNet-style backbone
    (``conv1`` ... ``avgpool``) and through all but the last two modules of
    ``model.fc``. ``Kz`` is the largest norm of the resulting activation
    matrix over all batches, and the returned rate is
    ``1 / (Kz / batch_size)``.

    Side effect: the model is left in eval mode; callers that go on to train
    must call ``model.train()`` themselves.

    Args:
        model: Network exposing ``conv1, bn1, relu, maxpool, layer1..layer4,
            avgpool`` and an indexable ``fc``.
        loader: Iterable of batches whose first element is the input tensor.
        batch_size: Batch size used to scale the rate.

    Returns:
        The learning rate as a Python float.

    Raises:
        ValueError: If the loader yields no batch with a non-zero activation
            norm (the rate would be a division by zero).
    """
    Kz = 0.0
    model.eval()
    with torch.no_grad():
        for batch in loader:
            # Only the inputs are needed; labels are not used here.
            op = batch[0].to(device)
            op = model.conv1(op)
            op = model.bn1(op)
            op = model.relu(op)
            op = model.maxpool(op)
            op = model.layer1(op)
            op = model.layer2(op)
            op = model.layer3(op)
            op = model.layer4(op)
            op = model.avgpool(op)
            op = torch.flatten(op,1)
            # Stop two modules before the end of the head.
            for layer_idx in range(len(model.fc)-2):
                op = model.fc[layer_idx](op)

            # float() keeps Kz (and the returned rate) a plain Python float
            # instead of a NumPy scalar.
            activ = float(np.linalg.norm(op.detach().cpu().numpy()))
            if activ > Kz:
                Kz = activ

    if Kz <= 0:
        raise ValueError(
            "cannot derive a learning rate: loader produced no non-zero activations")

    factor = 1
    K_ = (factor * Kz) / (batch_size)
    lr = 1 / K_
    return lr


In [9]:
# Fallback batch size for loaders that do not expose `batch_size`.
batch_size = 32

def train_adaptive_lr(model,loader,optimizer):
    """Train for one epoch with a learning rate recomputed from the data.

    The rate comes from ``lr_schedule_combined_sgd`` and is written into
    every parameter group of ``optimizer``. The epoch itself is delegated to
    ``train_fixed``, whose training loop is the same.

    Args:
        model: Network to train.
        loader: Training batches. Its ``batch_size`` attribute is used for
            the schedule when present, otherwise the module-level
            ``batch_size``.
        optimizer: Optimizer whose learning rate is overwritten.

    Returns:
        Training accuracy of the epoch, as returned by ``train_fixed``.
    """
    # `or` also covers loaders whose batch_size attribute is None.
    effective_batch_size = getattr(loader, "batch_size", None) or batch_size
    lr_val = lr_schedule_combined_sgd(model, loader, batch_size=effective_batch_size)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_val
    # train_fixed switches the model back to train mode before iterating.
    return train_fixed(model, loader, optimizer)


def train_fixed(model,loader,optimizer):
    """Train for one epoch with the optimizer's current learning rate.

    Each batch is scored with ``customLoss`` using the module-level
    ``mean_is``, ``std_is``, ``all_qs`` and ``penalty``. Output column 4 is
    collected for every sample and the epoch's accuracy is computed with
    ``acc_Q`` and printed.

    Args:
        model: Network to train; switched to train mode.
        loader: Iterable of ``(inputs, labels, ...)`` batches.
        optimizer: Optimizer stepped once per batch.

    Returns:
        Training accuracy of the epoch.
    """
    train_preds_Q = []
    train_labels = []
    model.train()
    for batch in loader:
        inputs = batch[0].to(device)
        labels = batch[1].to(device)
        optimizer.zero_grad()
        op_qs = model(inputs)
        lossQ = customLoss(labels.reshape(-1,1),op_qs, mean_is,std_is,all_qs,penalty)
        lossQ.backward()
        optimizer.step()
        # One bulk transfer per batch instead of one .item() call per sample.
        # Column 4 is the output scored by acc_Q.
        train_preds_Q.extend(op_qs[:,4].detach().reshape(-1).tolist())
        train_labels.extend(labels.reshape(-1).tolist())

    acc_is_Q = acc_Q(train_preds_Q,train_labels)
    print("Train Acc Q : %f "%(acc_is_Q))
    return acc_is_Q

In [10]:
def test(model,loader,verbose=True):
    """Evaluate the model on ``loader`` and return its accuracy.

    Runs in eval mode without gradient tracking, collects output column 4
    for every sample and scores it against the labels with ``acc_tests``.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable of ``(inputs, labels, ...)`` batches.
        verbose: When true, print the accuracy.

    Returns:
        Accuracy as returned by ``acc_tests``.
    """
    model.eval()
    test_preds_Q = []
    test_labels = []
    with torch.no_grad():
        for batch in loader:
            inputs = batch[0].to(device)
            labels = batch[1].to(device)
            op_qs = model(inputs)
            # One bulk transfer per batch instead of one .item() call per sample.
            test_preds_Q.extend(op_qs[:,4].detach().reshape(-1).tolist())
            test_labels.extend(labels.reshape(-1).tolist())

    acc_is_Q = acc_tests(test_preds_Q,test_labels)
    if verbose:
        print("Test Acc Q : %f  "%(acc_is_Q))
    return acc_is_Q


In [11]:
# Fixed seed so the randomly initialised weights are the same on every run.
# Module construction order is kept as-is because each constructor draws
# from the seeded generator.
torch.manual_seed(756)
model_1 = torchvision.models.resnet50(pretrained=False)

# Single-channel stem: the X-rays are loaded as greyscale.
model_1.conv1 = nn.Conv2d(
    in_channels=1, out_channels=64,
    kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
    bias=False,
)

# Replace the classifier with a small head that emits one value per quantile.
num_features = model_1.fc.in_features
model_1.fc = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 9),
)
model_1 = model_1.to(device)

# Optimizer and loss hyper-parameters read by the training functions.
lr_is = 1e-2
optimizer = torch.optim.SGD(model_1.parameters(), lr=lr_is)
all_qs = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).to(device)
mean_is = 0
std_is = 1
penalty = 1
epsilon = 0.00


In [12]:
# Baseline run: train with the fixed learning rate and remember the best
# test accuracy; the adaptive run uses it as its target.
max_epochs = 20

fixed_acc = 0
fixed_epochs = 0
best_fixed_epoch = 0
adaptive_acc = 0
adaptive_epochs = 0

fixed_time = []

print("LR=0.01")
print("Establishing Fixed LR Best results")

# The loop variable is the 1-based epoch counter itself. Naming it `iter`
# would overwrite the builtin for the rest of the kernel session.
for fixed_epochs in range(1, max_epochs + 1):
    print("Fixed Iteration:", fixed_epochs)
    start = time.time()
    fixed_acc_train = train_fixed(model_1,train_dataloader,optimizer)
    end = time.time()
    print("Train Time: {:.3f}".format(end-start))
    fixed_time.append(end-start)
    fixed_acc_test = test(model_1,test_dataloader)
    # An epoch becomes the new best only when both its train and its test
    # accuracy exceed the best test accuracy seen so far.
    if fixed_acc_test> fixed_acc and fixed_acc_train>fixed_acc:
        fixed_acc = fixed_acc_test
        best_fixed_epoch = fixed_epochs

print("Target Acc. {:.2f} in {} epochs at {:.2f} min per epoch".format(
        fixed_acc, best_fixed_epoch, np.mean(fixed_time)/60))
    



LR=0.01
Establishing Fixed LR Best results
Fixed Iteration: 1
Train Acc Q : 0.780483 
Train Time: 138.673
Test Acc Q : 0.650641  
Fixed Iteration: 2
Train Acc Q : 0.851610 
Train Time: 138.714
Test Acc Q : 0.645833  
Fixed Iteration: 3
Train Acc Q : 0.881327 
Train Time: 138.863
Test Acc Q : 0.641026  
Fixed Iteration: 4
Train Acc Q : 0.895130 
Train Time: 138.912
Test Acc Q : 0.370192  
Fixed Iteration: 5
Train Acc Q : 0.903949 
Train Time: 139.093
Test Acc Q : 0.782051  
Fixed Iteration: 6
Train Acc Q : 0.912577 
Train Time: 138.883
Test Acc Q : 0.831731  
Fixed Iteration: 7
Train Acc Q : 0.925613 
Train Time: 138.867
Test Acc Q : 0.666667  
Fixed Iteration: 8
Train Acc Q : 0.938650 
Train Time: 138.810
Test Acc Q : 0.740385  
Fixed Iteration: 9
Train Acc Q : 0.940376 
Train Time: 138.889
Test Acc Q : 0.647436  
Fixed Iteration: 10
Train Acc Q : 0.939034 
Train Time: 138.874
Test Acc Q : 0.629808  
Fixed Iteration: 11
Train Acc Q : 0.947086 
Train Time: 138.816
Test Acc Q : 0.658654 

In [13]:
# Ask the CUDA allocator to release cached blocks before rebuilding the model.
torch.cuda.empty_cache()

# Re-create the network from the same seed so the adaptive-LR run starts from
# the same initial weights as the fixed-LR run. Module construction order is
# kept as-is because each constructor draws from the seeded generator.
torch.manual_seed(756)
model_1 = torchvision.models.resnet50(pretrained=False)

# Single-channel stem: the X-rays are loaded as greyscale.
model_1.conv1 = nn.Conv2d(
    in_channels=1, out_channels=64,
    kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
    bias=False,
)

# Classifier head emitting one value per quantile.
num_features = model_1.fc.in_features
model_1.fc = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 9),
)
model_1 = model_1.to(device)


# Fresh optimizer bound to the new parameters, plus the loss hyper-parameters.
lr_is = 1e-2
optimizer = torch.optim.SGD(model_1.parameters(), lr=lr_is)
all_qs = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).to(device)
mean_is = 0
std_is = 1
penalty = 1
epsilon = 0.00


In [14]:
# Adaptive-LR run: stop as soon as both train and test accuracy beat the best
# test accuracy of the fixed-LR baseline (`fixed_acc`).
adaptive_time = []
adaptive_epochs = 0
# The loop variable is the 1-based epoch counter itself. Naming it `iter`
# would overwrite the builtin for the rest of the kernel session.
for adaptive_epochs in range(1, max_epochs + 1):
    print("Adaptive Iteration:", adaptive_epochs)
    start = time.time()
    adaptive_acc_train = train_adaptive_lr(model_1,train_dataloader,optimizer)
    end = time.time()
    print("Train Time: {:.3f}".format(end-start))
    adaptive_time.append(end-start)
    adaptive_acc_test = test(model_1,test_dataloader)
    if adaptive_acc_test>fixed_acc and adaptive_acc_train>fixed_acc:
        print()
        print("Target Acc Reached")
        print("Target Acc. {:.2f} in {} epochs at {:.2f} sec per epoch".format(
        adaptive_acc_test, adaptive_epochs, np.mean(adaptive_time)))
        break

print()


Adaptive Iteration: 1
Train Acc Q : 0.833781 
Train Time: 252.632
Test Acc Q : 0.628205  
Adaptive Iteration: 2
Train Acc Q : 0.911043 
Train Time: 253.561
Test Acc Q : 0.866987  

Target Acc Reached
Target Acc. 0.87 in 2 epochs at 253.10 sec per epoch

