In [1]:
import os
import glob
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import imageio
from skimage.transform import rescale, resize, downscale_local_mean
from skimage.restoration import (denoise_wavelet)
from skimage import exposure
from torch.utils import data
import pickle
from torchvision import transforms
from kymatio import Scattering2D
import torch
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.models as models
from sklearn.metrics import roc_auc_score
from torch.optim import lr_scheduler
from torch.autograd import Variable

In [3]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(243, 180, kernel_size = 3, stride = 1, padding = 0, groups=1, bias=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.batchnorm1 = nn.BatchNorm2d(180, momentum = 0.1)
#         self.layer2 = nn.Sequential(
#             nn.Conv2d(180, 32, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size = 2, stride = 2))
#         self.layer3 = nn.Sequential(
#             nn.Conv2d(16, 32, kernel_size = 3, stride = 1, padding = 1),
#             nn.ReLU(inplace=True))
#         self.batchnorm2 = nn.BatchNorm2d(32, momentum = 0.1)
        self.drop_out = nn.Dropout()
        self.fc_layer = nn.Sequential(nn.Linear(6480,2))
        
    def forward(self, x):
        k = x.size()[0]
        x = x.view(k,-1,15,15)
        output = self.layer1(x)
        output = self.batchnorm1(output)
#         output = self.layer2(output)
        #output = self.layer3(output)
#       output = self.batchnorm2(output)
        output = output.view(output.size(0), -1) # flatten
        output = self.fc_layer(output)
        return output#nn.functional.softmax(output, dim = 1)

In [4]:
class newcancer(data.Dataset):
    def __init__(self, samplerate,phase = 'trainc'):
        """
        Args:
            transform (optional): Optional transform to be applied on a sample.
            green: If true, only take the green channel
            samplerate: take how much percent of the original tadaset
            Note: Whenever you change a samplerate, you need to resave the pickle
        """
        file = open('scat'+str(samplerate)+phase+'.pickle','rb')
        self.image_data_dict = pickle.load(file)

    def __len__(self):
        return len(self.image_data_dict)

    def __getitem__(self,index):
        '''
        Return a tuple containing the image tensor and corresponding class for the given index.
        Parameter:
        index: This is the index created by _init_, it's the key of the dict in _init_
               Notice that a single patient could have multiple index associated.
        '''
        if index not in self.image_data_dict:
            raise ValueError('Index out of bound')
        return self.image_data_dict[index]


In [6]:
allauc = {}
allrecall = {}
allacc = {}
for rate in [0.7,0.8,0.9]:

    trainimg = newcancer(samplerate = rate,phase = 'trainc')
    testimg = newcancer(samplerate = rate,phase = 'testc')  
    use_cuda = torch.cuda.is_available()
    device = torch.cuda.device("cuda:0" if use_cuda else "cpu")
    params = {'batch_size': 4, 
              'shuffle': True,
              'num_workers': 4}
    patch_training_generator = data.DataLoader(trainimg, **params)
    patch_test_generator = data.DataLoader(testimg,**params)
    
    train_model = ConvNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(train_model.parameters(), lr=0.01)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=5)


    best_acc = 0.0
    best_acc1 = 0.0
    num_epochs = 15
    predall = [] #max
    predprob = [] #
    truelabel = []
    
    for epoch in range(num_epochs):
        train_model.train()
        train_acc = 0.0
        lentra = 0
        lentes = 0
        for i, (images, label) in enumerate(patch_training_generator):
            lentra += images.size()[0]
            #images.requires_grad_(True)
            images = Variable(images, requires_grad=True)
            label = Variable(label)
            # Run the forward pass
            outputs = train_model(images)
            loss = criterion(outputs, label)
            #a = list(train_model.parameters())[0].clone()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #b = list(train_model.parameters())[0].clone()
            #print(torch.equal(a.data, b.data))
            # Store result
            #print(outputs.data)
            _, predicted = torch.max(outputs.data, 1)

            # Save train accuracy
            train_acc += torch.sum(predicted == label.data)

        train_acc = float(train_acc) / lentra
        scheduler.step()
        # Check test accuracy, if the accuracy is higher than before, save the model

        train_model.eval()
        for child in train_model.children():
            if type(child)==nn.BatchNorm2d:
                child.track_running_stats = False
        ta = 0.0
        predall = [] #max
        predprob = [] #
        truelabel = []

        for i, (image, label) in enumerate(patch_test_generator):
            lentes += image.size()[0]
            images = Variable(image)
            label = Variable(label)
            out = train_model(image)
            pos_proba = torch.nn.functional.softmax(out.data)[:,1]
            predprob.append(pos_proba)
            truelabel.append(label.data)  
            _, prediction = torch.max(out.data,1)  
            prediction2 = (pos_proba > 0.5)*1 #prediction based on 0.5 threshold
            predall.append(prediction)
            prediction2 = (pos_proba > 0.5)*1
            ta1 =+ np.sum(prediction2.numpy() == label.data.numpy()) #used to calculate the accuracy based on 0.5 threshold
            ta += torch.sum(prediction == label.data)
        labels = torch.cat(truelabel).numpy()
        preds = torch.cat(predprob).numpy()
        bpreds = torch.cat(predall).numpy()
        ta = float(ta) / lentes #test size
        auc = roc_auc_score(labels, preds)
        recall = np.sum((bpreds == labels )&(labels == 1))/np.sum(labels == 1)
        print('auc = {}'.format(auc))
        print('recall = {}'.format(recall))
        print('accuracy = {}'.format(ta))
        print('accuracy from bpreds = {}'.format(np.sum(bpreds == labels)/len(labels)))
        print("Epoch {}: train_accuracy is {}, test_accuracy is {}".format(epoch, train_acc, ta))
        if epoch >= 10:
            if rate not in allauc.keys():
                allauc[rate] = [auc]
                allrecall[rate] = [recall]
                allacc[rate] = [ta]
            else:
                allauc[rate].append(auc)
                allrecall[rate].append(recall)
                allacc[rate].append(ta)





auc = 0.7610921948664222
recall = 0.7469879518072289
accuracy = 0.7195325542570952
accuracy from bpreds = 0.7195325542570952
Epoch 0: train_accuracy is 0.6669054441260746, test_accuracy is 0.7195325542570952
auc = 0.7078837087480356
recall = 0.7662650602409639
accuracy = 0.7061769616026711
accuracy from bpreds = 0.7061769616026711
Epoch 1: train_accuracy is 0.7385386819484241, test_accuracy is 0.7061769616026711
auc = 0.7197354635935045
recall = 0.8530120481927711
accuracy = 0.7262103505843072
accuracy from bpreds = 0.7262103505843072
Epoch 2: train_accuracy is 0.7707736389684814, test_accuracy is 0.7262103505843072
auc = 0.7316134101623887
recall = 0.6698795180722892
accuracy = 0.674457429048414
accuracy from bpreds = 0.674457429048414
Epoch 3: train_accuracy is 0.7922636103151862, test_accuracy is 0.674457429048414
auc = 0.8260083813514929
recall = 0.8192771084337349
accuracy = 0.7746243739565943
accuracy from bpreds = 0.7746243739565943
Epoch 4: train_accuracy is 0.7929799426934098,

auc = 0.8646160754378086
recall = 0.916030534351145
accuracy = 0.8291457286432161
accuracy from bpreds = 0.8291457286432161
Epoch 10: train_accuracy is 0.9103563474387528, test_accuracy is 0.8291457286432161
auc = 0.8710148181409968
recall = 0.8854961832061069
accuracy = 0.8040201005025126
accuracy from bpreds = 0.8040201005025126
Epoch 11: train_accuracy is 0.9164810690423163, test_accuracy is 0.8040201005025126
auc = 0.8480017961383026
recall = 0.8625954198473282
accuracy = 0.7788944723618091
accuracy from bpreds = 0.7788944723618091
Epoch 12: train_accuracy is 0.9192650334075724, test_accuracy is 0.7788944723618091
auc = 0.8705657835653345
recall = 0.8931297709923665
accuracy = 0.7889447236180904
accuracy from bpreds = 0.7889447236180904
Epoch 13: train_accuracy is 0.9253897550111359, test_accuracy is 0.7889447236180904
auc = 0.8907723394701392
recall = 0.8931297709923665
accuracy = 0.8291457286432161
accuracy from bpreds = 0.8291457286432161
Epoch 14: train_accuracy is 0.9209354120

In [10]:
images.size()

torch.Size([3, 3, 81, 15, 15])

In [7]:
allauc

{0.7: [0.85821110529072819,
  0.83749345206914627,
  0.85369303300157151,
  0.85267155578837084,
  0.84330801466736505],
 0.8: [0.82843795620437954,
  0.85407299270072989,
  0.83646715328467158,
  0.84239416058394168,
  0.85275912408759114],
 0.9: [0.86461607543780861,
  0.87101481814099679,
  0.84800179613830262,
  0.87056578356533454,
  0.89077233947013923]}

In [8]:
allacc

{0.7: [0.7963272120200334,
  0.8080133555926544,
  0.8013355592654424,
  0.8046744574290484,
  0.8013355592654424],
 0.8: [0.7819548872180451,
  0.8145363408521303,
  0.7869674185463659,
  0.7969924812030075,
  0.8095238095238095],
 0.9: [0.8291457286432161,
  0.8040201005025126,
  0.7788944723618091,
  0.7889447236180904,
  0.8291457286432161]}

In [9]:
allrecall

{0.7: [0.85783132530120487,
  0.86987951807228914,
  0.86506024096385548,
  0.87469879518072291,
  0.8602409638554217],
 0.8: [0.86131386861313863,
  0.88321167883211682,
  0.86861313868613144,
  0.85036496350364965,
  0.88321167883211682],
 0.9: [0.91603053435114501,
  0.8854961832061069,
  0.86259541984732824,
  0.89312977099236646,
  0.89312977099236646]}