In [1]:
import io
from PIL import Image
from torchvision import models, transforms
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np
import cv2
import json

# input image
LABELS_file = 'kvasir-labels.json'
image_file = '/home/charis/kul-thesis/kvasir-dataset-v2/images/polyps/cju0qkwl35piu0993l0dewei2.jpg'

# networks such as googlenet, resnet, densenet already use global average pooling at the end, so CAM could be used directly.
model_id = 2
if model_id == 1:
    net = models.squeezenet1_1(pretrained=True)
    finalconv_name = 'features' # this is the last conv layer of the network
elif model_id == 2:
    net = models.resnet50(pretrained=True)
    finalconv_name = 'layer4'
elif model_id == 3:
    net = models.densenet161(pretrained=True)
    finalconv_name = 'features'


In [2]:
import torch
from torch import nn

device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# load the imagenet category list
with open(LABELS_file) as f:
    classes = json.load(f)


In [3]:
numFeatures = net.fc.in_features
# loop over the modules of the net and set the parameters of
# batch normalization modules as not trainable
for module, param in zip(net.modules(), net.parameters()):
	if isinstance(module, nn.BatchNorm2d):
		param.requires_grad = False
# define the network head and attach it to the net
headnet = nn.Sequential(
	nn.Linear(numFeatures, len(classes)),
)
net.fc = headnet
# append a new classification top to our feature extractor and pop it
# on to the current device
net = net.to(device)

In [4]:
BASE_PATH = "/home/charis/kul-thesis/kvasir-dataset-v2/"

VAL_SPLIT = 0.1
TRAIN = os.path.join(BASE_PATH, "images")
TRAIN_LABELS = os.path.join(BASE_PATH, "labels")

In [5]:
# specify ImageNet mean and standard deviation and image size
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
IMAGE_SIZE = 224


In [6]:
FEATURE_EXTRACTION_BATCH_SIZE = 256
FINETUNE_BATCH_SIZE = 2
PRED_BATCH_SIZE = 8
EPOCHS = 40
LR = 0.001
LR_FINETUNE = 0.0005

In [7]:
from torch.utils.data import DataLoader
from torchvision import datasets
import os
import numpy as np
import shutil

In [8]:
import os
from PIL import Image
import torch
from torch.utils import data
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np

# get the dataloader (Note: without data augmentation)
def get_loader(img_root, label_root, batch_size=FINETUNE_BATCH_SIZE, mode='train', num_thread=os.cpu_count(), pin=True):
    if mode == 'train':
        # define augmentation pipelines
        transform = transforms.Compose([
            transforms.RandomResizedCrop(IMAGE_SIZE),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize(mean=MEAN, std=STD)
        ])
        t_transform = transforms.Compose([
            transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize(mean=MEAN, std=STD)
        ])
        dataset = datasets.ImageFolder(root=img_root, transform=transform)
        data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=num_thread,
                                      pin_memory=pin)
        return dataset, data_loader


In [9]:
train_dataset, train_loader = get_loader(img_root=TRAIN, label_root=TRAIN_LABELS)

In [10]:
# initialize loss function and optimizer (notice that we are only
# providing the parameters of the classification top to our optimizer)
lossFunc = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.fc.parameters(), lr=LR)
# calculate steps per epoch for training and validation set
trainSteps = len(train_dataset) // FEATURE_EXTRACTION_BATCH_SIZE
# initialize a dictionary to store training history
H = {"train_loss": [], "train_acc": []}

In [11]:
print("[INFO] training the network...")
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader, Dataset
net = net.to(device)

for e in tqdm(range(EPOCHS)):
	net.train()
	totalTrainLoss = 0
	trainCorrect = 0
	for (i, (x, y)) in enumerate(train_loader):
		# send the input to the device
		(x, y) = (x.to(device), y.to(device))
		pred = net(x)
		loss = lossFunc(pred, y)
		loss.backward()
		# check if we are updating the net parameters and if so
		# update them, and zero out the previously accumulated gradients
		if (i + 2) % 2 == 0:
			optimizer.step()
			optimizer.zero_grad()
		# add the loss to the total training loss so far and
		# calculate the number of correct predictions
		totalTrainLoss += loss
		trainCorrect += (pred.argmax(1) == y).type(
			torch.float).sum().item()

	# calculate the average training and validation loss
	avgTrainLoss = totalTrainLoss / trainSteps
	# calculate the training and validation accuracy
	trainCorrect = trainCorrect / len(train_dataset)
	# update our training history
	H["train_loss"].append(avgTrainLoss.cpu().detach().numpy())
	H["train_acc"].append(trainCorrect)
	# print the model training and validation information
	print("[INFO] EPOCH: {}/{}".format(e + 1, EPOCHS))
	print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(
		avgTrainLoss, trainCorrect))


[INFO] training the network...


  0%|          | 0/40 [00:00<?, ?it/s]

[INFO] EPOCH: 1/40
Train loss: 212.938339, Train accuracy: 0.3974
[INFO] EPOCH: 2/40
Train loss: 189.209488, Train accuracy: 0.4819
[INFO] EPOCH: 3/40
Train loss: 184.525726, Train accuracy: 0.5068
[INFO] EPOCH: 4/40
Train loss: 180.177063, Train accuracy: 0.5201
[INFO] EPOCH: 5/40
Train loss: 179.459869, Train accuracy: 0.5249
[INFO] EPOCH: 6/40
Train loss: 175.062347, Train accuracy: 0.5346
[INFO] EPOCH: 7/40
Train loss: 179.420853, Train accuracy: 0.5284
[INFO] EPOCH: 8/40
Train loss: 173.692001, Train accuracy: 0.5415
[INFO] EPOCH: 9/40
Train loss: 174.105072, Train accuracy: 0.5460
[INFO] EPOCH: 10/40
Train loss: 174.438736, Train accuracy: 0.5470
[INFO] EPOCH: 11/40
Train loss: 170.241516, Train accuracy: 0.5633
[INFO] EPOCH: 12/40
Train loss: 169.548721, Train accuracy: 0.5653
[INFO] EPOCH: 13/40
Train loss: 168.154709, Train accuracy: 0.5650
[INFO] EPOCH: 14/40
Train loss: 169.476578, Train accuracy: 0.5553


In [None]:
trainCorrect

Error: Session cannot generate requests

In [None]:
# hook the feature extractor
net = net.cpu()
features_blobs = []
def hook_feature(module, input, output):
    features_blobs.append(output.data.cpu().numpy())

net._modules.get(finalconv_name).register_forward_hook(hook_feature)

# get the softmax weight
params = list(net.parameters())
weight_softmax = np.squeeze(params[-2].data.numpy())

def returnCAM(feature_conv, weight_softmax, class_idx):
    # generate the class activation maps upsample to 256x256
    size_upsample = (256, 256)
    bz, nc, h, w = feature_conv.shape
    output_cam = []
    for idx in class_idx:
        cam = weight_softmax[idx].dot(feature_conv.reshape((nc, h*w)))
        cam = cam.reshape(h, w)
        cam = cam - np.min(cam)
        cam_img = cam / np.max(cam)
        cam_img = np.uint8(255 * cam_img)
        output_cam.append(cv2.resize(cam_img, size_upsample))
    return output_cam


normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
preprocess = transforms.Compose([
   transforms.Resize((224,224)),
   transforms.ToTensor(),
   normalize
])


In [20]:
# load test image
img_pil = Image.open(image_file)
img_tensor = preprocess(img_pil)
img_variable = Variable(img_tensor.unsqueeze(0))
logit = net(img_variable)


h_x = F.softmax(logit, dim=1).data.squeeze()
probs, idx = h_x.sort(0, True)
probs = probs.numpy()
idx = idx.numpy()

In [22]:
for i in range(0, 5):
    print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

0.370 -> ulterative-colitis
0.184 -> dyed-lifted-polyps
0.156 -> esophagitis
0.075 -> normal-cecum
0.069 -> normal-z-line


In [24]:
# generate class activation mapping for the top1 prediction
CAMs = returnCAM(features_blobs[0], weight_softmax, [idx[0]])

# render the CAM and output
print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
img = cv2.imread(image_file)
height, width, _ = img.shape
heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
result = heatmap * 0.3 + img * 0.5
cv2.imwrite('CAM.jpg', result)

output CAM.jpg for the top1 prediction: ulterative-colitis


True