In [None]:
!pip install lime==0.1.1.37
!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # 下載資料集
!unzip food-11.zip # 解壓縮

# train一个基础分类任务的CNN

In [2]:
# make labels
import os
def EarnName(path="/content/food-11/", output='train.txt'):
    """Write the path of every file found under ``path`` to a text file.

    The directory tree is walked recursively, the collected paths are
    normalised to forward slashes, sorted, and written one per line.

    Args:
        path: Root directory to scan. Defaults to the original hard-coded
            dataset location.
        output: Text file that receives the listing. Defaults to the original
            ``train.txt`` in the current working directory.

    Returns:
        The sorted list of paths that was written.
    """
    # os.path.join avoids the doubled "//" that plain string concatenation
    # produced whenever `root` already ended with a separator.
    original_images = sorted(
        os.path.join(root, filename).replace('\\', '/')
        for root, _dirs, filenames in os.walk(path)
        for filename in filenames
    )
    print('num: {}'.format(len(original_images)))

    # The context manager closes the file even if a write fails. Per-file
    # printing was dropped: it emitted one output line per dataset file.
    with open(output, 'w') as pict_name:
        pict_name.writelines(filename + '\n' for filename in original_images)
    return original_images

In [4]:
# dataset: make dataset
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset

In [4]:
# --- Configuration -----------------------------------------------------------
# CSV manifests; the loading code reads the columns 'name' and 'label' from them.
TRAIN_PATH = '/content/drive/MyDrive/Lee/food11_train.csv'
TEST_PATH = '/content/drive/MyDrive/Lee/food11_val.csv'
BATCH_SIZE = 128
IMG_SIZE = 140    # side length images are resized to before random cropping
CROP_SIZE = 128   # side length of the tensors fed to the network
# Per-channel normalisation statistics (the commonly used ImageNet values).
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
# Best validation accuracy seen so far; updated by the training loop.
# NOTE(review): the name looks like a typo for BEST_ACC; kept because other cells use it.
BEAT_ACC = 0.
SAVE_PATH = '/content/drive/MyDrive/Lee/weights/best.pth.tar'

# Training-time augmentation.
# The previous version wrapped these ops in `transforms.RandomChoice`, which
# applies exactly ONE randomly selected op per sample. That made the
# Resize(IMG_SIZE) -> RandomCrop(CROP_SIZE) pair meaningless (IMG_SIZE had no
# effect, and RandomCrop, when picked, cut a CROP_SIZE patch out of the raw
# full-resolution image). The ops are now applied in sequence.
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomCrop((CROP_SIZE, CROP_SIZE)),
    # The three colour jitters are applied in a random order.
    transforms.RandomOrder([
        transforms.ColorJitter(brightness=0.1),
        transforms.ColorJitter(saturation=0.2),
        transforms.ColorJitter(contrast=0.2),
    ]),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])
# Deterministic evaluation pipeline: resize straight to the network input size.
test_transform = transforms.Compose([
    transforms.Resize((CROP_SIZE, CROP_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

class ImgDataset(Dataset):
    """Image-classification dataset backed by parallel sequences of paths and labels.

    Args:
        imgs_path: Sequence (list, pandas Series, ...) of image paths relative
            to ``root``.
        imgs_label: Sequence of class labels, aligned with ``imgs_path``.
        is_training: When True ``train_transform`` is applied to each image,
            otherwise ``test_transform``.
        root: Directory the relative image paths are resolved against.
            Defaults to the original hard-coded ``/content``.

    Raises:
        ValueError: If the two sequences differ in length.
    """

    def __init__(self, imgs_path, imgs_label, is_training=True, root='/content'):
        # Materialise as plain lists so that __getitem__ indexes by POSITION.
        # Indexing a pandas Series with `series[i]` is label-based and fails
        # as soon as the frame was filtered, shuffled or re-indexed.
        self.imgs_path = list(imgs_path)
        self.imgs_label = list(imgs_label)
        if len(self.imgs_path) != len(self.imgs_label):
            raise ValueError(
                'imgs_path and imgs_label must have the same length, got %d and %d'
                % (len(self.imgs_path), len(self.imgs_label))
            )
        self.is_training = is_training
        self.root = root

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the sample at position ``index``."""
        img_path = self.imgs_path[index]
        img_label = self.imgs_label[index]
        # convert() returns an independent image, so the file can be closed right away.
        with Image.open(os.path.join(self.root, img_path)) as img_file:
            img = img_file.convert('RGB')
        transform = train_transform if self.is_training else test_transform
        return transform(img), img_label

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.imgs_path)

# Read both CSV manifests and pull out the path / label columns.
train_info = pd.read_csv(TRAIN_PATH)
test_info = pd.read_csv(TEST_PATH)
train_imgs, train_labels = train_info['name'], train_info['label']
test_imgs, test_labels = test_info['name'], test_info['label']

# Datasets: augmentation for training, deterministic transform for validation.
trainDS = ImgDataset(train_imgs, train_labels, is_training=True)
testDS = ImgDataset(test_imgs, test_labels, is_training=False)

# Loaders: the training loader shuffles and drops the last incomplete batch.
trainDL = DataLoader(trainDS, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
testDL = DataLoader(testDS, batch_size=BATCH_SIZE, shuffle=False)

# Sanity check: show the shape and labels of the first training batch only.
for imgs, labels in trainDL:
    print(imgs.shape)
    print(labels)
    break

torch.Size([128, 3, 128, 128])
tensor([ 5,  2,  9,  4, 10,  4,  5,  5,  9,  5,  9,  6,  8,  6,  2,  4,  5,  1,
         5,  5,  0,  9,  6,  2,  3,  5,  2,  1,  4,  8,  5,  3,  5,  1,  5,  2,
         7,  2,  5,  8,  3,  8,  2,  1,  3,  9,  3, 10,  3,  5,  2,  7, 10, 10,
         5,  2,  0,  9,  2,  0,  9,  2,  2,  0,  9,  2,  3, 10,  8,  0,  8,  9,
         5,  2,  6,  0,  9,  4,  2, 10,  9,  2,  5,  9,  9,  9,  9, 10,  3,  9,
         9,  8,  9,  3,  5,  5, 10,  5,  9,  9,  9,  0, 10,  9,  5,  7,  9,  5,
         9,  7,  5,  0, 10,  5,  2,  8,  9,  2, 10,  2,  5,  9, 10,  1,  1,  8,
         2,  8])


In [5]:
import torch.nn as nn
class Classifier(nn.Module):
    """Five-stage CNN followed by a three-layer MLP head with 11 outputs.

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), so the spatial
    size is halved five times. The first linear layer expects 512*4*4
    features, i.e. 128x128 input images.
    """

    def __init__(self):
        super().__init__()

        # Channel widths at the boundaries of the five convolutional stages.
        widths = [3, 64, 128, 256, 512, 512]
        conv_layers = []
        for in_channels, out_channels in zip(widths[:-1], widths[1:]):
            conv_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            ]
        # A flat Sequential keeps the layer indices (0..19) and the
        # state_dict keys identical to the explicit layer listing.
        self.cnn = nn.Sequential(*conv_layers)

        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Map a batch of images to an (N, 11) tensor of class scores."""
        features = self.cnn(x)
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)

# Build the network and move it to the GPU.
model = Classifier()
model = model.cuda()

# Objective and optimiser used by the training loop.
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=5e-4)

# Total number of training epochs.
EPOCH = 100


In [6]:
import math
from torch.optim.lr_scheduler import LambdaLR
def Cos_warmup(optimizer, epoch_warmup, epoch_training, num_cycles=0.5, last_epoch=-1):
    """Build a per-epoch LR schedule: linear warm-up followed by cosine decay.

    The returned scheduler multiplies the optimizer's base learning rate by a
    factor that
      * rises linearly from ``1 / epoch_warmup`` to 1 over the first
        ``epoch_warmup`` epochs, then
      * follows ``0.5 * (1 + cos(2 * pi * num_cycles * progress))`` where
        ``progress`` runs from 0 to 1 over the remaining epochs.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        epoch_warmup: Number of warm-up epochs.
        epoch_training: Total number of training epochs.
        num_cycles: Number of cosine cycles; the default 0.5 is a single
            half-wave that decays from 1 towards 0.
        last_epoch: Passed through to ``LambdaLR`` (index of the last epoch
            when resuming, -1 to start fresh).

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` instance; call ``step()`` once
        per epoch.
    """
    def lr_lambda(current_epoch):
        if current_epoch < epoch_warmup:
            # `+ 1` so that epoch 0 already trains with a non-zero learning
            # rate; a factor of 0 would turn the whole first epoch into a
            # no-op for the weights.
            return float(current_epoch + 1) / float(max(1, epoch_warmup))

        progress = float(current_epoch - epoch_warmup) / \
                   float(max(1, epoch_training - epoch_warmup))
        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))

    return LambdaLR(optimizer, lr_lambda, last_epoch)


# Scheduler stepped once per epoch by the training loop.
# The schedule length is tied to EPOCH (instead of repeating the literal 100)
# so that the cosine decay always ends with the training run.
cosWarmUp = Cos_warmup(
    optimizer,
    epoch_warmup=5,
    epoch_training=EPOCH
)

In [7]:
# Smoke test: push a random batch through the network and check the output shape.
# Run in eval mode without autograd so the random noise neither updates the
# BatchNorm running statistics nor builds a graph; the previous mode is restored.
x = torch.randn((2, 3, 128, 128))
was_training = model.training
model.eval()
with torch.no_grad():
    y = model(x.cuda())
model.train(was_training)
assert tuple(y.shape) == (2, 11), 'unexpected output shape: {}'.format(tuple(y.shape))

In [1]:
import time

# Train for EPOCH epochs, validating after each one and checkpointing the
# model whenever the validation accuracy improves on BEAT_ACC.
for epoch in range(EPOCH):
    epoch_start_time = time.time()
    # Running sums; losses are weighted by batch size so the reported value
    # is a true per-sample mean even when the last batch is smaller.
    train_correct, train_loss_sum, train_seen = 0, 0., 0
    val_correct, val_loss_sum, val_seen = 0, 0., 0

    model.train()
    for imgs, labels in trainDL:
        imgs, labels = imgs.cuda(), labels.cuda()
        optimizer.zero_grad()
        train_pred = model(imgs)
        batch_loss = loss(train_pred, labels)
        batch_loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        train_correct += (train_pred.argmax(dim=1) == labels).sum().item()
        train_loss_sum += batch_loss.item() * batch_size
        train_seen += batch_size
    # The learning-rate schedule advances once per epoch.
    cosWarmUp.step()

    model.eval()
    with torch.no_grad():
        for imgs, labels in testDL:
            imgs, labels = imgs.cuda(), labels.cuda()
            val_pred = model(imgs)
            batch_loss = loss(val_pred, labels)

            batch_size = labels.size(0)
            val_correct += (val_pred.argmax(dim=1) == labels).sum().item()
            val_loss_sum += batch_loss.item() * batch_size
            val_seen += batch_size

    # Divide by the number of samples actually processed: trainDL uses
    # drop_last=True, so len(trainDS) would over-count the denominator.
    train_acc = train_correct / max(1, train_seen)
    train_loss = train_loss_sum / max(1, train_seen)
    val_acc = val_correct / max(1, val_seen)
    val_loss = val_loss_sum / max(1, val_seen)

    # Keep only the best checkpoint (by validation accuracy).
    if val_acc > BEAT_ACC:
        state = {
            'epoch': epoch,
            'model': model.state_dict()
        }
        # Make sure the target directory exists before saving.
        os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
        torch.save(state, SAVE_PATH)
        print('saving model to:'+ SAVE_PATH)
        BEAT_ACC = val_acc

    # Report the results of this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
        (epoch + 1, EPOCH, time.time()-epoch_start_time, \
         train_acc, train_loss, val_acc, val_loss))

NameError: ignored

# CNN的可解释性

# saliency map

原理：

---
在我们计算loss的时候，与loss相关的项有image、label和model parameter。通常在backward时，我们只在乎loss对model parameter的偏微分值；但在数学上image本身也是continuous tensor，我们同样可以计算loss对image的偏微分值。这个值代表了在参数和标签都固定的情况下，稍微改变image的pixel value对loss的影响有多大，人们把这种变化的剧烈程度解读为pixel的重要性（每个pixel都有自己的偏微分值）。因此，把同一张图中loss对每个pixel的偏微分值计算出来，就可以知道哪些位置是model在判断时的重要依据。

操作：

---
一般情况下在forward后算出loss，然后进行backward；pytorch预设只计算loss对model参数的偏微分值，因此只需要告诉pytorch，image也是需要计算偏微分的对象之一（对image调用`requires_grad_()`）。

In [None]:
def normalize(image):
    """Min-max scale ``image`` (tensor or array) to the range [0, 1].

    A constant input (max == min) is mapped to all zeros instead of the NaNs
    that a 0/0 division would produce.
    """
    lowest = image.min()
    span = image.max() - lowest
    if span == 0:
        span = 1
    return (image - lowest) / span


def compute_saliency_maps(x, y, model):
    """Compute per-pixel saliency maps as |d loss / d input|.

    Args:
        x: Batch of input images, shaped as the model expects.
        y: Class labels for ``x``.
        model: Classifier; it is switched to eval mode, and the computation
            runs on whatever device its parameters live on.

    Returns:
        CPU tensor with the same shape as ``x`` holding the absolute input
        gradients, min-max normalised independently for every image.
    """
    model.eval()
    device = next(model.parameters()).device

    # Work on a private copy so the caller's tensor is never switched to
    # requires_grad as a side effect.
    x = x.detach().to(device).clone().requires_grad_()
    y_pred = model(x)
    loss_func = torch.nn.CrossEntropyLoss()
    loss = loss_func(y_pred, y.to(device))

    # autograd.grad returns only d(loss)/d(x) and leaves the .grad fields of
    # the model parameters untouched (loss.backward() would accumulate into them).
    (input_grad,) = torch.autograd.grad(loss, x)

    saliencies = input_grad.abs().detach().cpu()
    # Normalise each image separately: gradient magnitudes can differ by
    # orders of magnitude between images.
    saliencies = torch.stack([normalize(item) for item in saliencies])
    return saliencies

In [None]:
img_indices = [83, 4218, 4707, 8598]
# `train_set.getbatch` is not defined anywhere in this notebook, so the batch
# is assembled from ImgDataset in evaluation mode (deterministic transform).
explain_set = ImgDataset(train_imgs, train_labels, is_training=False)
samples = [explain_set[i] for i in img_indices]
images = torch.stack([img for img, _ in samples])
labels = torch.tensor([int(label) for _, label in samples])
saliencies = compute_saliency_maps(images, labels, model)

# Top row: input images, bottom row: the matching saliency maps.
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
for row, target in enumerate([images, saliencies]):
    for column, img in enumerate(target):
        # (C, H, W) -> (H, W, C) for matplotlib; min-max rescale because the
        # inputs were standardised by transforms.Normalize.
        axs[row][column].imshow(normalize(img.permute(1, 2, 0)).numpy())

plt.show()
plt.close()

# Filter explanation
原理：

---
希望知道某一个filter到底认出了什么，有两件事可以做：

filter activation: 挑选几张图片，看看图片中哪些位置会activate这个filter

filter visualization： 怎样的image可以最大程度的activate这个filter

操作：

---
原理上可以直接修改model definition，让forward不止返回prediction，还返回activation map；但实际操作中更方便的做法是使用pytorch提供的hook函数（`register_forward_hook`）。




In [None]:
def normalize(image):
    """Min-max scale ``image`` (tensor or array) to the range [0, 1].

    A constant input (max == min) is mapped to all zeros instead of the NaNs
    that a 0/0 division would produce.
    """
    lowest = image.min()
    span = image.max() - lowest
    if span == 0:
        span = 1
    return (image - lowest) / span

# Filled by the forward hook inside filter_explaination; reset to None on exit.
layer_activations = None

def filter_explaination(x, model, cnnid, filterid, iteration=100, lr=1):
    """Explain one convolutional filter by activation and by visualisation.

    Args:
        x: Batch of images. They are used twice: to show where the chosen
            filter fires, and as the starting point of the optimisation.
        model: Network exposing its convolutional stack as ``model.cnn``
            (an indexable ``nn.Sequential``).
        cnnid: Index of the layer inside ``model.cnn`` whose output is inspected.
        filterid: Channel (filter) index within that layer's output.
        iteration: Number of gradient-ascent steps for the visualisation.
        lr: Learning rate of the Adam optimiser that updates the image.

    Returns:
        ``(filter_activations, filter_visualization)``:
        the chosen channel's activation map for every input image (CPU
        tensor), and the first image of the batch after it was optimised to
        maximise that channel's total activation (CPU tensor).
    """
    global layer_activations
    model.eval()
    device = next(model.parameters()).device

    def hook(module, inputs, output):
        # Stash the hooked layer's output of the most recent forward pass.
        global layer_activations
        layer_activations = output

    # `model.children` is a method, not a container; the layer is looked up
    # in the convolutional Sequential instead.
    hook_handle = model.cnn[cnnid].register_forward_hook(hook)
    try:
        # Filter activation: the activation map of the chosen filter for x.
        with torch.no_grad():
            model(x.to(device))
        filter_activations = layer_activations[:, filterid, :, :].detach().cpu()

        # Filter visualisation: find the image that activates the filter most.
        # Optimise a private copy so the caller's images stay untouched.
        x = x.detach().to(device).clone().requires_grad_()
        optimizer = torch.optim.Adam([x], lr=lr)

        for _ in range(iteration):
            optimizer.zero_grad()
            model(x)

            # Minimising the negated sum performs gradient ascent on the activation.
            objective = -layer_activations[:, filterid, :, :].sum()

            # Only the image gradient is needed; autograd.grad avoids
            # accumulating gradients into the model parameters.
            (x.grad,) = torch.autograd.grad(objective, x)
            optimizer.step()

        # Index the batch dimension directly; squeeze() would drop it for a
        # batch of one and return a single channel instead of an image.
        filter_visualization = x.detach().cpu()[0]
    finally:
        # Always detach the hook and release the captured activations.
        hook_handle.remove()
        layer_activations = None

    return filter_activations, filter_visualization

In [None]:
img_indices = [83, 4218, 4707, 8598]
# `train_set.getbatch` is not defined anywhere in this notebook, so the batch
# is assembled from ImgDataset in evaluation mode (deterministic transform).
explain_set = ImgDataset(train_imgs, train_labels, is_training=False)
samples = [explain_set[i] for i in img_indices]
images = torch.stack([img for img, _ in samples])
labels = torch.tensor([int(label) for _, label in samples])
# Index 15 of Classifier.cnn is the MaxPool2d that closes the fourth conv stage.
filter_activations, filter_visualization = filter_explaination(images, model, cnnid=15, filterid=0, iteration=100, lr=0.1)

# The image that maximally activates the chosen filter.
plt.imshow(normalize(filter_visualization.permute(1, 2, 0)))
plt.show()
plt.close()

# Top row: input images, bottom row: activation map of the filter per image.
fig, axs = plt.subplots(2, len(img_indices), figsize=(15, 8))
for i, img in enumerate(images):
    # Min-max rescale for display: inputs were standardised by transforms.Normalize.
    axs[0][i].imshow(normalize(img.permute(1, 2, 0)))
for i, img in enumerate(filter_activations):
    axs[1][i].imshow(normalize(img))
plt.show()
plt.close()

# Lime

基于现成的库

In [None]:
from skimage.segmentation import slic
from lime import lime_image
from pdb import set_trace
def predict(input):
    """Classifier function handed to LIME.

    Args:
        input: Batch of images in channels-last layout (N, H, W, C), as a
            numpy array or nested sequence.

    Returns:
        numpy array with the raw model outputs, one row per image.
    """
    model.eval()
    device = next(model.parameters()).device
    # The network expects channels-first tensors: (N, H, W, C) -> (N, C, H, W).
    batch = torch.FloatTensor(input).permute(0, 3, 1, 2)

    # Pure inference: skip building an autograd graph for every LIME batch.
    with torch.no_grad():
        output = model(batch.to(device))
    return output.detach().cpu().numpy()

def segmentation(input):
    """Segmentation function handed to LIME: label the image's regions with SLIC."""
    slic_options = dict(n_segments=100, compactness=1, sigma=1)
    return slic(input, **slic_options)

img_indices = [83, 4218, 4707, 8598]
# `train_set.getbatch` is not defined anywhere in this notebook, so the batch
# is assembled from ImgDataset in evaluation mode (deterministic transform).
explain_set = ImgDataset(train_imgs, train_labels, is_training=False)
samples = [explain_set[i] for i in img_indices]
images = torch.stack([img for img, _ in samples])
labels = torch.tensor([int(label) for _, label in samples])
fig, axs = plt.subplots(1, len(img_indices), figsize=(15, 8))
# Fix the seed so the LIME explanations are reproducible.
np.random.seed(16)

# One explainer is enough; it does not depend on the image being explained.
explainer = lime_image.LimeImageExplainer()

for idx, (image, label) in enumerate(zip(images.permute(0, 2, 3, 1).numpy(), labels)):
    x = image.astype(np.double)
    explaination = explainer.explain_instance(
        image=x,
        classifier_fn=predict,          # how a batch of images is turned into predictions
        segmentation_fn=segmentation,   # how an image is split into segments
        # Explain all 11 classes: with the default top_labels=5 the lookup
        # below raises KeyError whenever the true label is not among the
        # model's five highest-scoring classes.
        top_labels=11
    )

    lime_img, mask = explaination.get_image_and_mask(
        label=label.item(),
        positive_only=False,
        hide_rest=False,
        num_features=11,
        min_weight=0.05
    )

    # Min-max rescale for display: inputs were standardised by transforms.Normalize.
    axs[idx].imshow(normalize(lime_img))

plt.show()
plt.close()