## IMDB 1D CNN 시각화
Grad-CAM (Gradient-weighted Class Activation Mapping)을 이용한 클래스별 활성도 계산

-----

In [1]:
import os
import csv
import keras
import numpy as np
import pandas as pd
import keras.backend as K
from keras.utils import to_categorical

# Work from the project directory so that the relative 'CAM/...' paths used
# later in this notebook resolve. Set the IMDB_CNN_DIR environment variable to
# run on another machine; the original location remains the default.
os.chdir(os.environ.get('IMDB_CNN_DIR', 'C:/Users/HK/Desktop/CNN/IMDB'))

Using TensorFlow backend.


- Load a Pre-Trained Model

In [16]:
from keras.models import load_model

# Load the pre-trained 1D CNN; the path is relative to the current working
# directory.
base = load_model('CAM/visualize_3_32_5.hdf5')

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 100, 50)           250000    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 100, 32)           8032      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 50, 32)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 50, 32)            5152      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 25, 32)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 25, 32)            5152      
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 12, 32)            0         
__________

- Data Preparation

In [17]:
from keras.datasets import imdb
from keras.preprocessing import sequence

# Vocabulary cap passed to imdb.load_data(num_words=...).
max_features = 5_000

# Every review is padded/truncated to this many tokens before being fed to
# the model.
maxlen = 100

# Number of heatmap rows written per sample; matches the 25 time steps of the
# conv1d_3 output shown in the model summary.
lastlen = 25

In [18]:
# Load the IMDB reviews as integer sequences with the vocabulary capped at
# `max_features` words.
(xtrain, ytrain), (xtest, ytest) = imdb.load_data(num_words=max_features)

# Bring both splits to a fixed length of `maxlen` tokens per review.
xtrain, xtest = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (xtrain, xtest)
)

In [19]:
# Layer whose activations and gradients are used to build the Grad-CAM heatmap.
last_conv_layer = base.get_layer('conv1d_3')

# Number of filters (size of the last axis of the layer output). Read from the
# model instead of hard-coding 32 so it stays correct if another model is loaded.
last_fn = int(K.int_shape(last_conv_layer.output)[-1])

### ★★ Grad-CAM Algorithm

In [29]:
def Grad_CAM_1D(xdata, i):
    """Compute a Grad-CAM heatmap over the last conv layer for one sample.

    The class with the highest predicted score for row ``i`` is explained:
    the gradient of that output with respect to the output of
    ``last_conv_layer`` is averaged over the batch and time axes to obtain
    one weight per filter; the layer activations are scaled by these weights,
    averaged over filters, clipped at zero and divided by their maximum.

    Relies on the module-level ``base`` (model), ``last_conv_layer`` and
    ``maxlen``.

    Args:
        xdata: Array of padded sequences indexed as ``xdata[i, :]``; the
            selected row is reshaped to ``(1, maxlen)``, so it must hold
            exactly ``maxlen`` entries.
        i: Row index of the sample to explain.

    Returns:
        A tuple ``(heatmap, pred_class)`` where ``heatmap`` is a 1-D array
        with one non-negative value per time step of the last conv layer,
        scaled so that its maximum is (almost exactly) 1, and ``pred_class``
        is the argmax index of the model prediction.

    NOTE(review): K.gradients / K.function are rebuilt on every call; with
    the TensorFlow backend this presumably keeps adding ops to the graph, so
    calling this for many samples may get progressively slower - confirm.
    """
    # The model expects a batch, so wrap the single sequence in a batch of 1.
    sample = xdata[i, :].reshape(1, maxlen)

    predprob = base.predict(sample)
    pred_class = np.argmax(predprob)
    fitted = base.output[:, pred_class]

    # Gradient of the predicted-class output w.r.t. the conv feature map.
    grads = K.gradients(fitted, last_conv_layer.output)[0]

    # One importance weight per filter: mean gradient over batch and time axes.
    pooled_grads = K.mean(grads, axis=(0, 1))
    iterate = K.function([base.input], [pooled_grads, last_conv_layer.output[0]])
    pooled_grads_value, conv_layer_output_value = iterate([sample])

    # Broadcasting scales every filter channel by its weight without relying
    # on a separately maintained filter count.
    weighted = conv_layer_output_value * pooled_grads_value

    heatmap = np.mean(weighted, axis=-1)
    heatmap = np.maximum(heatmap, 0)          # keep only positive evidence
    heatmap /= (np.max(heatmap) + 1e-10)      # epsilon avoids division by zero

    return heatmap, pred_class

- TEST 데이터 10개에 대해서만 csv 파일로 활성도 출력

In [31]:
for idx in range(10):   # `idx` rather than `id`, which shadows the builtin

    heattmp, probtmp = Grad_CAM_1D(xtest, idx)

    # File name pattern: heat_<1-based sample number>_<true label>.csv
    out_path = 'CAM/heat_' + str(idx + 1) + '_' + str(ytest[idx]) + '.csv'

    # The context manager closes the file even if writing fails part-way.
    with open(out_path, 'w', encoding = 'utf-8', newline = '') as f:
        wr = csv.writer(f)
        # First row: predicted class index (argmax of the model output),
        # not a probability.
        wr.writerow([probtmp])
        # One row per heatmap value, i.e. per time step of the last conv layer.
        wr.writerows([value] for value in heattmp)