In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import random as rn
import csv
import os
import cv2
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img

In [None]:
# Fix the random seeds used by TensorFlow / NumPy / random (for reproducibility).
# NOTE(review): PYTHONHASHSEED is presumably only honoured at interpreter
# start-up, so setting it here may not affect the running kernel — confirm.
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(7)
rn.seed(7)

# Limit TensorFlow to a single thread for both intra-op and inter-op work.
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

# Seed the TensorFlow graph-level RNG, then make Keras use a session that
# was created with the single-threaded configuration above.
tf.set_random_seed(7)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [None]:
batch_size = 32
n_epochs = 50

# 画像データ取り込み


In [None]:
DATA_FOLDER = '../data/'
datalist = pd.read_csv(os.path.join(DATA_FOLDER, 'frames_data.csv'), names=["img_path", "l_class", 's_class','timestamp'])
datalist.head()

In [None]:
# Class names for the `l_class` column of frames_data.csv.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously merged
# '5gokan-mediacenter' and '8gokan' into a single bogus class name.
l_classlist = ['4gokan', '5gokan', '5gokan-denkisogokan', '5gokan-mediacenter',
               '8gokan', 'denkisogokan', 'lab', 'lab-lounge', 'lounge']
# Class names for the `s_class` column of frames_data.csv.
s_classlist = ['4gokan_inside', '4gokan_outside', '5gokan-denkisogokan', '5gokan-mediacenter',
               '5gokan_1F', '5gokan_2F', '5gokan_3F', '5gokan_ent_east', '5gokan_lounge',
               '5gokan_out_east', '5gokan_parking', '5gokan_smoking', '5gokan_stairs_cnt',
               '5gokan_stairs_west', '8gokan_1F', '8gokan_ent_north', '8gokan_ent_south',
               'denkisogokan_2F', 'denkisogokan_3F', 'denkisogokan_4F', 'denkisogokan_elevator',
               'denkisogokan_lounge', 'denkisogokan_stairs', 'lab-lounge', 'lab_bs_cnt',
               'lab_corner', 'lab_desk', 'lab_desk_table', 'lab_ent', 'lab_printer',
               'lab_table', 'lab_wb_cnt', 'lab_wb_ent', 'lounge']
# Class counts; num_s_class sizes the softmax output of every model below.
num_l_class = len(l_classlist)
num_s_class = len(s_classlist)

In [None]:
print(num_s_class)

In [None]:
dfl = datalist.drop(['s_class','timestamp'], axis=1)
dfl.head()

In [None]:
dfl.groupby('l_class').count()

In [None]:
dfs = datalist.drop(['l_class', 'timestamp'], axis=1)
dfs.groupby('s_class').count()

# Markdown
from sklearn.preprocessing import LabelEncoder

# Markdown
le = LabelEncoder()
le.fit(dfs.s_class)
dfs["labels"] = le.transform(dfs.s_class)
dfs.groupby('labels')

# Markdown
cor_table = dfs.groupby('labels').s_class.unique()
cor_table = pd.DataFrame(cor_table)
cor_table.head()

# Markdown
cor_table.to_csv('cor_table.csv', index=False)

# Markdown
dfs = dfs.drop(['s_class'], axis=1)

# Markdown
x = []
for i in range(len(dfs.img_path)):
    image = img_to_array(load_img(dfs.img_path[i], target_size=(224,224)))
    x.append(image)

# Markdown
y = dfs.labels
print(y)

# Markdown
#正規化
x = np.array(x, dtype='float32')/255

# Markdown
#np.array形式のデータを保存　x, y
np.save(file='x.npy', arr=x)
np.save(file='y.npy', arr=y)

In [None]:
#x, yをload
x = np.load(file='x.npy')
y = np.load(file='y.npy')

In [None]:
print(x.shape)
print(y.shape)

# train test & validation set split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
x, x_valid, y, y_valid = train_test_split(x,y, random_state=42, test_size=0.2, stratify=y)

# EDA(探索的データ解析)


In [None]:
fig = plt.figure(figsize=(9,18))
fig.subplots_adjust(left=0, right=1, bottom=0, top=0.5, hspace=0.05, wspace=0.05)
for i in range(5):
    ax = fig.add_subplot(1, 5, i+1, xticks=[], yticks=[])
    ax.imshow(x[i])

# CNN(keras NotPretrained)

In [None]:
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Activation, add, Add, Dropout, BatchNormalization, GlobalMaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# CNN trained from scratch: two (conv, conv, max-pool) stages followed by a
# softmax classifier over the num_s_class classes. The intermediate tensors
# x1..x6 are kept as separate names because later cells attach pooling heads
# to them to extract per-layer features.
inputs = Input(shape=(224,224,3))
x1 = Conv2D(64, kernel_size=(3,3), activation='relu')(inputs)
x2 = Conv2D(64, kernel_size=(3,3), activation='relu')(x1)
x3 = MaxPooling2D(pool_size=(2,2))(x2)
x4 = Conv2D(128, kernel_size=(3,3), activation='relu')(x3)
x5 = Conv2D(128, kernel_size=(3,3), activation='relu')(x4)
x6 = MaxPooling2D(pool_size=(2,2))(x5)
x7 = Flatten()(x6)
prediction = Dense(num_s_class, activation='softmax')(x7)

model = Model(inputs=inputs, outputs=prediction)
# Early stopping watches val_loss, so fit() must be given validation data
# (or validation_split) for the callback to have something to monitor.
early_stopping = EarlyStopping(monitor='val_loss', verbose=1, mode='auto')
# categorical_crossentropy: the targets passed to fit() must be one-hot encoded.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.save('omni_video.h5')
model.summary()

In [None]:
# Attach a global-max-pooling head to every intermediate activation so each
# layer yields one fixed-length feature vector per image.
# Each head gets a unique name that states which layer it pools; previously
# all six were named 'Conv2D_1', which was misleading (x3 and x6 are
# max-pooling outputs) and would clash if the heads were ever combined into
# a single model, since Keras requires unique layer names within a model.
feature1 = GlobalMaxPooling2D(name='gmp_conv2d_1')(x1)
feature2 = GlobalMaxPooling2D(name='gmp_conv2d_2')(x2)
feature3 = GlobalMaxPooling2D(name='gmp_maxpool_1')(x3)
feature4 = GlobalMaxPooling2D(name='gmp_conv2d_3')(x4)
feature5 = GlobalMaxPooling2D(name='gmp_conv2d_4')(x5)
feature6 = GlobalMaxPooling2D(name='gmp_maxpool_2')(x6)

In [None]:
hidden_model1 = Model(inputs=model.input, outputs=feature1)
hidden_model2 = Model(inputs=model.input, outputs=feature2)
hidden_model3 = Model(inputs=model.input, outputs=feature3)
hidden_model4 = Model(inputs=model.input, outputs=feature4)
hidden_model5 = Model(inputs=model.input, outputs=feature5)
hidden_model6= Model(inputs=model.input, outputs=feature6)

   # 層化抽出法(Stratified sampling)&KFold

In [None]:
from sklearn.model_selection import StratifiedKFold
from keras.utils import np_utils

In [None]:
y.shape

In [None]:
# 5-fold stratified cross-validation of the from-scratch CNN.
# shuffle=True is required for random_state to take effect; recent
# scikit-learn versions raise ValueError when random_state is set while
# shuffle is False.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cvscores = []
# StratifiedKFold needs integer-encoded labels (it cannot stratify on
# one-hot vectors), so the labels are one-hot encoded only inside the loop.
x = np.array(x)
y = np.array(y)
# Snapshot of the weights before any fold is trained. Restoring it at the
# start of every fold keeps the folds independent; otherwise each fold
# would be evaluated on samples the model already saw in earlier folds.
# NOTE: optimizer state is not reset here, and re-running this cell after
# training snapshots the trained weights — rebuild the model first.
initial_weights = model.get_weights()
for train_index, test_index in skf.split(x, y):
    model.set_weights(initial_weights)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # One-hot encode so the labels match the softmax output of the CNN.
    y_train = np_utils.to_categorical(y_train, num_s_class)
    y_test = np_utils.to_categorical(y_test, num_s_class)
    model.fit(x_train, y_train, verbose=1, batch_size=batch_size, epochs=n_epochs,
              callbacks=[early_stopping], validation_split=0.2)
    scores = model.evaluate(x_test, y_test)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

In [None]:
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

→ validation_dataを用意しないとval_lossを計算できない

# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from scipy.sparse.csgraph import connected_components

In [None]:
y_pred = model.predict(x_valid)

In [None]:
# One-hot encode the held-out validation labels; later cells recover the
# class index again with argmax over axis 1.
# NOTE(review): this overwrites y_valid in place, so running the cell twice
# presumably one-hot encodes the already encoded labels — run it once per kernel.
y_valid = np_utils.to_categorical(y_valid)

In [None]:
y_pred = np.array(y_pred)
y_true = np.array(y_valid)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)

In [None]:
print(y_pred.shape)
print(y_true.shape)

In [None]:
cmx = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(12,12))
sns.heatmap(cmx, annot=True)
plt.show()
print('accuracy: {}'.format(accuracy_score
(y_true, y_pred)))

In [None]:
cor_table = pd.read_csv('cor_table.csv')
print(cor_table)

# 中間層特徴量の可視化

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
hidden1 = hidden_model1.predict(x_train)
hidden2 = hidden_model2.predict(x_train)
hidden3 = hidden_model3.predict(x_train)
hidden4 = hidden_model4.predict(x_train)
hidden5 = hidden_model5.predict(x_train)
hidden6 = hidden_model6.predict(x_train)

In [None]:
print(hidden1.shape)
print(hidden2.shape)
print(hidden3.shape)
print(hidden4.shape)
print(hidden5.shape)
print(hidden6.shape)

# Standardization: scale every feature of each layer's activations to zero
# mean and unit variance, fitting an independent scaler per layer.
# Each result is written back to its own variable; previously every
# transform was assigned to hidden1, leaving hidden2..hidden6 unscaled and
# hidden1 holding the scaled layer-6 features.
hidden1 = StandardScaler().fit_transform(hidden1)
hidden2 = StandardScaler().fit_transform(hidden2)
hidden3 = StandardScaler().fit_transform(hidden3)
hidden4 = StandardScaler().fit_transform(hidden4)
hidden5 = StandardScaler().fit_transform(hidden5)
hidden6 = StandardScaler().fit_transform(hidden6)

In [None]:
pca = PCA()

In [None]:
cm_name = 'jet'
cmap = plt.get_cmap(cm_name, num_s_class)

In [None]:
# one-hotから整数値ベクトルに変換
y_train_vec = []
for i in range(len(y_train)):
    y_train_vec.append(np.argmax(y_train[i]))

# 1層目の特徴ベクトル(Conv2D)

In [None]:
feature1 = pca.fit_transform(hidden1)
#　主成分分析の例(1層目)
df_feature1 = pd.DataFrame(feature1)
print(df_feature1)

In [None]:
plt.scatter(feature1[:,0],feature1[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(hidden1)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# 2層目の特徴ベクトル (Conv2D)

In [None]:
feature2 = pca.fit_transform(hidden2)

In [None]:
plt.scatter(feature2[:,0],feature2[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(hidden2)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# 3層目の特徴ベクトル (MaxPooling2D)

In [None]:
feature3 = pca.fit_transform(hidden3)

In [None]:
plt.scatter(feature3[:,0],feature3[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(hidden3)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# 4層目の特徴ベクトル (Conv2D)

In [None]:
feature4 = pca.fit_transform(hidden4)

In [None]:
plt.scatter(feature4[:,0],feature4[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(hidden4)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# 5層目の特徴ベクトル (Conv2D)

In [None]:
feature5 = pca.fit_transform(hidden5)

In [None]:
plt.scatter(feature5[:,0],feature5[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(hidden5)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# 6層目の特徴ベクトル(MaxPooling2D)

In [None]:
feature6 = pca.fit_transform(hidden6)

In [None]:
plt.scatter(feature6[:,0],feature6[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(hidden6)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# grad-cam

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input

In [None]:
# Ground-truth class ids 0..num_s_class-1, compared against `pred` in the
# confusion matrix below.
true = list(range(num_s_class))

In [None]:
# Grad-CAM for the from-scratch CNN: for every test image, predict its class,
# build a heatmap of the regions that drove that prediction, and save the
# heatmap overlaid on the original image.
pred = []

# NOTE(review): "conv2d_3" is an auto-generated layer name; it presumably
# refers to the last Conv2D of `model` on a fresh kernel — confirm with
# model.summary() before trusting the heatmaps.
grad_last_conv = model.get_layer("conv2d_3")

# Cache of backend functions keyed by class index, so the gradient graph for
# a given class is built only once.
gradcam_functions = {}


def _gradcam_function(class_index):
    """Return a backend function producing the Grad-CAM inputs for a class.

    The returned function maps an image batch to
    (channel-wise mean gradient of the class score w.r.t. the conv output,
     conv output of the first image in the batch).
    """
    if class_index not in gradcam_functions:
        class_score = model.output[:, class_index]
        grads = K.gradients(class_score, grad_last_conv.output)[0]
        pooled_grads = K.mean(grads, axis=(0, 1, 2))
        gradcam_functions[class_index] = K.function(
            [model.input], [pooled_grads, grad_last_conv.output[0]])
    return gradcam_functions[class_index]


for n in range(len(grad_test_img_path)):
    img_keras = image.load_img(grad_test_img_path[n], target_size=(224, 224))
    img_tensor = image.img_to_array(img_keras)
    img_tensor = np.expand_dims(img_tensor, axis=0)
    # Scale to [0, 1] BEFORE predicting: the model was trained on x / 255,
    # so predicting on raw 0-255 pixels gives meaningless class scores.
    img_tensor /= 255.
    predicts = model.predict(img_tensor)
    s_class_num = int(np.argmax(predicts))
    pred.append(s_class_num)
    # Explain the predicted class (the class written into the file name),
    # not always class 0.
    iterate = _gradcam_function(s_class_num)
    pooled_grads_val, conv_output_val = iterate([img_tensor])
    # Weight every feature-map channel by its pooled gradient (broadcast
    # over the trailing channel axis).
    conv_output_val = conv_output_val * pooled_grads_val
    heatmap = np.mean(conv_output_val, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    # Normalise to [0, 1]; skip when the map is all zeros to avoid NaNs.
    heatmap_max = np.max(heatmap)
    if heatmap_max > 0:
        heatmap /= heatmap_max
    img = cv2.imread(grad_test_img_path[n])
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = heatmap * 0.4 + img
    # The n-th test image is taken to belong to class n (this matches the
    # `true` list used for the confusion matrix).
    save_dir = '../data/gradcam_img/selfCNN/'+s_classlist[n]
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)	# Make a directory
    # The directory is named after the image's true class; the predicted
    # class is written into the image file name.
    cv2.imwrite(save_dir+'/heatmap_pred_'+s_classlist[s_class_num]+'.jpg', superimposed_img)
    print('Saved: ', save_dir+'/heatmap_pred_'+s_classlist[s_class_num]+'.jpg')

In [None]:
cmx = confusion_matrix(true, pred)
plt.figure(figsize=(12,12))
sns.heatmap(cmx, annot=True)
plt.ylabel("truth")
plt.xlabel("prediction")
plt.show()

In [None]:
cor_table = pd.read_csv('cor_table.csv')
print(cor_table)

# VGG16(weight="ImageNet", include_top=False)

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16

In [None]:
vgg_model = VGG16(weights='imagenet', include_top=False)

In [None]:
vgg_model.summary()

top(全結合層など)はデフォルトだとImageNetの全クラス数1000での出力になるので，
自作のアーキテクチャを使用する
top以外のモデルの重みは(ImageNetで)固定しない→学習をする

Fine-tuning (Frozen (not training VGG layers))
training all layers -> acc: 0.1...

In [None]:
inputs = Input(shape=(224,224,3))
x1 = vgg_model(inputs)
x2 = Flatten()(x1)
x3 = Dense(256, activation='relu')(x2)
prediction = Dense(num_s_class, activation='softmax')(x3)
full_model = Model(inputs=inputs, outputs=prediction)
early_stopping = EarlyStopping(monitor='val_loss', verbose=1, mode='auto')
for layer in vgg_model.layers[:15]:
    layer.trainable = False
full_model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

In [None]:
full_model.summary()

In [None]:
# 5-fold stratified cross-validation of the VGG16-based model.
# shuffle=True is required for random_state to take effect; recent
# scikit-learn versions raise ValueError when random_state is set while
# shuffle is False.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cvscores = []
# StratifiedKFold needs integer-encoded labels (it cannot stratify on
# one-hot vectors), so the labels are one-hot encoded only inside the loop.
# Snapshot of the weights before any fold is trained. Restoring it at the
# start of every fold keeps the folds independent; otherwise each fold
# would be evaluated on samples the model already saw in earlier folds.
# NOTE: optimizer state is not reset here, and re-running this cell after
# training snapshots the trained weights — rebuild full_model first.
full_model_initial_weights = full_model.get_weights()
for train_index, test_index in skf.split(x, y):
    full_model.set_weights(full_model_initial_weights)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # One-hot encode so the labels match the softmax output of the network.
    y_train = np_utils.to_categorical(y_train, num_s_class)
    y_test = np_utils.to_categorical(y_test, num_s_class)
    full_model.fit(x_train, y_train, verbose=1, batch_size=batch_size, epochs=n_epochs,
              callbacks=[early_stopping], validation_split=0.2)
    scores = full_model.evaluate(x_test, y_test)
    print("%s: %.2f%%" % (full_model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

In [None]:
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

In [None]:
y_pred = full_model.predict(x_valid)

In [None]:
y_pred = np.array(y_pred)
y_true = np.array(y_valid)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)

# Confusion Matrix

In [None]:
cmx = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(12,12))
sns.heatmap(cmx, annot=True)
plt.show()
print('accuracy: {}'.format(accuracy_score
(y_true, y_pred)))

In [None]:
cor_table = pd.read_csv('cor_table.csv')
print(cor_table)

# 中間層特徴量の可視化

In [None]:
x1 = vgg_model.get_layer('block1_pool').output
output1 = GlobalMaxPooling2D()(x1)
block_model1 = Model(inputs=vgg_model.input, outputs=output1)

In [None]:
x2 = vgg_model.get_layer('block2_pool').output
output2 = GlobalMaxPooling2D()(x2)
block_model2 = Model(inputs=vgg_model.input, outputs=output2)

In [None]:
x3 = vgg_model.get_layer('block3_pool').output
output3 = GlobalMaxPooling2D()(x3)
block_model3 = Model(inputs=vgg_model.input, outputs=output3)

In [None]:
x4 = vgg_model.get_layer('block4_pool').output
output4 = GlobalMaxPooling2D()(x4)
block_model4 = Model(inputs=vgg_model.input, outputs=output4)

In [None]:
x5 = vgg_model.get_layer('block5_pool').output
output5 = GlobalMaxPooling2D()(x5)
block_model5 = Model(inputs=vgg_model.input, outputs=output5)

In [None]:
block1 = block_model1.predict(x_train)
block2 = block_model2.predict(x_train)
block3 = block_model3.predict(x_train)
block4 = block_model4.predict(x_train)
block5 = block_model5.predict(x_train)

In [None]:
print(block1.shape)
print(block2.shape)
print(block3.shape)
print(block4.shape)
print(block5.shape)

# block1の特徴ベクトル

In [None]:
feature1 = pca.fit_transform(block1)
#　主成分分析の例(1層目)
df_feature1 = pd.DataFrame(feature1)
print(df_feature1)

In [None]:
plt.scatter(feature1[:,0],feature1[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(block1)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# block2の特徴ベクトル 

In [None]:
feature2 = pca.fit_transform(block2)

In [None]:
plt.scatter(feature2[:,0],feature2[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(block2)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# block3の特徴ベクトル 

In [None]:
feature3 = pca.fit_transform(block3)

In [None]:
plt.scatter(feature3[:,0],feature3[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(block3)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# block4の特徴ベクトル

In [None]:
feature4 = pca.fit_transform(block4)

In [None]:
plt.scatter(feature4[:,0],feature4[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(block4)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# block5の特徴ベクトル 

In [None]:
feature5 = pca.fit_transform(block5)

In [None]:
plt.scatter(feature5[:,0],feature5[:,1],marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
pca.fit(block5)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# 3D scatter of the first three principal components of the block5 features.
# Column indices are zero-based, so PC3 is column 2 (column 3 would be PC4).
ax = plt.figure().add_subplot(111, projection='3d')
ax.scatter(feature5[:,0],feature5[:,1], feature5[:,2], marker=".", c=y_train_vec, cmap=cmap)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.set_zlabel("PC3")
plt.show()

# grad-cam

# Inception v3 (Pretrained ImageNet)

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

In [None]:
base_model = InceptionV3(weights='imagenet', include_top=False)

In [None]:
# InceptionV3 backbone (ImageNet weights, no top) followed by a custom
# classification head over the num_s_class classes.
inputs = Input(shape=(224,224,3))
x1 = base_model(inputs)
x2 = Flatten()(x1)
x3 = Dense(256, activation='relu')(x2)
prediction = Dense(num_s_class, activation='softmax')(x3)
full_model = Model(inputs=inputs, outputs=prediction)
early_stopping = EarlyStopping(monitor='val_loss', verbose=1, mode='auto')
# Freeze the first 249 layers of the InceptionV3 backbone, except its batch
# normalisation layers, which stay trainable. The loop must iterate over
# base_model.layers: iterating over `model` (the earlier from-scratch CNN)
# left the whole backbone trainable.
for layer in base_model.layers[:249]:
    layer.trainable = layer.name.startswith('batch_normalization')
# Compile after changing `trainable` so the setting takes effect.
full_model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

top(全結合層など)はデフォルトだとImageNetの全クラス数1000での出力になるので，
自作のアーキテクチャを使用する
top以外のモデルの重みは(ImageNetで)固定しない→学習をする

Fine-tuning (Frozen (not training InceptionV3 layers))

In [None]:
# 5-fold stratified cross-validation of the InceptionV3-based model.
# shuffle=True is required for random_state to take effect; recent
# scikit-learn versions raise ValueError when random_state is set while
# shuffle is False.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cvscores = []
# StratifiedKFold needs integer-encoded labels (it cannot stratify on
# one-hot vectors), so the labels are one-hot encoded only inside the loop.
x = np.array(x)
y = np.array(y)
# Snapshot of the weights before any fold is trained. Restoring it at the
# start of every fold keeps the folds independent; otherwise each fold
# would be evaluated on samples the model already saw in earlier folds.
# NOTE: optimizer state is not reset here, and re-running this cell after
# training snapshots the trained weights — rebuild full_model first.
full_model_initial_weights = full_model.get_weights()
for train_index, test_index in skf.split(x, y):
    full_model.set_weights(full_model_initial_weights)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # One-hot encode so the labels match the softmax output of the network.
    y_train = np_utils.to_categorical(y_train, num_s_class)
    y_test = np_utils.to_categorical(y_test, num_s_class)
    full_model.fit(x_train, y_train, verbose=1, batch_size=batch_size, epochs=n_epochs,
              callbacks=[early_stopping], validation_split=0.2)
    scores = full_model.evaluate(x_test, y_test)
    print("%s: %.2f%%" % (full_model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

In [None]:
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

In [None]:
y_pred = full_model.predict(x_valid)

In [None]:
y_pred = np.array(y_pred)
y_true = np.array(y_valid)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_true, axis=1)

# Confusion Matrix

In [None]:
cmx = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(12,12))
sns.heatmap(cmx, annot=True)
plt.show()
print('accuracy: {}'.format(accuracy_score
(y_true, y_pred)))

In [None]:
cor_table = pd.read_csv('cor_table.csv')
print(cor_table)

#  最終層の特徴ベクトル 

In [None]:
x_last = base_model.get_layer('mixed10').output
output_last = GlobalMaxPooling2D()(x_last)
model_last = Model(inputs=base_model.input, outputs=output_last)

In [None]:
last_feature = model_last.predict(x_train)

In [None]:
print(last_feature.shape)

In [None]:
last_pca = pca.fit_transform(last_feature)

In [None]:
plt.scatter(last_pca[:,0],last_pca[:,1], marker=".", c=y_train_vec, cmap=cmap)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.colorbar()
plt.show()

In [None]:
# 3D scatter of the first three principal components of the last-layer features.
# Column indices are zero-based, so PC3 is column 2 (column 3 would be PC4).
ax = plt.figure().add_subplot(111, projection='3d')
ax.scatter(last_pca[:,0],last_pca[:,1], last_pca[:,2], marker=".", c=y_train_vec, cmap=cmap)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.set_zlabel("PC3")
plt.show()

In [None]:
pca.fit(last_feature)
ev_ratio = pca.explained_variance_ratio_
ev_ratio = np.hstack([0, ev_ratio.cumsum()])
plt.xlabel("num of components")
plt.ylabel("explained variance ratio")
plt.plot(ev_ratio)
plt.show()

# grad-cam

# 非線形成分を考慮した次元削減
PCAは線形成分に注目した次元削減方法なので，非線形性を考慮した手法を試す

# 中間層の特徴量エンジニアリング

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

元画像(x_train)についてRandom Forestを行う

In [None]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42)

In [None]:
# A random forest cannot take raw 4-D image batches, so reduce each image to
# one value per channel with a global max over the two spatial axes (the
# NumPy equivalent of GlobalMaxPooling2D). Calling the Keras layer directly
# on the array would return a tensor, which scikit-learn cannot fit on.
x_train_pool = x_train.max(axis=(1, 2))

In [None]:
rnd_clf.fit(x_train_pool, y_train_vec)

In [None]:
# Pool the test images the same way as the training images (global max over
# the two spatial axes) so the features match what the forest was fitted on.
y_pred_rf = rnd_clf.predict(x_test.max(axis=(1, 2)))

In [None]:
# The forest was fitted on integer class ids, so predict() already returns a
# 1-D vector of class ids; taking argmax over axis 1 of it would fail.
y_pred_rf = np.array(y_pred_rf)
# y_test is one-hot encoded, so recover the integer class ids with argmax.
y_true = np.argmax(np.array(y_test), axis=1)

In [None]:
print(y_pred_rf.shape)
print(y_true.shape)

In [None]:
cmx = confusion_matrix(y_true, y_pred_rf)
plt.figure(figsize=(12,12))
sns.heatmap(cmx, annot=True)
plt.show()
print('accuracy: {}'.format(accuracy_score
(y_true, y_pred_rf)))

In [None]:
feature_importance = rnd_clf.feature_importances_
print(feature_importance)
print("shape:", len(feature_importance))

InceptionV3の最終層の特徴量についてRandom Forestを行う

In [None]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(last_feature, y_train_vec)

In [None]:
last_feature_importance = rnd_clf.feature_importances_
print(last_feature_importance)
print("shape:", len(last_feature_importance))

In [None]:
plt.imshow(last_feature_importance.reshape(32, 64), cmap="Reds")
plt.colorbar()
plt.show()

# BoVW→k-means→PCA

# ResNet50 (keras NotPretrained ImageNet)<br>
include_top: ネットワークの出力層側にある全結合層を含むかどうか．<br>
weights: None (ランダム初期化) か 'imagenet' (ImageNetで学習した重み) の一方．<br>
input_tensor: モデルの入力画像として利用するためのオプションのKerasテンソル (つまり，layers.Input()の出力)<br>
input_shape: オプショナルなshapeのタプル，include_topがFalseの場合のみ指定可能 (そうでないときは入力のshapeは(224, 224, 3) ('channels_last'データフォーマットのとき) か (3, 224, 224) ('channels_first'データフォーマットのとき) )．正確に3つの入力チャンネルをもつ必要があり，width と height は197以上にする必要があります．例えば(200, 200, 3)は有効値．<br>
pooling: 特徴量抽出のためのオプショナルなpooling mode，include_topがFalseの場合のみ指定可能．<br>
    None：モデルの出力が，最後のconvolutional layerの4階テンソルであることを意味しています．<br>
    'avg'：最後のconvolutional layerの出力にglobal average poolingが適用されることで，モデルの出力が2階テンソルになることを意味しています．<br>
    'max'：global max poolingが適用されることを意味します．<br>
classes: 画像のクラス分類のためのオプショナルなクラス数，include_topがTrueかつweightsが指定されていない場合のみ指定可能<br>

# GBDT