# 3D-CNN

3D Convolutional Neural Networkを使ってvideo classificationを行う. 

- Environment
 - Dataset：UCF-101
 - Keras

In [55]:
import argparse
import os

import matplotlib
matplotlib.use('AGG')
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import cifar10
from keras.layers import (Activation, Conv3D, Dense, Dropout, Flatten,
                          MaxPooling3D)
from keras.layers.advanced_activations import LeakyReLU
from keras.losses import categorical_crossentropy
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split

import videoto3d
from tqdm import tqdm

In [56]:
# Save the model's accuracy and loss curves as image files.
def plot_history(history, result_dir):
    """Plot training/validation accuracy and loss per epoch and save both plots.

    Writes ``model_accuracy.png`` and ``model_loss.png`` into ``result_dir``.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (as returned by ``model.fit``). Accuracy is
            read from ``'acc'``/``'val_acc'`` and, when those keys are
            missing, from ``'accuracy'``/``'val_accuracy'``.
        result_dir: Directory that receives the two PNG files.

    Raises:
        KeyError: If a required metric is missing under both spellings.
    """
    metrics = history.history

    def metric(legacy_key, current_key):
        # The model is compiled with metrics=['accuracy']; depending on the
        # Keras release this is logged as 'acc' or as 'accuracy'.
        return metrics[legacy_key] if legacy_key in metrics else metrics[current_key]

    def save_curve(train_values, val_values, name, legend, legend_loc):
        # A dedicated figure per plot, so nothing already drawn on the current
        # pyplot figure leaks into the saved image.
        fig, ax = plt.subplots()
        ax.plot(train_values, marker='.')
        ax.plot(val_values, marker='.')
        ax.set_title('model ' + name)
        ax.set_xlabel('epoch')
        ax.set_ylabel(name)
        ax.grid()
        ax.legend(legend, loc=legend_loc)
        fig.savefig(os.path.join(result_dir, 'model_{}.png'.format(name)))
        plt.close(fig)

    save_curve(metric('acc', 'accuracy'), metric('val_acc', 'val_accuracy'),
               'accuracy', ['acc', 'val_acc'], 'lower right')
    save_curve(metrics['loss'], metrics['val_loss'],
               'loss', ['loss', 'val_loss'], 'upper right')

In [57]:
# Save the model's accuracy and loss values to a text file.
# The history object stores the loss and accuracy of every epoch.
def save_history(history, result_dir):
    """Write per-epoch loss and accuracy to ``result.txt`` in ``result_dir``.

    The file is tab-separated with the header
    ``epoch  loss  acc  val_loss  val_acc`` followed by one row per epoch.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (as returned by ``model.fit``). Accuracy is
            read from ``'acc'``/``'val_acc'`` and, when those keys are
            missing, from ``'accuracy'``/``'val_accuracy'``.
        result_dir: Directory in which ``result.txt`` is written.

    Raises:
        KeyError: If a required metric is missing under both spellings.
    """
    metrics = history.history

    def metric(legacy_key, current_key):
        # The metric key depends on the Keras release: 'acc' or 'accuracy'.
        return metrics[legacy_key] if legacy_key in metrics else metrics[current_key]

    loss = metrics['loss']
    acc = metric('acc', 'accuracy')
    val_loss = metrics['val_loss']
    val_acc = metric('val_acc', 'val_accuracy')

    with open(os.path.join(result_dir, 'result.txt'), 'w') as fp:
        fp.write('epoch\tloss\tacc\tval_loss\tval_acc\n')
        for i, row in enumerate(zip(loss, acc, val_loss, val_acc)):
            fp.write('{}\t{}\t{}\t{}\t{}\n'.format(i, *row))

In [70]:
def loaddata(video_dir, vid3d, nclass, result_dir, color=False, skip=True):
    """Load the videos below ``video_dir`` and label each with a class index.

    Every sub-directory of ``video_dir`` is treated as one class. Each file in
    it is converted with ``vid3d.video3d`` and labelled with
    ``vid3d.get_UCF_classname(class_name)``. At most ``nclass`` distinct labels
    are kept; files whose label would exceed that limit are skipped. The kept
    labels are written one per line to ``classes.txt`` in ``result_dir``, in
    index order.

    Args:
        video_dir: Root directory holding one sub-directory per class.
        vid3d: Converter providing ``get_UCF_classname(class_name)`` and
            ``video3d(path, color=..., skip=...)``.
        nclass: Maximum number of distinct labels to keep.
        result_dir: Directory in which ``classes.txt`` is written.
        color: Forwarded to ``vid3d.video3d``; also selects the transpose below.
        skip: Forwarded to ``vid3d.video3d``.

    Returns:
        Tuple ``(X, labels)``: the stacked, transposed video array and a list
        of integer class indices, one per loaded video.

    Raises:
        ValueError: If no video was loaded.
    """
    # Sorted because os.listdir order is arbitrary; this keeps the class-index
    # assignment reproducible. Non-directories (e.g. a top-level '.DS_Store')
    # are dropped so that os.listdir below cannot fail on them.
    classes = sorted(
        entry for entry in os.listdir(video_dir)
        if os.path.isdir(os.path.join(video_dir, entry))
    )
    X = []
    labels = []
    labellist = []    # kept labels, position == class index
    label_index = {}  # label -> class index, for O(1) lookup

    for class_name in tqdm(classes):  # progress bar advances once per class
        class_dir = os.path.join(video_dir, class_name)
        for filename in sorted(os.listdir(class_dir)):
            if filename == '.DS_Store':
                continue
            label = vid3d.get_UCF_classname(class_name)
            if label not in label_index:
                if len(labellist) >= nclass:
                    # Class limit reached: ignore files of any further class.
                    continue
                label_index[label] = len(labellist)
                labellist.append(label)
            labels.append(label_index[label])
            X.append(vid3d.video3d(os.path.join(class_dir, filename),
                                   color=color, skip=skip))

    with open(os.path.join(result_dir, 'classes.txt'), 'w') as fp:
        for label in labellist:
            fp.write('{}\n'.format(label))

    if not X:
        raise ValueError('no videos found under {!r}'.format(video_dir))

    # Move axis 1 of the stacked array to the end.
    # Assumes vid3d.video3d returns (frames, rows, cols[, channels]) — TODO confirm.
    # NOTE(review): under that assumption the color branch yields
    # (N, rows, cols, channels, frames), while the caller reshapes to
    # (N, rows, cols, frames, channel); a reshape does not swap axes, so the
    # color path may scramble data — confirm against videoto3d.
    if color:
        return np.array(X).transpose((0, 2, 3, 4, 1)), labels
    else:
        return np.array(X).transpose((0, 2, 3, 1)), labels

In [71]:
# Training hyper-parameters.
batch = 128
epoch = 100

# Dataset location and preprocessing options.
videos = '/export/data/dataset/UCF-101/'
nclass = 101
color = False # RGB image or grayscale image
skip = True   # sample frames at intervals, or take them consecutively
depth = 10    # the number of frames to use

# Directory that receives classes.txt, the model files and the result plots.
output = '/export/space/mizuno-s/jupyter/notebook/video_classification/3d-cnn'

img_rows, img_cols, frames = 32, 32, depth
channel = 3 if color else 1

# Name of the cached, preprocessed dataset. The color flag is part of the key so
# a grayscale cache is never reused for an RGB run; the grayscale name is
# unchanged so existing caches remain valid.
fname_npz = 'dataset_{}_{}_{}{}.npz'.format(
    nclass, depth, skip, '_color' if color else '')

In [72]:
# Video-to-3D converter (width: img_rows, height: img_cols, depth: frames).
vid3d = videoto3d.Videoto3D(img_rows, img_cols, frames)
nb_classes = nclass
if os.path.exists(fname_npz):
    # Reuse the cached arrays; the context manager closes the .npz file once
    # both arrays have been read into memory.
    with np.load(fname_npz) as loadeddata:
        X, Y = loadeddata["X"], loadeddata["Y"]
else:
    # loaddata writes classes.txt into `output`, so the directory must exist.
    os.makedirs(output, exist_ok=True)
    x, y = loaddata(videos, vid3d, nclass, output, color, skip)
    # Append the explicit channel axis expected by Conv3D.
    # NOTE(review): for color=True this reshape only matches if loaddata already
    # returns axes in (N, rows, cols, frames, channel) order — confirm.
    X = x.reshape((x.shape[0], img_rows, img_cols, frames, channel))
    Y = np_utils.to_categorical(y, nb_classes)

    X = X.astype('float32')
    np.savez(fname_npz, X=X, Y=Y)
    # Report the actual cache file name (it varies with the configuration).
    print('Saved dataset to {}.'.format(fname_npz))
print('X_shape:{}\nY_shape:{}'.format(X.shape, Y.shape))









  0%|          | 0/101 [00:00<?, ?it/s][A[A[A[A[A[A[A[A







  1%|          | 1/101 [00:19<31:40, 19.00s/it][A[A[A[A[A[A[A[A







  2%|▏         | 2/101 [00:30<27:23, 16.61s/it][A[A[A[A[A[A[A[A







  3%|▎         | 3/101 [00:48<27:47, 17.02s/it][A[A[A[A[A[A[A[A







  4%|▍         | 4/101 [01:05<27:37, 17.09s/it][A[A[A[A[A[A[A[A







  5%|▍         | 5/101 [01:16<24:29, 15.31s/it][A[A[A[A[A[A[A[A







  6%|▌         | 6/101 [01:40<28:21, 17.91s/it][A[A[A[A[A[A[A[A







  7%|▋         | 7/101 [01:44<21:38, 13.81s/it][A[A[A[A[A[A[A[A







  8%|▊         | 8/101 [01:48<16:44, 10.80s/it][A[A[A[A[A[A[A[A







  9%|▉         | 9/101 [01:54<14:17,  9.33s/it][A[A[A[A[A[A[A[A







 10%|▉         | 10/101 [01:59<12:19,  8.12s/it][A[A[A[A[A[A[A[A







 11%|█         | 11/101 [02:03<10:24,  6.94s/it][A[A[A[A[A[A[A[A







 12%|█▏        | 12/101 [02:09<09:42,

Saved dataset to dataset.npz.
X_shape:(13320, 32, 32, 10, 1)
Y_shape:(13320, 101)


In [46]:
# Define model: two Conv3D stages (32 then 64 filters), each followed by
# max-pooling and dropout, then a dense classifier head.
# `padding='same'` is the Keras 2 spelling of the deprecated Keras 1
# `border_mode='same'` argument.
# NOTE(review): the second convolution of each stage is followed by a softmax
# activation while the first uses relu; softmax as a hidden activation is
# unusual — confirm it is intentional before changing it.
model = Sequential()
model.add(Conv3D(32, kernel_size=(3, 3, 3), input_shape=(X.shape[1:]), padding='same'))
model.add(Activation('relu'))
model.add(Conv3D(32, kernel_size=(3, 3, 3), padding='same'))
model.add(Activation('softmax'))
model.add(MaxPooling3D(pool_size=(3, 3, 3), padding='same'))
model.add(Dropout(0.25))

model.add(Conv3D(64, kernel_size=(3, 3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv3D(64, kernel_size=(3, 3, 3), padding='same'))
model.add(Activation('softmax'))
model.add(MaxPooling3D(pool_size=(3, 3, 3), padding='same'))
model.add(Dropout(0.25))

# Classifier head: one output unit per class with softmax probabilities.
model.add(Flatten())
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))

model.compile(loss=categorical_crossentropy,optimizer=Adam(), metrics=['accuracy'])

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_1 (Conv3D)            (None, 32, 32, 10, 32)    896       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 10, 32)    0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 32, 32, 10, 32)    27680     
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 10, 32)    0         
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 11, 11, 4, 32)     0         
_________________________________________________________________
dropout_1 (Dropout)  

  This is separate from the ipykernel package so we can avoid doing imports until
  """
  import sys
  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':
  


In [48]:
# Render the network graph, including layer shapes, to model.png in `output`.
plot_model(
    model,
    to_file=os.path.join(output, 'model.png'),
    show_shapes=True,
)

In [49]:
# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    random_state=43,
    test_size=0.2,
)

In [50]:
# Train the network; the held-out split is passed as validation data, so its
# loss and accuracy are recorded after every epoch.
history = model.fit(
    X_train,
    Y_train,
    batch_size=batch,
    epochs=epoch,
    shuffle=True,
    validation_data=(X_test, Y_test),
    verbose=1,
)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 10656 samples, validate on 2664 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Ep

In [51]:
# Persist the trained model: the architecture as JSON and the weights in a
# separate file, both inside `output`.
# (The evaluate call whose result was discarded has been dropped; the test
# metrics are computed where they are reported.)
model_json = model.to_json()
# exist_ok avoids the check-then-create race of a separate isdir test.
os.makedirs(output, exist_ok=True)
with open(os.path.join(output, 'ucf101_3dcnnmodel.json'), 'w') as json_file:
    json_file.write(model_json)
model.save_weights(os.path.join(output, 'ucf101_3dcnnmodel.hd5'))

In [53]:
# Final loss/accuracy on the held-out split.
# NOTE(review): X_test/Y_test are the same arrays passed as validation_data to
# model.fit, so this score is not independent of the validation curves.
loss, acc = model.evaluate(X_test, Y_test, verbose=0)
print('Test loss:', loss)
print('Test accuracy:', acc)
# Save the training curves (PNG) and the per-epoch metrics (text) to `output`.
plot_history(history, output)
save_history(history, output)

Test loss: 0.8642218632025046
Test accuracy: 0.793918918918919
