From: https://medium.com/smileinnovation/training-neural-network-with-image-sequence-an-example-with-video-as-input-c3407f7a0b0f

Version History:
- v1.0.0: Just using the tutorial code and running.
- v1.1.0: Added methods to return class per video in each dataset and make confusion matrix for analysis.

In [1]:
import os
import glob
import keras 
from keras_video import VideoFrameGenerator
from keras.layers import Conv2D, BatchNormalization, \
    MaxPool2D, GlobalMaxPool2D
from keras.layers import TimeDistributed, GRU, Dense, Dropout
from keras.models import load_model
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import re
import random

Using TensorFlow backend.


In [3]:
# testing with 2 classes:
classes = ['class_00', 'class_10']

In [2]:
# use sub directories names as classes
test_classes = [i.split(os.path.sep)[5] for i in glob.glob('F:\\DS_Datasets\\DS1\\DS1nmax_split\\test\\*')]
test_classes.sort()

train_classes = [i.split(os.path.sep)[5] for i in glob.glob('F:\\DS_Datasets\\DS1\\DS1nmax_split\\train\\*')]
train_classes.sort()

ds3_classes = [i.split(os.path.sep)[5] for i in glob.glob('D:\\DS_Datasets\\Updated_DS_AVI\\DS3\\DS3Nmax\\*')]
ds3_classes.sort()

In [3]:
classes = test_classes

In [4]:
# some global params
SIZE = (128, 128)
CHANNELS = 3
NBFRAME = 16
BS = 8

In [5]:
# pattern to get videos and classes
#glob_pattern='D:\\DS_Datasets\\Updated_DS_AVI\\DS1\\DS1Nmax test\\{classname}\\*.avi'
test_pattern = 'F:\\DS_Datasets\\DS1\\DS1nmax_split\\test\\{classname}\\*.avi'
train_pattern = 'F:\\DS_Datasets\\DS1\\DS1nmax_split\\train\\{classname}\\*.avi'
ds3_pattern = 'D:\\DS_Datasets\\Updated_DS_AVI\\DS3\\DS3Nmax\\{classname}\\*.avi'

In [6]:
# for data augmentation
data_aug = keras.preprocessing.image.ImageDataGenerator(
    zoom_range=.1,
    horizontal_flip=True,
    rotation_range=8,
    width_shift_range=.2,
    height_shift_range=.2)

In [7]:
# Create video frame generator
train = VideoFrameGenerator(
    classes=classes, 
    glob_pattern=train_pattern,
    nb_frames=NBFRAME,
    shuffle=True,
    batch_size=BS,
    target_shape=SIZE,
    nb_channel=CHANNELS,
    transformation=data_aug,
    use_frame_cache=True)

Total data: 22 classes for 4133 files for train


In [10]:
ds3 = VideoFrameGenerator(
    classes=ds3_classes, 
    glob_pattern=ds3_pattern,
    nb_frames=NBFRAME,
    shuffle=True,
    batch_size=BS,
    target_shape=SIZE,
    nb_channel=CHANNELS,
    transformation=data_aug,
    use_frame_cache=True)

Total data: 19 classes for 11191 files for train


In [11]:
test = VideoFrameGenerator(
    classes=classes, 
    glob_pattern=test_pattern,
    nb_frames=NBFRAME,
    shuffle=False,
    batch_size=BS,
    target_shape=SIZE,
    nb_channel=CHANNELS,
    transformation=data_aug,
    use_frame_cache=True)

Total data: 22 classes for 1319 files for train


In [12]:
def class_per_video(vdg, classlist):
    #vdg = video data generator
    class_indices = []
    for i in range(vdg.files_count):
        class_indices.append([classlist.index(x) for x in classlist if x in vdg._get_classname(vdg.files[i])][0])
    return class_indices

In [13]:
def build_convnet(shape=(128, 128, 3)):
    momentum = .9
    model = keras.Sequential()
    model.add(Conv2D(64, (3,3), input_shape=shape,
              padding='same', activation='relu'))
    model.add(Conv2D(64, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    model.add(MaxPool2D())
    
    model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
   
    model.add(MaxPool2D())

    model.add(Conv2D(256, (3,3), padding='same', activation='relu'))
    model.add(Conv2D(256, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    model.add(MaxPool2D())
   
    model.add(Conv2D(512, (3,3), padding='same', activation='relu'))
    model.add(Conv2D(512, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    # flatten...
    model.add(GlobalMaxPool2D())
    return model

In [14]:
def action_model(shape=(5, 128, 128, 3), nbout=len(classes)):
    # Create our convnet with (112, 112, 3) input shape
    convnet = build_convnet(shape[1:])
    
    # then create our final model
    model = keras.Sequential()    # add the convnet with (5, 112, 112, 3) shape
    model.add(TimeDistributed(convnet, input_shape=shape))    # here, you can also use GRU or LSTM
    model.add(GRU(64))    # and finally, we make a decision network
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nbout, activation='softmax'))
    return model

In [15]:
INSHAPE=(NBFRAME,) + SIZE + (CHANNELS,) # (5, 112, 112, 3)
model = action_model(INSHAPE, len(classes))
optimizer = keras.optimizers.Adam(0.001)
model.compile(
    optimizer,
    'categorical_crossentropy',
    metrics=['acc']
)

In [16]:
EPOCHS=20

# create a "chkp" directory before to run that
# because ModelCheckpoint will write models inside

callbacks = [
    keras.callbacks.ReduceLROnPlateau(verbose=1),
    keras.callbacks.ModelCheckpoint(
        'chkp/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
        verbose=1),
]

In [17]:
model = load_model("ds1_convnet_50epochs_model.h5")

In [18]:
predict = model.predict_generator(ds3, verbose=1)

  26/1398 [..............................] - ETA: 1:56:07

Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_07\14W589.avi, 6 total, 6 extracted


  48/1398 [>.............................] - ETA: 1:54:19

Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_03\6W589.avi, 6 total, 6 extracted


  84/1398 [>.............................] - ETA: 1:50:20

Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_13\26W589.avi, 6 total, 6 extracted


 146/1398 [==>...........................] - ETA: 1:44:11

Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_08\16W589.avi, 6 total, 6 extracted


 294/1398 [=====>........................] - ETA: 1:31:45

Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_10\20W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_17\34W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_00\2W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_19\38W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_06\12W_cannon589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_15\30W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_14\28W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_09\18W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_12\24W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_02\4W_2589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_11\22W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_16\32W589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_05\10W_cannon589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_04\8W_cannon589.avi, 6 total, 6 extracted




Frame count is not OK for video D:\DS_Datasets\Updated_DS_AVI\DS3\DS3Nmax\class_18\36W589.avi, 6 total, 6 extracted




In [21]:
print(len(ds3))

1398


In [22]:
print(len(predict))

11165


In [23]:
y_pred_report = np.argmax(predict, axis = 1)

In [24]:
class_indices = class_per_video(ds3, classes)

In [25]:
print(len(y_pred_report))

11165


In [26]:
print(len(class_indices))

11191


In [27]:
print('Confusion Matrix')
matrix=confusion_matrix(class_indices[:-26], y_pred_report)
print(matrix)

Confusion Matrix
[[ 85  16 135  13   2   1   0   3   1   0  19  14   4  10  85  40   0   3
    0  16 142]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]
 [131  19 106   7   2   0   0   0   0   0  14  13   8   9  80  36   0   1
    0  18 145]
 [ 96  15 127  10   1   0   0   1   3   1  19  14   4   7  81  24   0   3
    2  22 159]
 [ 97  14 126  11   2   0   0   2   1   0  12  13   4   6  70  29   0   4
    0  17 181]
 [ 92  17 142  10   1   1   0   0   1   0  22   9   8  11  73  34   0   1
    1  21 145]
 [102  22 133   7   3   1   0   1   2   0  21  17   6   6  67  34   0   2
    1  16 148]
 [101  19 124   9   4   0   0   2   1   0  14  12   4  16  79  32   0   3
    1  19 149]
 [ 99  13 125   7   3   0   0   1   0   0  15  14   4  10  78  45   0   2
    1  21 151]
 [ 90  17 129  12   0   1   0   2   3   1  12  12   7  10  91  30   0   2
    1  21 148]
 [105  10 134   3   3   0   0   0   2   0  19  19   7   9  85  36   0   2
    1  25 129]
 [11

In [171]:
shuffled_pred = y_pred_report
random.shuffle(shuffled_pred)

In [172]:
print(shuffled_pred)

[1 1 1 0 1 1 0 1 1 1 1 0 0 0 1 1 0 0 0 0 1 1 0 0 0 0 0 1 1 1 1 0 1 0 0 1 1
 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 1
 1 1 0 1 1 1 1 1 0 1 0 1 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1
 1 0 1 1 0 1 1 1 1]


In [173]:
print('Confusion Matrix')
matrix=confusion_matrix(class_indices, shuffled_pred)
print(matrix)

Confusion Matrix
[[29 31]
 [31 29]]


In [21]:
plt.plot(history.history['loss'], label='loss');
plt.plot(history.history['val_loss'], label='val_loss');
plt.xlabel('epoch');
plt.title('Loss and Validation Loss per Epoch')
plt.legend();
plt.savefig('loss_val_loss_convnet.png')

In [None]:
plt.plot(history.history['acc'], label='accuracy');
plt.plot(history.history['val_acc'], label='val_accuracy');
plt.xlabel('epoch');
plt.title('Accuracy and Validation Accuracy per Epoch')
plt.legend();
plt.savefig('acc_val_acc_convnet.png')