In [None]:
#video classification
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Dense, Flatten, Conv3D, MaxPooling3D, Dropout, BatchNormalization, LeakyReLU
from keras.utils import to_categorical
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report


# ---- Data loading and preprocessing ----
# The 'train' entry of the archive unpacks into a (videos, labels) pair.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load
# archives that come from a trusted source.
file = np.load('data/videos.npz',allow_pickle=True)
videos, labels = file['train']

# Temporal subsampling: keep every `fraction`-th frame (0, fraction, 2*fraction, ...).
fraction = 5
# Frame count expected after subsampling; assumes 80-frame source videos -- TODO confirm.
frames = int(80/fraction) #change to 80 ... match paper

# Subsample each video and stack the results into one dense array of shape
# (num_videos, frames, height, width).
videos = np.array([clip[::fraction] for clip in videos])
# NOTE(review): the "18 frame" label is stale (the frame count is `frames`);
# the text is kept unchanged so it still matches previously recorded logs.
print("18 frame video shape: ", videos.shape)

# The split is unseeded, so the train/test partition differs between runs.
x_train, x_test, y_train, y_test = train_test_split(videos, labels, test_size=0.166)#, random_state=42) #size=0.2

# Per-sample input shape handed to the network: (height, width, frames, channels).
shape = (128,128,frames,1)

x_train_input_shape = (x_train.shape[0],128,128,frames,1)
x_test_input_shape = (x_test.shape[0],128,128,frames,1)

# The arrays arrive as (N, frames, H, W). A bare np.reshape to (N, H, W, frames, 1)
# would keep the memory order and scramble pixels across frames, so the frame
# axis is first moved to the end; the reshape then only appends the channel
# axis (and raises if the data is not 128x128 with `frames` frames).
x_train = np.reshape(np.moveaxis(x_train, 1, -1), x_train_input_shape)
x_test = np.reshape(np.moveaxis(x_test, 1, -1), x_test_input_shape)
# Pixel scaling (astype('float32')/255) is left disabled, as before.

# One-hot encode the class labels.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print("Training data shape: ",x_train.shape)
print("Training labels shape: ", y_train.shape)

#print(np.any(np.isnan(x_train)))
#print(np.any(np.isnan(y_train)))

#print(np.any(np.isnan(x_test)))
#print(np.any(np.isnan(y_test)))


def model_3d(sample_shape, num_classes=2, final_activation='softmax'):
	"""Build the (uncompiled) 3D-CNN video classifier.

	The network is six strided Conv3D stages, each followed by
	LeakyReLU(alpha=0.2); stages 2-6 also apply BatchNormalization before the
	activation and use 'SAME' padding. Downsampling is done by the convolution
	strides (no pooling layers). The result is flattened, batch-normalized and
	fed to a single Dense classification layer.

	Args:
		sample_shape: shape of one input sample, passed as `input_shape` to the
			first Conv3D layer.
		num_classes: number of units in the final Dense layer (default 2).
		final_activation: activation of the final Dense layer (default
			'softmax').

	Returns:
		The assembled `Sequential` model; the caller is expected to compile it.
	"""
	model = Sequential()

	# Layer 1: the only stage with default padding and without batch norm.
	model.add(Conv3D(64, kernel_size=(3,3,3), strides=(2,2,1), input_shape=sample_shape))
	model.add(LeakyReLU(alpha=0.2))

	# Layers 2-6 as (filters, strides): the first two keep a stride of 1 along
	# the third axis, the last three halve all three axes.
	conv_stages = (
		(128, (2,2,1)),
		(256, (2,2,1)),
		(512, (2,2,2)),
		(1024, (2,2,2)),
		(2048, (2,2,2)),
	)
	for filters, strides in conv_stages:
		model.add(Conv3D(filters, kernel_size=(3,3,3), strides=strides, padding='SAME'))
		model.add(BatchNormalization())
		model.add(LeakyReLU(alpha=0.2))

	# Layer 7
	model.add(Flatten())

	# Hypothetical fusion layer followed by the classification head.
	model.add(BatchNormalization())
	model.add(Dense(num_classes, activation=final_activation))
	return model

#train
# Build the network for the per-sample input shape and compile it for
# one-hot (categorical) targets.
train = model_3d(shape)

train.compile(loss='categorical_crossentropy',
		# `learning_rate` replaces the deprecated `lr` alias, which newer
		# Keras releases no longer accept.
		optimizer=optimizers.Adam(learning_rate=0.00001),
		metrics=['accuracy'])

train.summary()


#add in k-fold
# 16.6% of x_train is held out for validation during fitting.
history = train.fit(x_train, y_train,
			batch_size=30,
			epochs=30,
			verbose=1,
			validation_split=0.166) #validation_split=0.3


train.save('models/3d-cnn-softmax.hdf5')


#test
# Reload from disk so the evaluation exercises the saved artifact.
model = load_model('models/3d-cnn-softmax.hdf5')


# Note: this score covers all of x_train, including the validation hold-out.
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Y_test = np.argmax(y_test, axis=1) # Convert one-hot to index
# `Sequential.predict_classes` was removed from Keras; taking the argmax of
# the predicted class scores yields the same class indices.
y_pred = np.argmax(model.predict(x_test), axis=1)
print(classification_report(Y_test, y_pred))


Sigmoid activation in final layer (My improvement):
------------------------------------------------------------------------

18 frame video shape:  (384, 16, 128, 128)
Training data shape:  (320, 128, 128, 16, 1)
Training labels shape:  (320, 2)
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   

conv3d_12 (Conv3D)           (None, 63, 63, 14, 64)    1792      
_________________________________________________________________
leaky_re_lu_12 (LeakyReLU)   (None, 63, 63, 14, 64)    0         
_________________________________________________________________
conv3d_13 (Conv3D)           (None, 32, 32, 14, 128)   221312    
_________________________________________________________________
batch_normalization_12 (Batc (None, 32, 32, 14, 128)   512       
_________________________________________________________________
leaky_re_lu_13 (LeakyReLU)   (None, 32, 32, 14, 128)   0         
_________________________________________________________________
conv3d_14 (Conv3D)           (None, 16, 16, 14, 256)   884992    
_________________________________________________________________
batch_normalization_13 (Batc (None, 16, 16, 14, 256)   1024      
_________________________________________________________________
leaky_re_lu_14 (LeakyReLU)   (None, 16, 16, 14, 256)   0         
_________________________________________________________________
conv3d_15 (Conv3D)           (None, 8, 8, 7, 512)      3539456   
_________________________________________________________________
batch_normalization_14 (Batc (None, 8, 8, 7, 512)      2048      
_________________________________________________________________
leaky_re_lu_15 (LeakyReLU)   (None, 8, 8, 7, 512)      0         
_________________________________________________________________
conv3d_16 (Conv3D)           (None, 4, 4, 4, 1024)     14156800  
_________________________________________________________________
batch_normalization_15 (Batc (None, 4, 4, 4, 1024)     4096      
_________________________________________________________________
leaky_re_lu_16 (LeakyReLU)   (None, 4, 4, 4, 1024)     0         
_________________________________________________________________
conv3d_17 (Conv3D)           (None, 2, 2, 2, 2048)     56625152  
_________________________________________________________________
batch_normalization_16 (Batc (None, 2, 2, 2, 2048)     8192      
_________________________________________________________________
leaky_re_lu_17 (LeakyReLU)   (None, 2, 2, 2, 2048)     0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 16384)             0         
_________________________________________________________________
batch_normalization_17 (Batc (None, 16384)             65536     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 32770     

Total params: 75,543,682
Trainable params: 75,502,978
Non-trainable params: 40,704
_________________________________________________________________
Epoch 1/30
9/9 [==============================] - 180s 20s/step - loss: 0.8067 - accuracy: 0.6579 - val_loss: 0.6951 - val_accuracy: 0.5185
Epoch 2/30
9/9 [==============================] - 178s 20s/step - loss: 0.2384 - accuracy: 0.9135 - val_loss: 0.6748 - val_accuracy: 0.5556
Epoch 3/30
9/9 [==============================] - 180s 20s/step - loss: 0.1253 - accuracy: 0.9774 - val_loss: 0.6738 - val_accuracy: 0.5556
Epoch 4/30
9/9 [==============================] - 179s 20s/step - loss: 0.1086 - accuracy: 0.9812 - val_loss: 0.6693 - val_accuracy: 0.5556
Epoch 5/30
9/9 [==============================] - 180s 20s/step - loss: 0.0627 - accuracy: 0.9925 - val_loss: 0.6597 - val_accuracy: 0.5741
Epoch 6/30
9/9 [==============================] - 181s 20s/step - loss: 0.0614 - accuracy: 0.9812 - val_loss: 0.6487 - val_accuracy: 0.6852
Epoch 7/30
9/9 [==============================] - 180s 20s/step - loss: 0.0374 - accuracy: 0.9850 - val_loss: 0.6381 - val_accuracy: 0.7037
Epoch 8/30
9/9 [==============================] - 180s 20s/step - loss: 0.0363 - accuracy: 0.9925 - val_loss: 0.6201 - val_accuracy: 0.6852
Epoch 9/30
9/9 [==============================] - 180s 20s/step - loss: 0.0359 - accuracy: 0.9962 - val_loss: 0.6107 - val_accuracy: 0.6667
Epoch 10/30
9/9 [==============================] - 180s 20s/step - loss: 0.0249 - accuracy: 0.9887 - val_loss: 0.5868 - val_accuracy: 0.7407
Epoch 11/30
9/9 [==============================] - 181s 20s/step - loss: 0.0198 - accuracy: 0.9925 - val_loss: 0.5664 - val_accuracy: 0.8333
Epoch 12/30
9/9 [==============================] - 179s 20s/step - loss: 0.0105 - accuracy: 0.9962 - val_loss: 0.5450 - val_accuracy: 0.8148
Epoch 13/30
9/9 [==============================] - 180s 20s/step - loss: 0.0141 - accuracy: 1.0000 - val_loss: 0.5195 - val_accuracy: 0.8148
Epoch 14/30
9/9 [==============================] - 180s 20s/step - loss: 0.0117 - accuracy: 0.9962 - val_loss: 0.4864 - val_accuracy: 0.7963
Epoch 15/30
9/9 [==============================] - 179s 20s/step - loss: 0.0110 - accuracy: 0.9962 - val_loss: 0.4506 - val_accuracy: 0.8704
Epoch 16/30
9/9 [==============================] - 179s 20s/step - loss: 0.0092 - accuracy: 1.0000 - val_loss: 0.4197 - val_accuracy: 0.8889
Epoch 17/30
9/9 [==============================] - 180s 20s/step - loss: 0.0036 - accuracy: 1.0000 - val_loss: 0.3940 - val_accuracy: 0.8889
Epoch 18/30
9/9 [==============================] - 180s 20s/step - loss: 0.0030 - accuracy: 1.0000 - val_loss: 0.3661 - val_accuracy: 0.8889
Epoch 19/30
9/9 [==============================] - 179s 20s/step - loss: 0.0063 - accuracy: 0.9962 - val_loss: 0.3323 - val_accuracy: 0.9074
Epoch 20/30
9/9 [==============================] - 181s 20s/step - loss: 0.0023 - accuracy: 1.0000 - val_loss: 0.3041 - val_accuracy: 0.9259
Epoch 21/30
9/9 [==============================] - 181s 20s/step - loss: 0.0040 - accuracy: 1.0000 - val_loss: 0.2781 - val_accuracy: 0.9259
Epoch 22/30
9/9 [==============================] - 180s 20s/step - loss: 0.0067 - accuracy: 1.0000 - val_loss: 0.2498 - val_accuracy: 0.9074
Epoch 23/30
9/9 [==============================] - 181s 20s/step - loss: 0.0027 - accuracy: 1.0000 - val_loss: 0.2419 - val_accuracy: 0.9074
Epoch 24/30
9/9 [==============================] - 179s 20s/step - loss: 0.0025 - accuracy: 1.0000 - val_loss: 0.2242 - val_accuracy: 0.9074
Epoch 25/30
9/9 [==============================] - 180s 20s/step - loss: 6.5738e-04 - accuracy: 1.0000 - val_loss: 0.2123 - val_accuracy: 0.9074
Epoch 26/30
9/9 [==============================] - 181s 20s/step - loss: 5.2206e-04 - accuracy: 1.0000 - val_loss: 0.2052 - val_accuracy: 0.9074
Epoch 27/30
9/9 [==============================] - 180s 20s/step - loss: 5.6356e-04 - accuracy: 1.0000 - val_loss: 0.2004 - val_accuracy: 0.9259
Epoch 28/30
9/9 [==============================] - 180s 20s/step - loss: 0.0021 - accuracy: 1.0000 - val_loss: 0.1707 - val_accuracy: 0.9444
Epoch 29/30
9/9 [==============================] - 181s 20s/step - loss: 4.3726e-04 - accuracy: 1.0000 - val_loss: 0.1664 - val_accuracy: 0.9444
Epoch 30/30
9/9 [==============================] - 180s 20s/step - loss: 2.7244e-04 - accuracy: 1.0000 - val_loss: 0.1667 - val_accuracy: 0.9444
Training Accuracy:  0.9906250238418579
Testing Accuracy:  0.953125

Main implementation from paper:
-----------------------------------------------
run time ~=2 hours

softmax activation in final layer:

18 frame video shape:  (384, 16, 128, 128)
Training data shape:  (320, 128, 128, 16, 1)
Training labels shape:  (320, 2)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   

conv3d_6 (Conv3D)            (None, 63, 63, 14, 64)    1792      
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 63, 63, 14, 64)    0         
_________________________________________________________________
conv3d_7 (Conv3D)            (None, 32, 32, 14, 128)   221312    
_________________________________________________________________
batch_normalization_6 (Batch (None, 32, 32, 14, 128)   512       
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 32, 32, 14, 128)   0         
_________________________________________________________________
conv3d_8 (Conv3D)            (None, 16, 16, 14, 256)   884992    
_________________________________________________________________
batch_normalization_7 (Batch (None, 16, 16, 14, 256)   1024      
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 16, 16, 14, 256)   0         
_________________________________________________________________
conv3d_9 (Conv3D)            (None, 8, 8, 7, 512)      3539456   
_________________________________________________________________
batch_normalization_8 (Batch (None, 8, 8, 7, 512)      2048      
_________________________________________________________________
leaky_re_lu_9 (LeakyReLU)    (None, 8, 8, 7, 512)      0         
_________________________________________________________________
conv3d_10 (Conv3D)           (None, 4, 4, 4, 1024)     14156800  
_________________________________________________________________
batch_normalization_9 (Batch (None, 4, 4, 4, 1024)     4096      
_________________________________________________________________
leaky_re_lu_10 (LeakyReLU)   (None, 4, 4, 4, 1024)     0         
_________________________________________________________________
conv3d_11 (Conv3D)           (None, 2, 2, 2, 2048)     56625152  
_________________________________________________________________
batch_normalization_10 (Batc (None, 2, 2, 2, 2048)     8192      
_________________________________________________________________
leaky_re_lu_11 (LeakyReLU)   (None, 2, 2, 2, 2048)     0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
_________________________________________________________________
batch_normalization_11 (Batc (None, 16384)             65536     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 32770     

Total params: 75,543,682
Trainable params: 75,502,978
Non-trainable params: 40,704
_________________________________________________________________
Epoch 1/30
9/9 [==============================] - 183s 20s/step - loss: 1.0211 - accuracy: 0.6805 - val_loss: 0.6653 - val_accuracy: 0.5556
Epoch 2/30
9/9 [==============================] - 180s 20s/step - loss: 0.1559 - accuracy: 0.9248 - val_loss: 0.6163 - val_accuracy: 0.5926
Epoch 3/30
9/9 [==============================] - 181s 20s/step - loss: 0.0756 - accuracy: 0.9774 - val_loss: 0.6363 - val_accuracy: 0.5741
Epoch 4/30
9/9 [==============================] - 179s 20s/step - loss: 0.0607 - accuracy: 0.9774 - val_loss: 0.6031 - val_accuracy: 0.5926
Epoch 5/30
9/9 [==============================] - 180s 20s/step - loss: 0.0059 - accuracy: 0.9962 - val_loss: 0.5660 - val_accuracy: 0.6852
Epoch 6/30
9/9 [==============================] - 180s 20s/step - loss: 0.0120 - accuracy: 0.9962 - val_loss: 0.5595 - val_accuracy: 0.6852
Epoch 7/30
9/9 [==============================] - 180s 20s/step - loss: 0.0427 - accuracy: 0.9812 - val_loss: 0.5376 - val_accuracy: 0.8333
Epoch 8/30
9/9 [==============================] - 184s 20s/step - loss: 0.0134 - accuracy: 0.9962 - val_loss: 0.5088 - val_accuracy: 0.7593
Epoch 9/30
9/9 [==============================] - 179s 20s/step - loss: 0.0024 - accuracy: 1.0000 - val_loss: 0.4788 - val_accuracy: 0.7963
Epoch 10/30
9/9 [==============================] - 181s 20s/step - loss: 0.0025 - accuracy: 1.0000 - val_loss: 0.4569 - val_accuracy: 0.7778
Epoch 11/30
9/9 [==============================] - 181s 20s/step - loss: 0.0020 - accuracy: 1.0000 - val_loss: 0.4390 - val_accuracy: 0.7963
Epoch 12/30
9/9 [==============================] - 180s 20s/step - loss: 6.8972e-04 - accuracy: 1.0000 - val_loss: 0.4162 - val_accuracy: 0.8148
Epoch 13/30
9/9 [==============================] - 181s 20s/step - loss: 0.0024 - accuracy: 1.0000 - val_loss: 0.3847 - val_accuracy: 0.8519
Epoch 14/30
9/9 [==============================] - 179s 20s/step - loss: 4.6133e-04 - accuracy: 1.0000 - val_loss: 0.3601 - val_accuracy: 0.8148
Epoch 15/30
9/9 [==============================] - 179s 20s/step - loss: 0.0043 - accuracy: 1.0000 - val_loss: 0.3436 - val_accuracy: 0.8519
Epoch 16/30
9/9 [==============================] - 178s 20s/step - loss: 0.0062 - accuracy: 1.0000 - val_loss: 0.3493 - val_accuracy: 0.8333
Epoch 17/30
9/9 [==============================] - 181s 20s/step - loss: 0.0222 - accuracy: 0.9925 - val_loss: 0.3731 - val_accuracy: 0.8519
Epoch 18/30
9/9 [==============================] - 180s 20s/step - loss: 0.0374 - accuracy: 0.9887 - val_loss: 0.2749 - val_accuracy: 0.8333
Epoch 19/30
9/9 [==============================] - 179s 20s/step - loss: 0.0053 - accuracy: 1.0000 - val_loss: 0.2546 - val_accuracy: 0.8519
Epoch 20/30
9/9 [==============================] - 180s 20s/step - loss: 3.9174e-04 - accuracy: 1.0000 - val_loss: 0.2724 - val_accuracy: 0.8333
Epoch 21/30
9/9 [==============================] - 182s 20s/step - loss: 4.6239e-04 - accuracy: 1.0000 - val_loss: 0.2839 - val_accuracy: 0.8333
Epoch 22/30
9/9 [==============================] - 180s 20s/step - loss: 6.5405e-04 - accuracy: 1.0000 - val_loss: 0.2944 - val_accuracy: 0.8333
Epoch 23/30
9/9 [==============================] - 181s 20s/step - loss: 0.0013 - accuracy: 1.0000 - val_loss: 0.2910 - val_accuracy: 0.8333
Epoch 24/30
9/9 [==============================] - 181s 20s/step - loss: 1.5003e-04 - accuracy: 1.0000 - val_loss: 0.2902 - val_accuracy: 0.8333
Epoch 25/30
9/9 [==============================] - 178s 20s/step - loss: 9.8492e-06 - accuracy: 1.0000 - val_loss: 0.2938 - val_accuracy: 0.8333
Epoch 26/30
9/9 [==============================] - 180s 20s/step - loss: 5.6395e-06 - accuracy: 1.0000 - val_loss: 0.2988 - val_accuracy: 0.8333
Epoch 27/30
9/9 [==============================] - 181s 20s/step - loss: 3.7250e-05 - accuracy: 1.0000 - val_loss: 0.3049 - val_accuracy: 0.8333
Epoch 28/30
9/9 [==============================] - 179s 20s/step - loss: 1.6541e-05 - accuracy: 1.0000 - val_loss: 0.3109 - val_accuracy: 0.8333
Epoch 29/30
9/9 [==============================] - 181s 20s/step - loss: 4.7697e-04 - accuracy: 1.0000 - val_loss: 0.3175 - val_accuracy: 0.8333
Epoch 30/30
9/9 [==============================] - 179s 20s/step - loss: 1.0596e-05 - accuracy: 1.0000 - val_loss: 0.3233 - val_accuracy: 0.8333
Training Accuracy:  0.971875011920929
Testing Accuracy:  0.859375