In [1]:
import os
# specify which GPU to use
# And also limit which GPU consumes memory (especially for tensorflow use)
os.environ['CUDA_VISIBLE_DEVICES']='1'
os.system('echo $CUDA_VISIBLE_DEVICES')

0

In [2]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
config = tf.ConfigProto()
# set tensorflow not consume all memory, dynamically allocate memory 
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))

In [5]:
from keras.models import Model
from keras.layers import Input, Conv3D, MaxPool3D, Flatten, Dense, Dropout

Build the following model:
<img src="./3D_CNN.jpg">

In [13]:
# define input
input_video = Input(shape=(224,224,16,3)) # input 16 frames, each frame has 3 channels

In [None]:
# use 3x3x3 kernel, # use funtional api 

In [16]:
# 1st layer convolutional layer
x = Conv3D(filters=64, kernel_size=(3,3,3), padding='same', activation='relu')(input_video)
# pooling
x = MaxPool3D(pool_size=(2,2,1))(x) # at the first pooling , temporal dimension no compressed
# 2nd layer convolutional layer
x = Conv3D(filters=128, kernel_size=(3,3,3), padding='same', activation='relu')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # start from 2nd pooling , temporal dimension compressed

# 3rd layer convolutional layer (use double conv, to obtain stronger non-linear capability)
x = Conv3D(filters=256, kernel_size=(3,3,3), padding='same', activation='relu')(x)
x = Conv3D(filters=256, kernel_size=(3,3,3), padding='same', activation='relu')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # temporal dimension compressed

# 4th layer convolutional layer (use double conv, to obtain stronger non-linear capability)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu')(x)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # temporal dimension compressed

# 5th layer convolutional layer (use double conv, to obtain stronger non-linear capability)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu')(x)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # temporal dimension compressed

# flatten to be one-dim vector
x = Flatten()(x)

In [17]:
# fully-connected layer fc6
x = Dense(units=4096, activation='relu')(x)
# dropout
x = Dropout(0.5)(x)
# fully-connected layer fc7
x = Dense(units=4096, activation='relu')(x)
# dropout
x = Dropout(0.5)(x)
# build a softmax (prob.), use y as output
y = Dense(10, activation='softmax')(x)
# FINISH the layers definition

In [18]:
# Define model (connect input & output)
model = Model(input_video, y)

In [19]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 16, 3)   0         
_________________________________________________________________
conv3d_17 (Conv3D)           (None, 224, 224, 16, 64)  5248      
_________________________________________________________________
max_pooling3d_11 (MaxPooling (None, 112, 112, 16, 64)  0         
_________________________________________________________________
conv3d_18 (Conv3D)           (None, 112, 112, 16, 128) 221312    
_________________________________________________________________
max_pooling3d_12 (MaxPooling (None, 56, 56, 8, 128)    0         
_________________________________________________________________
conv3d_19 (Conv3D)           (None, 56, 56, 8, 256)    884992    
_________________________________________________________________
conv3d_20 (Conv3D)           (None, 56, 56, 8, 256)    1769728   
__________

In [30]:
# 先讓network model跑起來, use random array to substitude as input
import numpy as np
# create 100 samples (each one is 224x224x16x3 dim)
x_train= np.random.random(size=(100, 224, 224, 16, 3))
# create labels, 10 classes
# construct a 100 randint array
y_train = np.asarray([np.random.randint(0,10) for i in range(100)])
# labels need to be one-hot form
from keras.utils import to_categorical
y_train = to_categorical(y_train,10) # num_classes=10

In [25]:
#[np.random.randint(0,10) for i in range(10)]

[5, 2, 3, 7, 8, 4, 6, 8, 3, 3]

In [32]:
# Before runing the model, we have to compile it.
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

In [33]:
# Running the model (Perform training), batch可以小一點, 交叉驗證的比例是10%
model.fit(x_train, y_train, batch_size=2, verbose=1, validation_split=0.1, epochs=10)

Train on 90 samples, validate on 10 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f37e4e7a400>

In [36]:
# add initlizer for layer weights
from keras import initializers
weight_init = initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=None)

In [38]:
# define input
input_video = Input(shape=(224,224,16,3)) # input 16 frames, each frame has 3 channels

# 1st layer convolutional layer
x = Conv3D(filters=64, kernel_size=(3,3,3), padding='same', activation='relu', 
           kernel_initializer=weight_init, bias_initializer='zeros')(input_video)
# pooling
x = MaxPool3D(pool_size=(2,2,1))(x) # at the first pooling , temporal dimension no compressed
# 2nd layer convolutional layer
x = Conv3D(filters=128, kernel_size=(3,3,3), padding='same', activation='relu', 
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # start from 2nd pooling , temporal dimension compressed

# 3rd layer convolutional layer (use double conv, to obtain stronger non-linear capability)
x = Conv3D(filters=256, kernel_size=(3,3,3), padding='same', activation='relu', 
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
x = Conv3D(filters=256, kernel_size=(3,3,3), padding='same', activation='relu',
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # temporal dimension compressed

# 4th layer convolutional layer (use double conv, to obtain stronger non-linear capability)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu',
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu',
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # temporal dimension compressed

# 5th layer convolutional layer (use double conv, to obtain stronger non-linear capability)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu',
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
x = Conv3D(filters=512, kernel_size=(3,3,3), padding='same', activation='relu',
          kernel_initializer=weight_init, bias_initializer='zeros')(x)
# pooling
x = MaxPool3D(pool_size=(2,2,2))(x) # temporal dimension compressed

# flatten to be one-dim vector
x = Flatten()(x)

# fully-connected layer fc6
x = Dense(units=4096, activation='relu',
         kernel_initializer=weight_init, bias_initializer='zeros')(x)
# dropout
x = Dropout(0.5)(x)
# fully-connected layer fc7
x = Dense(units=4096, activation='relu',
         kernel_initializer=weight_init, bias_initializer='zeros')(x)
# dropout
x = Dropout(0.5)(x)
# build a softmax (prob.), use y as output
y = Dense(10, activation='softmax')(x)
# FINISH the layers definition

# Define model (connect input & output)
model = Model(input_video, y)

In [39]:
# Before runing the model, we have to compile it.
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

In [41]:
# Running the model (Perform training), batch可以小一點, 交叉驗證的比例是10%
model.fit(x_train, y_train, batch_size=1, verbose=1, validation_split=0.1, epochs=10)

Train on 90 samples, validate on 10 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f37ddab7128>