# Exploratory coding for I3D
## Includes code snippets that transform input data into a format compatible with a pretrained I3D model from DeepMind

In [1]:
# Generator class
import video_generator

# I3D model functions
from i3d_inception import Inception_Inflated3d
from i3d_inception import conv3d_bn

# Keras
from keras import backend as K
from keras.models import Model
from keras.layers import Activation
from keras.layers import Add
from keras.layers import Dropout
from keras.layers import Reshape
from keras.layers import Lambda

Using TensorFlow backend.


In [7]:
# Dataset locations, clip dimensions and batch size handed to the video generator

test_dir = './data/val'
train_dir = './data/train'
# presumably (frames, height, width, channels) — TODO confirm against VideoGenerator
dims = (250, 224, 224, 3)
batch_size = 4
videogen = video_generator.VideoGenerator(
    train_dir,
    test_dir,
    dims,
    batch_size,
)

In [8]:
# Batch generators for both splits and the number of full batches in each epoch

training_generator = videogen.generate(train_or_test='train')
# Floor division: a trailing partial batch is not counted as a step
training_steps_per_epoch = len(videogen.filenames_train) // batch_size

testing_generator = videogen.generate(train_or_test='val')
testing_steps_per_epoch = len(videogen.filenames_test) // batch_size

In [9]:
# Model input geometry, number of output classes and dropout probability

NUM_FRAMES = 250
FRAME_HEIGHT = 224
FRAME_WIDTH = 224
NUM_CLASSES = 2
dropout_prob = 0.5  # passed to the Dropout layer placed before the logits

In [10]:
# 1x1x1 3d convolution + logit function for video segment classification

def generate_logit(x, last_conv3d_name, classes):
    """Turn backbone features into one logit vector per video segment.

    Applies ``conv3d_bn`` with ``classes`` filters (bias on, batch norm and
    activation off), reshapes the result to ``(frames_remaining, classes)``
    and averages over axis 1.

    Args:
        x: feature tensor coming out of the backbone.
        last_conv3d_name: layer name given to the final convolution.
        classes: number of output classes.

    Returns:
        Tensor of shape ``(batch, classes)`` holding un-normalised logits.
    """
    per_step_logits = conv3d_bn(
        x, classes, 1, 1, 1,
        padding='same',
        use_bias=True,
        use_activation_fn=False,
        use_bn=False,
        name=last_conv3d_name,
    )

    # The Reshape below only succeeds when every axis other than axis 1 and the
    # class axis has size 1 — assumes the backbone already pooled height/width.
    steps_left = int(per_step_logits.shape[1])
    per_step_logits = Reshape((steps_left, classes))(per_step_logits)

    # Average the per-step logits over axis 1 (the remaining frame axis).
    average_over_steps = Lambda(
        lambda t: K.mean(t, axis=1, keepdims=False),
        output_shape=lambda s: (s[0], s[2]),
    )
    return average_over_steps(per_step_logits)

# Single stream 3D convolution model in RGB channel using ImageNet/Kinetics weights

def RGB_model(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_CLASSES, dropout_prob):
    """Build the single-stream RGB I3D classifier.

    The ``Inception_Inflated3d`` backbone is created without its top and with
    the ``'rgb_imagenet_and_kinetics'`` weights; a Dropout layer, the logit
    head from ``generate_logit`` and a softmax are stacked on its last layer.

    Args:
        NUM_FRAMES: number of frames per input clip.
        FRAME_HEIGHT: frame height of the input.
        FRAME_WIDTH: frame width of the input.
        NUM_CLASSES: number of classes predicted by the softmax.
        dropout_prob: value passed to the ``Dropout`` layer.

    Returns:
        A Keras ``Model`` mapping the backbone input to the layer named
        ``'prediction'``.
    """
    rgb_model = Inception_Inflated3d(
        include_top=False,
        weights='rgb_imagenet_and_kinetics',
        input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, 3))

    x1 = rgb_model.layers[-1].output
    x1 = Dropout(dropout_prob)(x1)

    x1 = generate_logit(x1, '1x1_Conv3d_rgb_logits', NUM_CLASSES)

    x = Activation('softmax', name='prediction')(x1)

    # Keras 2 names these arguments `inputs`/`outputs`; the singular Keras 1
    # spellings are legacy aliases and are rejected by newer releases.
    model = Model(inputs=rgb_model.input, outputs=x)

    return model

In [11]:
# Build the RGB-only model from the input constants and dropout probability
test_model = RGB_model(
    NUM_FRAMES=NUM_FRAMES,
    FRAME_HEIGHT=FRAME_HEIGHT,
    FRAME_WIDTH=FRAME_WIDTH,
    NUM_CLASSES=NUM_CLASSES,
    dropout_prob=dropout_prob,
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [12]:
# NOTE(review): in notebook order this cell comes before the compile and fit
# cells, so on a fresh Restart & Run All the file would hold the model exactly
# as built by RGB_model, not one trained for 2 epochs as the filename suggests.
# Confirm the intent and consider saving after training instead.
test_model.save('2019_0611_I3D_goaldetect_RGBonly_2epoch.h5')

In [14]:
!ls -al

total 48732
drwxrwxr-x  6 ubuntu ubuntu     4096 Jun 11 23:39 .
drwxr-xr-x 24 ubuntu ubuntu     4096 Jun 11 19:51 ..
-rw-rw-r--  1 ubuntu ubuntu 49707184 Jun 11 23:39 2019_0611_I3D_goaldetect_RGBonly_2epoch.h5
drwxrwxr-x  7 ubuntu ubuntu     4096 Jun 11 23:26 data
drwxrwxr-x  8 ubuntu ubuntu     4096 Jun 11 19:34 .git
-rw-rw-r--  1 ubuntu ubuntu     8003 Jun 11 23:39 I3D_end_to_end_dev.ipynb
-rw-rw-r--  1 ubuntu ubuntu    98277 Jun 11 19:46 I3D_exploratory.ipynb
-rw-rw-r--  1 ubuntu ubuntu    26405 Jun 11 19:34 i3d_inception.py
drwxrwxr-x  2 ubuntu ubuntu     4096 Jun 11 19:48 .ipynb_checkpoints
-rw-rw-r--  1 ubuntu ubuntu     1060 Jun 11 19:34 LICENSE
-rw-------  1 ubuntu ubuntu        0 Jun 11 23:13 nohup.out
drwxrwxr-x  2 ubuntu ubuntu     4096 Jun 11 19:48 __pycache__
-rw-rw-r--  1 ubuntu ubuntu       63 Jun 11 23:11 something.py
-rw-rw-r--  1 ubuntu ubuntu        0 Jun 11 23:14 train_single_stream_model.py
-rw-rw-r--  1 ubuntu ubuntu     8537 Jun 11 23:31 train_test

In [7]:
# helper code to freeze layers
def freeze_RGB_model(model, trainable=False, num_head_layers=5):
    """Set ``trainable`` on every layer except the last ``num_head_layers``.

    Args:
        model: object exposing a ``layers`` sequence whose items have a
            ``trainable`` attribute (e.g. the model built by ``RGB_model``).
        trainable: value assigned to each affected layer; the default
            ``False`` freezes them.
        num_head_layers: number of trailing layers left untouched. The default
            of 5 presumably matches the layers stacked on top of the backbone
            in ``RGB_model`` — TODO confirm if the head changes.

    Raises:
        ValueError: if ``num_head_layers`` is negative.

    Note:
        Keras picks up a changed ``trainable`` flag only at ``compile`` time,
        so call this before compiling (or compile again afterwards).
    """
    if num_head_layers < 0:
        raise ValueError('num_head_layers must be non-negative')

    # Slice with an explicit end index: `layers[:-0]` would be empty and
    # silently touch nothing when the caller asks for zero head layers.
    backbone_end = max(len(model.layers) - num_head_layers, 0)
    for layer in model.layers[:backbone_end]:
        layer.trainable = trainable

In [8]:
# Freeze everything except the trailing head layers before compiling
freeze_RGB_model(test_model, trainable=False)

In [9]:
# Sanity check: layer 1 lies in the range frozen by freeze_RGB_model, so this
# is expected to display False.
test_model.layers[1].trainable

False

In [11]:
# Adam optimiser with categorical cross-entropy loss; accuracy is tracked as a metric
test_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for two epochs on the training generator, validating on the testing generator.

test_model.fit_generator(
    training_generator,
    steps_per_epoch=training_steps_per_epoch,
    validation_data=testing_generator,
    validation_steps=testing_steps_per_epoch,
    epochs=2,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/2
Epoch 2/2

In [None]:
class Annotator():
    """Label a video by running a model over successive chunks of frames.

    Work-in-progress sketch: frame extraction is delegated to a
    ``frame_capture`` helper that is not defined in this notebook yet.
    """

    def __init__(self, filepath, output_dir, model, model_params, step_size=5):
        """Load the model weights and open the source video.

        Args:
            filepath: path of the video, handed to ``cv2.VideoCapture``.
            output_dir: stored as ``self.output_dir``; not used yet.
            model: model object exposing ``load_weights`` and ``predict``
                (e.g. the Keras model returned by ``RGB_model``).
            model_params: weights file passed to ``model.load_weights``.
            step_size: stored as ``self.step_size``; not used yet
                (presumably the stride between annotated chunks — TODO confirm).
        """
        self.model = model
        # Keras models expose `load_weights`; they have no `load` method.
        self.model.load_weights(model_params)

        # NOTE(review): `cv2` is not imported in the visible notebook cells;
        # add `import cv2` to the import cell before using this class.
        self.src = cv2.VideoCapture(filepath)
        self.output_dir = output_dir
        self.step_size = step_size

    def annotate(self):
        """Predict a label for each chunk of frames until the video runs out.

        Returns:
            list with one ``self.model.predict`` result per chunk.
        """
        y = []
        while True:
            # TODO: implement `frame_capture` — some function to pull frames and
            # create a numpy array. This loop assumes it returns None once no
            # frames are left, which is what ends the annotation.
            test_frame = frame_capture(self.src)
            if test_frame is None:
                break
            label = self.model.predict(test_frame)
            y.append(label)
        return y
        
        