In [23]:
%matplotlib inline

from __future__ import print_function
import keras
import numpy as np
import pickle 

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Merge
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras import backend as K
from keras.utils import plot_model
from cv2 import imread

from matplotlib import pyplot as plt
from IPython.display import clear_output

from data import getVideoData
from data import getAudioData
from data import getFusionData

In [24]:
def getModelArchitecture(input_shape, final_pool):
    """Build the four-stage convolutional trunk used for one input modality.

    Every stage is Conv2D (3x3 kernel, ReLU, 'same' padding) -> MaxPooling2D
    ('same' padding) -> Dropout(0.25). The stages use 16, 32, 64 and 128
    filters. The first three stages pool with a 2x2 window and stride 2; the
    last stage pools with ``final_pool``.

    Args:
        input_shape: Forwarded as ``input_shape`` to the first Conv2D layer.
            Callers in this notebook pass 3-tuples such as (224, 224, 3).
        final_pool: ``pool_size`` of the last MaxPooling2D layer. Callers in
            this notebook pass the size the feature map has after three 2x
            poolings (e.g. (28, 28) for a 224x224 input), which presumably
            collapses the map to 1x1 -- TODO confirm in the printed summary.

    Returns:
        The uncompiled Sequential model. No Flatten/Dense head is attached
        here; addFCLayers and fusionBranch add their own heads.
    """
    cmodel = Sequential()

    # One (filters, pool_size, strides) entry per Conv2D -> MaxPooling2D -> Dropout stage.
    stages = (
        (16, (2, 2), 2),
        (32, (2, 2), 2),
        (64, (2, 2), 2),
        # Last stage pools with the caller-supplied window. strides=None leaves
        # the stride at the layer's default (documented by Keras as pool_size).
        (128, final_pool, None),
    )

    for stage_index, (filters, pool_size, strides) in enumerate(stages):
        # Only the first layer of the Sequential model is told the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
        cmodel.add(Conv2D(filters, kernel_size=(3, 3), activation='relu',
                          padding='same', **first_layer_kwargs))
        cmodel.add(MaxPooling2D(pool_size=pool_size, strides=strides, padding='same'))
        cmodel.add(Dropout(0.25))

    # summary() prints the layer table itself and returns None, so it must not
    # be wrapped in print(): that emitted a stray "None" line after the table.
    cmodel.summary()

    return cmodel

In [25]:
def fusionBranch(video_branch, audio_branch):
    """Join the two modality branches and attach a compiled 2-way classifier.

    The branches are combined with a ``Merge`` layer (in [video, audio]
    order), followed by Dense(128, ReLU), Flatten and Dense(2, softmax).
    Note that the 128-unit Dense layer is applied before Flatten, i.e. on the
    un-flattened merged tensor.

    Args:
        video_branch: Model producing the video features (first merge input).
        audio_branch: Model producing the audio features (second merge input).

    Returns:
        The Sequential fusion model, compiled with categorical cross-entropy
        loss, the Adadelta optimizer and an accuracy metric.
    """
    fused = Sequential()

    # NOTE(review): Merge is called without an explicit `mode`; in the legacy
    # Keras API this presumably means an element-wise sum of the two branch
    # outputs (so both must have the same shape) -- confirm. The Merge layer
    # is also deprecated in newer Keras releases.
    branch_join = Merge([video_branch, audio_branch])
    fused.add(branch_join)
    fused.add(Dense(128, activation='relu'))
    fused.add(Flatten())
    fused.add(Dense(2, activation='softmax'))

    compile_options = dict(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adadelta(),
        metrics=['accuracy'],
    )
    fused.compile(**compile_options)

    return fused

def trainFinal(final):
    """Fit the fusion model on the data returned by getFusionData().

    Args:
        final: A compiled model exposing ``fit`` (as built by fusionBranch).

    Returns:
        The same ``final`` object, after ``fit`` has been called on it with
        batch size 10 for 20 epochs.
    """
    print('Getting data...')
    # NOTE(review): fusionBranch merges its branches in [video, audio] order,
    # so the inputs returned by getFusionData presumably have to follow that
    # same order -- confirm against the data module.
    fusion_inputs, correspondence_labels = getFusionData()
    print('Data ready')

    fit_options = {'batch_size': 10, 'epochs': 20, 'verbose': 1}
    final.fit(fusion_inputs, correspondence_labels, **fit_options)

    return final

### Test code below for sub-model training.

In [26]:
def addFCLayers(cmodel):
    """Append a small fully connected classification head to ``cmodel``.

    The head is Flatten -> Dense(32, sigmoid) -> Dropout(0.25) ->
    Dense(2, sigmoid). The model is modified in place.

    Args:
        cmodel: Model exposing ``add`` (e.g. the trunk returned by
            getModelArchitecture).

    Returns:
        The same ``cmodel`` object with the four layers appended.
    """
    # NOTE(review): the 2-unit output uses a sigmoid activation, while
    # trainModel compiles with categorical_crossentropy and fusionBranch uses
    # softmax for its 2-unit output -- check whether softmax was intended.
    head_spec = (
        (Flatten, (), {}),
        (Dense, (32,), {'activation': 'sigmoid'}),
        (Dropout, (0.25,), {}),
        (Dense, (2,), {'activation': 'sigmoid'}),
    )

    # Each layer is constructed right before it is appended, in spec order.
    for layer_factory, positional, keywords in head_spec:
        cmodel.add(layer_factory(*positional, **keywords))

    return cmodel
    

def trainModel(model, x_train, y_train, x_test, y_test):
    """Compile ``model`` and fit it on the training split.

    The model is compiled with categorical cross-entropy loss, the Adadelta
    optimizer and an accuracy metric, then fitted with batch size 8 using
    ``(x_test, y_test)`` as validation data. No ``epochs`` argument is
    passed, so the Keras default number of epochs applies.

    Args:
        model: Model exposing ``compile`` and ``fit``.
        x_train: Training inputs, forwarded to ``fit``.
        y_train: Training targets, forwarded to ``fit``.
        x_test: Validation inputs.
        y_test: Validation targets.

    Returns:
        The same ``model`` object after compiling and fitting.
    """
    optimizer = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    held_out = (x_test, y_test)
    model.fit(x_train, y_train, batch_size=8, verbose=1, validation_data=held_out)

    return model

## Code for building and training the single-modality sub-models (video-only and audio-only).

In [None]:
def vidMain():
    """Build the video-only model (conv trunk + FC head) and train it.

    The trunk is built for a (224, 224, 3) input with a (28, 28) final
    pooling window, the head comes from addFCLayers, and training is
    delegated to getTrainedModel with getVideoData as the data factory.

    Returns:
        Whatever getTrainedModel returns (presumably the trained model).
    """
    video_net = addFCLayers(getModelArchitecture((224, 224, 3), (28, 28)))

    # NOTE(review): getTrainedModel is neither defined nor imported in the
    # visible notebook (only trainModel is, with a different signature), so
    # this call raises NameError unless it is defined elsewhere -- confirm.
    return getTrainedModel(video_net, dataFactory=getVideoData)

def audioMain():
    """Build the audio-only model (conv trunk + FC head) and train it.

    The trunk is built for a (199, 257, 3) input with a (25, 33) final
    pooling window, the head comes from addFCLayers, and training is
    delegated to getTrainedModel with getAudioData as the data factory.

    Returns:
        Whatever getTrainedModel returns (presumably the trained model),
        mirroring vidMain. Previously the result was dropped and the
        function always returned None.
    """
    model = getModelArchitecture((199, 257, 3), (25, 33))
    model = addFCLayers(model)
    # NOTE(review): getTrainedModel is neither defined nor imported in the
    # visible notebook (only trainModel is, with a different signature), so
    # this call raises NameError unless it is defined elsewhere -- confirm.
    model = getTrainedModel(model, dataFactory = getAudioData)

    return model

## Main

In [None]:
def fusedMain():
    """Build both conv trunks, fuse them and train the fused classifier.

    The video trunk takes a (224, 224, 3) input with a (28, 28) final pool;
    the audio trunk takes a (199, 257, 3) input with a (25, 33) final pool.
    Neither trunk gets the FC head from addFCLayers; fusionBranch attaches
    the shared classifier instead.

    Returns:
        The fusion model returned by trainFinal.
    """
    video_branch = getModelArchitecture((224, 224, 3), (28, 28))
    audio_branch = getModelArchitecture((199, 257, 3), (25, 33))

    return trainFinal(fusionBranch(video_branch, audio_branch))

# Entry point: start the fused audio/video training when this code runs as the
# main module (this includes executing the cell in a notebook kernel, where
# __name__ is '__main__').
if __name__ == '__main__':
    fusedMain()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 224, 224, 16)      448       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 112, 112, 16)      0         
_________________________________________________________________
dropout_25 (Dropout)         (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 112, 112, 32)      4640      
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 56, 56, 32)        0         
_________________________________________________________________
dropout_26 (Dropout)         (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 56, 56, 64)        18496     
__________

  after removing the cwd from sys.path.


(1600, 224, 224, 3)
(1600, 224, 224, 3) (1600, 2)
Data ready
Epoch 1/20
