In [1]:
#Functional API fusion model
import keras
import tensorflow
#tensorflow.debugging.set_log_device_placement(True)
print("Num GPUs Available: ", len(tensorflow.config.list_physical_devices('GPU')))
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.models import Model
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers import Flatten
from keras.layers import BatchNormalization
from keras.layers import Conv1D
from keras.layers import Conv2D
from keras.layers import Conv2DTranspose
from keras.layers import Conv3D
from keras.layers import LeakyReLU
from keras.layers import Dropout
from keras.layers import MaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import Concatenate
from keras.layers import Input
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import Adam
from keras.utils import to_categorical
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import datetime

Num GPUs Available:  4


In [2]:
gpus = tensorflow.config.list_physical_devices('GPU')


strategy = tensorflow.distribute.MirroredStrategy(devices=["/gpu:2", "/gpu:3"])

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')


In [3]:
load = np.load('data/BAUM_basic.npz',allow_pickle=True)
audio = load['audio']
video = load['video']
labels = load['labels']
print(np.array(video.shape))

[520 115 128 128]


In [4]:
#video pre-process

fraction = 1
frames = int(115/fraction) #115
shape = (128,128,frames,1)

video_input = (128, 128, frames, 1)
video_shape = (video.shape[0], 128, 128, frames, 1)
video = np.reshape(video, video_shape)

In [5]:
#audio pre-process
num_rows = 84672
num_columns = 1
num_channels = 1

audio_input = (num_rows, num_columns)
audio_shape = (audio.shape[0], num_rows, num_columns)
audio = np.reshape(audio, audio_shape)

In [6]:
#labels = labels - 1
#print(labels[101])
labels = to_categorical(labels)
target_names = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']


v_train, v_test, a_train, a_test, l_train, l_test = train_test_split(video, 
                                                                     audio, 
                                                                     labels,
                                                                     test_size=0.166,
                                                                     random_state=42) #size=0.2

print(v_train.shape)
print(l_train[95])
print(a_train.shape)

(433, 128, 128, 115, 1)
[0. 0. 0. 0. 1. 0.]
(433, 84672, 1)


In [7]:
#Audio Network
def audio_cnn_1d(input_):
    #Layer 1
    conv1d = Conv1D(padding='same', filters=25, kernel_size=64, strides = 8, input_shape=audio_input)(input_)
    relu = LeakyReLU(alpha=0.2)(conv1d)
    
    #Layer 2
    conv1d = Conv1D(padding='same', filters=25, kernel_size=128, strides = 8)(relu)
    batch = BatchNormalization()(conv1d)
    relu = LeakyReLU(alpha=0.2)(batch)
    
    #Layer 3
    conv1d = Conv1D(padding='same', filters=25, kernel_size=256, strides = 8)(relu)
    batch = BatchNormalization()(conv1d)
    relu = LeakyReLU(alpha=0.2)(batch)
    
    #Layer 4
    conv1d = Conv1D(padding='same', filters=25, kernel_size=512, strides = 4)(relu)
    batch = BatchNormalization()(conv1d)
    relu = LeakyReLU(alpha=0.2)(batch)
    
    #Layer 5
    conv1d = Conv1D(padding='same', filters=25, kernel_size=1024, strides = 4)(relu)
    batch = BatchNormalization()(conv1d)
    relu = LeakyReLU(alpha=0.2)(relu)
    
    #layer 6
    output = Flatten()(relu)
    
    return output

In [8]:
#video neural network
def video_cnn_3d(input_):
    #layer 1
    conv3d = Conv3D(64, kernel_size=(3,3,3), strides=(2,2,1), input_shape=video_input)(input_)
    relu = LeakyReLU(alpha=0.2)(conv3d)
    
    #layer 2
    conv3d = Conv3D(128, kernel_size=(3,3,3), strides=(2,2,1), padding='SAME')(relu)
    batch = BatchNormalization()(conv3d)
    relu = LeakyReLU(alpha=0.2)(batch)
    
    #layer 3
    conv3d = Conv3D(256, kernel_size=(3,3,3), strides=(2,2,1), padding='SAME')(relu)
    batch = BatchNormalization()(conv3d)
    relu = LeakyReLU(alpha=0.2)(batch)

    #layer 4
    conv3d = Conv3D(512, kernel_size=(3,3,3), strides=(2,2,2), padding='SAME')(relu)
    batch = BatchNormalization()(conv3d)
    relu = LeakyReLU(alpha=0.2)(batch)

    #layer 5
    conv3d = Conv3D(1024, kernel_size=(3,3,3), strides=(2,2,2), padding='SAME')(relu)
    batch = BatchNormalization()(conv3d)
    relu = LeakyReLU(alpha=0.2)(batch)

    #layer 6
    conv3d = Conv3D(2048, kernel_size=(3,3,3), strides=(2,2,2), padding='SAME')(relu)
    batch = BatchNormalization()(conv3d)
    relu = LeakyReLU(alpha=0.2)(batch)

    #layer 7
    output = Flatten()(relu)
    return output


In [9]:
with strategy.scope():
#fusion layer
    audio = Input(audio_input)
    video = Input(video_input)

    audio_tensor = audio_cnn_1d(audio)

    video_tensor = video_cnn_3d(video)

    fusion = Concatenate()([audio_tensor, video_tensor]) #combining tensors
    batch = BatchNormalization()(fusion)
    prediction = Dense(6,activation='sigmoid')(batch) #softmax
    twin_net = Model(inputs=[audio,video], outputs=prediction)

    twin_net.compile(loss='binary_crossentropy', #categorical_crossentropy
            optimizer=optimizers.Adam(lr=0.00001), 
                     metrics=['accuracy'])

    twin_net.summary()

    checkpoint_filepath = 'models/baum_fusion_basic_sigmoid'
    model_checkpoint_callback = ModelCheckpoint(
        filepath=checkpoint_filepath
        )
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + checkpoint_filepath
    tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    #twin_net.save('models/baum_fusion')
    #print('saved')
    twin_net.fit([a_train,v_train], l_train,
            batch_size=31,
            epochs=30,
            verbose=1,
            validation_split=0.166,
            callbacks=[model_checkpoint_callback,
            tensorboard_callback])

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

Epoch 1/30
INFO:tensorflow:batch_all_reduce: 40 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 40 all-reduces with algorithm = nccl, num_packs = 1

KeyboardInterrupt: 

In [None]:
with strategy.scope():
    #test softmax
    model = load_model('models/baum_fusion_sig_extended')
    checkpoint_filepath = 'models/baum_fusion_sig_extended'
    model_checkpoint_callback = ModelCheckpoint(
        filepath=checkpoint_filepath
        )
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + checkpoint_filepath
    tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    #twin_net.save('models/baum_fusion')
    #print('saved')
    model.fit([a_train,v_train], l_train,
            batch_size=31,
            epochs=173,
            verbose=1,
            validation_split=0.166,
            callbacks=[model_checkpoint_callback,
            tensorboard_callback])
    

In [None]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs/fit --host localhost --port 8000

In [16]:
with strategy.scope():
    #test softmax
    model = load_model('models/baum_fusion_basic_sigmoid')


    score = model.evaluate([a_train, v_train], l_train, verbose=0)
    print("Training Accuracy: ", score)

    score = model.evaluate([a_test, v_test], l_test, verbose=0)
    print("Testing Accuracy: ", score)


    Y_pred = model.predict([a_test, v_test])
    y_pred = np.argmax(Y_pred, axis=1)
    labels = np.argmax(l_test, axis=1)
    print('Confusion_Matrix')
    print(confusion_matrix(labels, y_pred))


Training Accuracy:  [0.28377383947372437, 0.9099307060241699]
Testing Accuracy:  [1.0247271060943604, 0.39080458879470825]
Confusion_Matrix
[[ 0  1  0  0  2  0]
 [ 0  6  3  5  4  1]
 [ 0  1  2  1  1  0]
 [ 3  4  3 15  4  2]
 [ 3  1  0  3 10  2]
 [ 1  1  2  2  3  1]]


In [None]:
soft 300 epoch
Training Accuracy:  [1.4789059162139893, 0.9099307060241699]
Testing Accuracy:  [11.502437591552734, 0.4137931168079376]
Confusion_Matrix
[[ 0  1  0  0  2  0]
 [ 2  6  0  6  4  1]
 [ 0  1  1  1  1  1]
 [ 2  6  0 16  6  1]
 [ 3  1  0  2 11  2]
 [ 0  1  0  3  4  2]]

sig 300 epoch
Training Accuracy:  [0.12294566631317139, 0.912240207195282]
Testing Accuracy:  [1.0002453327178955, 0.4252873659133911]
Confusion_Matrix
[[ 0  1  0  0  2  0]
 [ 1  5  1  7  5  0]
 [ 0  1  2  1  1  0]
 [ 2  5  1 18  4  1]
 [ 3  0  0  4 11  1]
 [ 2  1  0  2  4  1]]

baum basic 6 softmax
Training Accuracy:  [0.459194540977478, 0.9053117632865906]
Testing Accuracy:  [3.032977819442749, 0.3563218414783478]
Confusion_Matrix
[[ 0  0  0  0  2  1]
 [ 1  3  0  9  3  3]
 [ 0  1  1  1  1  1]
 [ 1  4  2 20  4  0]
 [ 5  2  0  4  6  2]
 [ 1  1  0  4  3  1]]

Baum basic 6 sigmoid
Training Accuracy:  [0.27888423204421997, 0.9053117632865906]
Testing Accuracy:  [0.9529832601547241, 0.40229883790016174]
Confusion_Matrix
[[ 0  1  0  1  1  0]
 [ 1  5  1  7  4  1]
 [ 0  1  2  1  1  0]
 [ 2  6  3 17  2  1]
 [ 4  0  0  4  9  2]
 [ 0  1  0  4  3  2]]

Baum all
Training Accuracy:  [0.27805307507514954, 0.8719576597213745]
Testing Accuracy:  [0.8749507665634155, 0.24338623881340027]
Confusion_Matrix
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  1  0  0  0  1  3  1  2  1  0  0]
 [ 0  0  2  0  0  0  0  1  0  0  0  0  0  0]
 [ 0  0  1  1  0  1  2  1  2  3  4  0  2  0]
 [ 0  0  0  0  2  0  1  0  0  0  0  0  0  3]
 [ 0  0  0  0  0  1  0  0  0  0  0  0  1  0]
 [ 0  0  1  1  0  0  4  2  1  1  2  0  0  0]
 [ 0  0  2  0  0  0  0  1  0  1  2  0  0  0]
 [ 1  4  0  0  1  1  4  1  9  3  2  0  3  0]
 [ 0  0  1  1  1  0  2  1  2 10  2  2  4  3]
 [ 0  3  0  0  0  0  4  0  1  5  7  2  0  1]
 [ 0  2  1  1  0  0  3  1  1  0  0  0  0  1]
 [ 0  0  0  1  2  1  0  2  1  7  3  2  3  2]
 [ 0  0  0  0  0  0  2  0  0  2  6  1  2  6]]