## Notebook using a pre-defined train-test split

In [2]:
import sys
sys.path.append('./src')
from config import *

In [3]:
import mPyPl as mp
import mPyPl as mp
import mPyPl.utils.image as mpui
from mpyplx import *
from pipe import Pipe
from moviepy.editor import *
import numpy as np
import itertools
import cv2
import math
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import *
from keras.regularizers import l2


In [4]:
# Show the class mapping that mPyPl derives from data_dir (inspection only).
print(mp.get_classes(data_dir))

# The preview stream below is labelled with this explicit two-class mapping.
classes = {'noshot': 1, 'shot': 2}
print(classes)

stream = mp.get_datastream(data_dir, classes=classes, ext=".resized.mp4")

In [5]:
# Preview: pipe the stream through mp.take(5) and collect the result into a
# list. As the last expression of the cell, the list is what the notebook shows.
stream | mp.take(5) | mp.as_list

In [6]:
# Rebind `classes` to whatever mp.get_classes returns for data_dir, replacing
# the hand-written dict assigned in the earlier cell. The data pipeline in the
# next cell passes this value to mp.get_datastream.
classes = mp.get_classes(data_dir)
print(classes)

In [7]:
def _is_test_match(match):
    """Return True when a match record carries a positive 'Test' value."""
    return 'Test' in match.keys() and int(match['Test']) > 0


def _file_prefix(id_and_half):
    """Format an [Id, Half] pair as the '<Id>_<Half>_' pattern string."""
    return "{}_{}_".format(id_and_half[0], id_and_half[1])


def _remap_class_id(class_id):
    """Remap a class id: 0 -> 1, 1 -> 1, 2 -> 0.

    NOTE(review): ids 0 and 1 collapse onto the same value; confirm this
    matches the ids actually produced by the class mapping in use.
    """
    return 2 - (1 if class_id == 0 else class_id)


def _describe(class_and_split):
    """Format a [class_id, split] pair as '<class_id>-<split>'."""
    return "{}-{}".format(class_and_split[0], class_and_split[1])


# Patterns identifying the matches/halves that matches.json flags as test data.
test_names = (
    from_json(os.path.join(source_dir, 'matches.json'))
    | mp.where(_is_test_match)
    | mp.apply(['Id', 'Half'], 'pattern', _file_prefix)
    | mp.select_field('pattern')
    | mp.as_list
)

# Full dataset: split by the patterns above, class ids remapped, then sampled
# and summarised by the project helpers before being collected into a list.
data = (
    mp.get_datastream(data_dir, classes=classes, ext=".resized.mp4")
    | datasplit_by_pattern(test_pattern=test_names)
    | mp.sapply('class_id', _remap_class_id)
    | stratify_sample_tt()
    | mp.apply(['class_id', 'split'], 'descr', _describe)
    | summarize('descr')
    | mp.as_list
)


In [8]:
# Preview: print the list obtained by piping `data` through mp.take(5).
print(data | mp.take(5) | mp.as_list )

In [9]:
def printf(x, y):
    """Print `x` and hand back `y` unchanged.

    Lets a value be logged from inside an expression (for example a lambda in
    a pipeline stage) without altering the value that flows on.
    """
    print(x)
    return y

In [10]:
def _load_sibling_npy(suffix):
    """Return a loader for the .npy file stored alongside a video file.

    The returned function replaces '.resized.mp4' in the given filename with
    `suffix` and reads the resulting path with np.load.
    """
    def load(filename):
        return np.load(filename.replace('.resized.mp4', suffix))
    return load


# Attach feature fields to every record. The raw arrays use the OnDemand
# strategy and the reshaped views use LazyMemoized, as requested below.
# NOTE(review): `no_frames` is looked up when the 'vggflat' lambda runs, not
# when this cell runs; it is assigned in a later cell, so it must be defined
# before 'vggflat' is first evaluated.
data = (
    data
    | mp.apply('filename', 'audio',
               _load_sibling_npy('.audiofeatures.npy'),
               eval_strategy=mp.EvalStrategies.OnDemand)
    | mp.apply('audio', 'audio_third',
               lambda audio: audio.reshape((34, 200, 1)),
               eval_strategy=mp.EvalStrategies.LazyMemoized)
    | mp.apply('filename', 'vgg',
               _load_sibling_npy('.vgg.npy'),
               eval_strategy=mp.EvalStrategies.OnDemand)
    | mp.apply('vgg', 'vggflat',
               lambda vgg: np.reshape(vgg, (no_frames, -1, 1)),
               eval_strategy=mp.EvalStrategies.LazyMemoized)
    | mp.as_list
)

In [11]:
# Sanity check: shape of the 'audio_third' field on the first record
# (the apply stage that creates it reshapes to (34, 200, 1)).
data[0]["audio_third"].shape

In [19]:
# Split the records into the two streams used for fitting and validation.
trainstream, valstream = data | mp.make_train_test_split


def _split_size(split_type):
    """Count the records in `data` whose 'split' field equals `split_type`."""
    return data | mp.filter('split', lambda s: s == split_type) | mp.count


# Sizes are needed later to derive the number of steps per epoch.
no_train = _split_size(mp.SplitType.Train)
no_test = _split_size(mp.SplitType.Test)
print("Train={}, Test={}".format(no_train, no_test))

In [20]:
# Audio branch: a small CNN over the (34, 200, 1) audio feature array that
# ends in a 10-unit dense layer.
batchsize = 32

model_audio = Sequential([
    Conv2D(32, (1, 3),
           input_shape=(34, 200, 1),
           data_format='channels_last',
           activation='relu',
           kernel_initializer='glorot_uniform',
           kernel_regularizer=l2(0.01)),
    AveragePooling2D((1, 2)),
    Conv2D(16, (1, 3),
           activation='relu',
           kernel_initializer='glorot_uniform',
           kernel_regularizer=l2(0.01)),
    AveragePooling2D((1, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(10,
          activation='relu',
          kernel_initializer='glorot_uniform',
          kernel_regularizer=l2(0.01)),
])

model_audio.summary()

In [21]:

# First dimension of the VGG feature input; also read by the 'vggflat'
# reshape and by the combined model's input definition.
no_frames = 126

# VGG branch: average-pool the (no_frames, 16384, 1) input by 12x12 first,
# then apply two conv/pool stages and a 10-unit dense layer.
model_vgg = Sequential([
    AveragePooling2D((12, 12), input_shape=(no_frames, 16384, 1)),
    Conv2D(8, (3, 3),
           data_format='channels_last',
           activation='relu',
           kernel_initializer='glorot_uniform',
           kernel_regularizer=l2(0.01)),
    AveragePooling2D((2, 2)),
    Conv2D(16, (3, 3),
           activation='relu',
           kernel_initializer='glorot_uniform',
           kernel_regularizer=l2(0.01)),
    AveragePooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(10,
          activation='relu',
          kernel_initializer='glorot_uniform',
          kernel_regularizer=l2(0.01)),
])

model_vgg.summary()

In [22]:
from keras.models import Model

# Combined two-input network: the VGG branch and the audio branch each emit a
# 10-unit vector; the two are concatenated and fed to one sigmoid unit.
input1 = Input(shape=(no_frames, 16384, 1))  # fed to model_vgg
input2 = Input(shape=(34, 200, 1))           # fed to model_audio
concat = concatenate([model_vgg(input1), model_audio(input2)])
output = Dense(1,
               activation='sigmoid',
               kernel_initializer='glorot_uniform',
               kernel_regularizer=l2(0.01))(concat)

# Keras 2 names these arguments `inputs` / `outputs`; the singular Keras 1
# spellings are deprecated aliases that newer releases reject.
model = Model(inputs=[input1, input2], outputs=output)
model.summary()

model.compile(loss='binary_crossentropy',
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['acc'])

# Checkpoint file name template; the epoch number and validation accuracy are
# substituted in by the callback.
filepath = "weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"

# Both callbacks watch val_loss. They only take effect once they are passed
# to the training call via its `callbacks` argument.
checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                             monitor='val_loss',
                                             verbose=0,
                                             save_best_only=True,
                                             save_weights_only=False,
                                             mode='auto',
                                             period=1)
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               min_delta=0.0001,
                                               patience=6,
                                               verbose=0, mode='auto')

In [23]:
# Materialise the validation stream as a list; it is consumed again by the
# prediction cells after training.
valstream = valstream | mp.as_list

# Train on endlessly reshuffled batches. Each batch supplies the two model
# inputs ('vggflat', 'audio_third') in the order the model declares them,
# with 'class_id' as the target.
# The checkpoint and early-stopping callbacks built in the previous cell are
# passed here; without `callbacks=` they would never run.
history = model.fit_generator(
    trainstream | mp.infshuffle | mp.as_batch(['vggflat', 'audio_third'], 'class_id', batchsize=batchsize),
    steps_per_epoch=no_train // batchsize,
    validation_data=valstream | mp.infshuffle | mp.as_batch(['vggflat', 'audio_third'], 'class_id', batchsize=batchsize),
    validation_steps=no_test // batchsize,
    epochs=20,
    verbose=1,
    callbacks=[checkpoint, early_stopping])

In [None]:
# Persist the trained model to disk.
# NOTE(review): the ".pickle" extension is only a file name; Keras'
# Model.save picks its own on-disk format, so confirm the intended format.
# The same model is saved again under a different name further down.
model.save("audio-features-300-34-1.pickle")

In [32]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by the training call.
acc, val_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)

epochs = range(len(acc))

# One figure per metric: training values as dots, validation values as a line.
curves = (
    (acc, val_acc, 'Training acc', 'Validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss'),
)
for index, (train_values, val_values, train_label, val_label, title) in enumerate(curves):
    if index:
        # Every metric after the first is drawn on a fresh figure.
        plt.figure()
    plt.plot(epochs, train_values, 'bo', label=train_label)
    plt.plot(epochs, val_values, 'b', label=val_label)
    plt.title(title)
    plt.legend()

plt.show()

In [33]:
# Persist the combined model under a second file name.
# NOTE(review): as with the earlier save, ".pickle" is only a file name;
# Keras' Model.save picks its own on-disk format.
model.save('model.vgg.audio.pickle')

In [None]:
# The model was built with two inputs (VGG features first, audio features
# second), so predict needs one array per input, in that order. Passing only
# the 'vggflat' array does not match the model's inputs.
# valstream is materialised as a list before training, so it can be iterated
# once per field here.
test_examples = [
    valstream | mp.select_field("vggflat") | mp.as_npy,
    valstream | mp.select_field("audio_third") | mp.as_npy,
]

print([part.shape for part in test_examples])

preds = model.predict(test_examples)

In [None]:
# Scratch cell, apparently unfinished: the lambda ignores its argument and
# returns the name `getc`, which is not assigned in any visible cell.
# NOTE(review): `execute` is not defined in the visible cells either;
# presumably it comes from one of the star imports. Confirm before running.
valstream | mp.fapply("pred_acc", lambda x: getc ) | execute

In [None]:
# NOTE(review): leftover scratch input. `v` is not assigned in any visible
# cell, so this raises NameError unless a star import provides the name.
v