## Notebook using pre-defined Train-Test Split

In [1]:
import sys
sys.path.append('./src')
from config import *

In [2]:
import mPyPl as mp
import mPyPl as mp
import mPyPl.utils.image as mpui
from mpyplx import *
from pipe import Pipe
from moviepy.editor import *
import numpy as np
import itertools
import cv2
import pickle
import math
import matplotlib.pyplot as plt
import keras
import json
from keras.models import Sequential
from keras.layers import *
from keras.regularizers import l2
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction


In [3]:
# Echo the dataset root so the notebook output records which directory is used.
# NOTE(review): `data_dir` is presumably provided by `from config import *` — confirm.
data_dir

In [5]:
# First show the class mapping that mPyPl derives for `data_dir` (for reference
# only), then pin the mapping explicitly so the numeric ids used below are fixed.
detected_classes = mp.get_classes(data_dir)
classes = detected_classes
print(classes)

classes = {
    'attack': 0,
    'noshot': 1,
    'shot': 2,
}
print(classes)

In [7]:
def _is_test_match(match):
    """Return True for match records that carry a positive 'Test' flag."""
    return 'Test' in match.keys() and int(match['Test']) > 0


def _match_prefix(id_and_half):
    """Format the ('Id', 'Half') pair as the '<Id>_<Half>_' pattern string."""
    return "{}_{}_".format(id_and_half[0], id_and_half[1])


# Collect the patterns of all matches flagged as test material in matches.json;
# they are used below to route clips into the test split.
matches_path = os.path.join(source_dir, 'matches.json')
test_names = (
    from_json(matches_path)
    | mp.where(_is_test_match)
    | mp.apply(['Id', 'Half'], 'pattern', _match_prefix)
    | mp.select_field('pattern')
    | mp.as_list
)

def _remap_class_id(class_id):
    """Collapse the three class ids into two labels.

    Ids 0 and 1 both map to 1, id 2 maps to 0 (with the `classes` mapping in
    this notebook that is attack/noshot -> 1 and shot -> 0, assuming
    `class_id` follows that mapping).
    """
    merged = 1 if class_id == 0 else class_id
    return 2 - merged


def _describe(class_and_split):
    """Build the '<class_id>-<split>' tag used for the summary printout."""
    return "{}-{}".format(class_and_split[0], class_and_split[1])


# Build the working dataset: read the resized clips, assign train/test by the
# pre-defined match patterns, binarise the labels, stratify, and summarise the
# resulting class/split distribution.
data = (
    mp.get_datastream(data_dir, classes=classes, ext=".resized.mp4")
    | datasplit_by_pattern(test_pattern=test_names)
    | mp.sapply('class_id', _remap_class_id)
    | stratify_sample_tt()
    | mp.apply(['class_id', 'split'], 'descr', _describe)
    | summarize('descr')
    | mp.as_list
)


In [8]:
# Peek at the first five records of the prepared dataset.
_first_records = data | mp.take(5) | mp.as_list
print(_first_records)

In [6]:
def printf(x, y):
    """Print ``x`` and return ``y`` unchanged.

    Handy for debugging inside expressions: the first argument is written to
    standard output as a side effect while the second is passed through.
    """
    passthrough = y
    print(x)
    return passthrough

**NB:** The following cell will take a long time to run, as it pre-computes all audio features.

In [10]:
# Step (in samples) between consecutive analysis windows.
# NOTE(review): the 5.0/126.0 factor presumably targets 126 frames per 5-second
# clip, matching the model input width used later — confirm.
snd_sampling = 5.0/126.0*audio_rate


def _short_term_audio_features(clip):
    """Compute pyAudioAnalysis short-term features for one loaded video clip.

    The clip's audio track is converted to a sample array, mixed down to mono
    and analysed with a window of twice the step size. Only element [0] of the
    extractor's return value is kept.
    """
    samples = clip.audio.to_soundarray()
    mono = audioBasicIO.stereo2mono(samples)
    extracted = audioFeatureExtraction.stFeatureExtraction(
        mono, audio_rate, 2.0*snd_sampling, snd_sampling)
    return extracted[0]


# Load every clip, attach its audio features under the 'audio' field, release
# the video again and materialise the result as a list.
data_audio = (
    data
    | mp.silly_progress(elements=10000)
    | load_moviepy_video()
    | mp.apply('video', 'audio', _short_term_audio_features)
    | close_moviepy_video()
    | mp.inspect()
    | mp.as_list
)

In [12]:
## Save audio features for future use
# Use a context manager so the file handle is flushed and closed as soon as the
# dump finishes (the previous bare open() was never closed explicitly).
with open('sound_data_set.pickled', 'wb') as features_file:
    pickle.dump(data_audio, features_file)

In [4]:
# Reload the pre-computed audio features instead of re-running the extraction.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
# NOTE(review): the save cell writes 'sound_data_set.pickled' to the working
# directory, while this cell reads from 'x:/data_etc/' — make sure the file
# has been moved there (or align the two paths).
with open('x:/data_etc/sound_data_set.pickled', 'rb') as features_file:
    data_audio = pickle.load(features_file)

In [5]:
def _row_max(features):
    """Maximum of each row (axis=1) of one clip's feature matrix."""
    return np.max(features, axis=1)


def _row_min(features):
    """Minimum of each row (axis=1) of one clip's feature matrix."""
    return np.min(features, axis=1)


def _snap_small_to_zero(value):
    """Replace minima strictly between 0 and 0.01 with exactly 0.0."""
    return 0.0 if 0 < value < 0.01 else value


# Per-clip extrema stacked into arrays, then reduced over all clips (axis=0)
# to obtain one global maximum / minimum per feature row.
maxes = data_audio | mp.select_field('audio') | mp.select(_row_max) | mp.as_npy
features_max = list(np.max(maxes, axis=0))

mins = data_audio | mp.select_field('audio') | mp.select(_row_min) | mp.as_npy
features_min = [_snap_small_to_zero(lowest) for lowest in np.min(mins, axis=0)]

# One (min, max) pair per feature, used for normalisation.
audio_minimax = [(low, high) for low, high in zip(features_min, features_max)]
print(audio_minimax)

In [38]:
## Save audio minimax values for inferencing
# Use a context manager so the JSON file is flushed and closed deterministically
# (the previous bare open() was never closed explicitly).
# NOTE(review): run this before `audio_minimax` is converted to a NumPy array
# in the normalisation cell; json cannot serialise ndarray objects.
with open('audio_minimax.json', 'w') as minimax_file:
    json.dump(audio_minimax, minimax_file)

In [7]:
# Turn the list of (min, max) pairs into an array: column 0 holds the minima,
# column 1 the maxima.
audio_minimax = np.array(audio_minimax)
mn = audio_minimax[:, 0]
# Width of each feature's value range.
# NOTE(review): a feature whose min equals its max gives a zero width and a
# division by zero in norm() — confirm this cannot occur for this data.
wd = audio_minimax[:, 1] - audio_minimax[:, 0]


def norm(x):
    """Min-max scale a feature matrix using the global ``mn`` / ``wd`` vectors.

    ``x`` is transposed so that its first axis lines up with the last axis
    expected by broadcasting against ``mn`` and ``wd``; the result is
    transposed back to the original orientation.
    """
    shifted = x.transpose() - mn
    scaled = shifted / wd
    return scaled.transpose()

def _normalized_with_channel(features):
    """Normalise a feature matrix and append a trailing axis (axis=2)."""
    return np.expand_dims(norm(features), axis=2)


def _count_in_split(split_kind):
    """Count the records of `data_audio` whose 'split' equals `split_kind`."""
    return (
        data_audio
        | mp.filter('split', lambda value: value == split_kind)
        | mp.count
    )


# Add the model-ready 'audiox' field and separate the records into the
# training and validation streams.
trainstream, valstream = (
    data_audio
    | mp.apply('audio', 'audiox', _normalized_with_channel)
    | mp.inspect
    | mp.make_train_test_split
)

# Split sizes, needed later to derive the number of steps per epoch.
no_train = _count_in_split(mp.SplitType.Train)
no_test = _count_in_split(mp.SplitType.Test)
print("Train={}, Test={}".format(no_train, no_test))

In [8]:
batchsize = 32


def _layer_options(**extra):
    """Keyword arguments shared by all trainable layers of the model.

    Every call creates a fresh L2(0.01) regulariser and selects the
    'glorot_uniform' initialiser; `extra` adds the layer-specific options.
    """
    options = dict(kernel_initializer='glorot_uniform',
                   kernel_regularizer=l2(0.01))
    options.update(extra)
    return options


# Small CNN over the (34, 126, 1) audio feature input. The (1, 3) kernels and
# (1, 2) pooling windows only span the second (126-long) axis.
# NOTE(review): presumably 34 audio features x 126 time frames — confirm.
model = Sequential()
_layer_stack = (
    Conv2D(32, (1, 3), **_layer_options(input_shape=(34, 126, 1),
                                        data_format='channels_last',
                                        activation='relu')),
    AveragePooling2D((1, 2)),
    Conv2D(16, (1, 3), **_layer_options(activation='relu')),
    AveragePooling2D((1, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(10, **_layer_options(activation='relu')),
    # Single sigmoid unit: binary output matching the binary cross-entropy loss.
    Dense(1, **_layer_options(activation='sigmoid')),
)
for _layer in _layer_stack:
    model.add(_layer)

model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(),
    metrics=['acc'],
)
model.summary()

In [9]:
# Materialise the validation stream so it can be re-shuffled without being
# consumed.
valstream = valstream | mp.as_list


def _shuffled_batches(stream):
    """Endless shuffled ('audiox', 'class_id') batches drawn from `stream`."""
    return (
        stream
        | mp.infshuffle
        | mp.as_batch('audiox', 'class_id', batchsize=batchsize)
    )


train_batches = _shuffled_batches(trainstream)
val_batches = _shuffled_batches(valstream)

# Each epoch uses as many full batches as fit into the respective split.
history = model.fit_generator(
    train_batches,
    steps_per_epoch=no_train // batchsize,
    validation_data=val_batches,
    validation_steps=no_test // batchsize,
    epochs=10,
    verbose=1,
)

In [10]:
# Persist the trained audio model to disk for later use.
model.save("x:/models/audio126.hdf5")

In [11]:
import matplotlib.pyplot as plt


def _draw_curves(x_values, train_values, val_values, train_label, val_label, heading):
    """Plot a training curve (dots) and a validation curve (line) on the current figure."""
    plt.plot(x_values, train_values, 'bo', label=train_label)
    plt.plot(x_values, val_values, 'b', label=val_label)
    plt.title(heading)
    plt.legend()


# Pull the per-epoch metrics recorded during training.
_recorded = history.history
acc = _recorded['acc']
val_acc = _recorded['val_acc']
loss = _recorded['loss']
val_loss = _recorded['val_loss']

epochs = range(len(acc))

# First figure: accuracy.
_draw_curves(epochs, acc, val_acc,
             'Training acc', 'Validation acc',
             'Training and validation accuracy')

# Second figure: loss.
plt.figure()
_draw_curves(epochs, loss, val_loss,
             'Training loss', 'Validation loss',
             'Training and validation loss')

plt.show()