## Exploring audio features in D3 Project

In [1]:
import sys
# Add the local ./src directory to the module search path. The project modules
# imported in the next cell (e.g. `config`, `mpyplx`) presumably live there —
# TODO confirm.
sys.path.append('./src')

In [2]:
from config import *

import mPyPl as mp
import mPyPl.utils.image as mpui
from mpyplx import *
from pipe import Pipe
from moviepy.editor import *
import numpy as np
import itertools
import cv2
import math
import matplotlib.pyplot as plt

Load all audio features and normalize them

In [3]:
# Filename prefixes ("<Id>_<Half>_") for every matches.json record that has a
# 'Test' key whose integer value is positive. They are handed to
# datasplit_by_pattern below as the test pattern.
test_names = (
    from_json(os.path.join(source_dir, 'matches.json'))
    | mp.where(lambda match: 'Test' in match.keys() and int(match['Test']) > 0)
    | mp.apply(
        ['Id', 'Half'],
        'pattern',
        lambda id_half: "{}_{}_".format(id_half[0], id_half[1]),
    )
    | mp.select_field('pattern')
    | mp.as_list
)

classes = mp.get_classes(data_dir)

# Main dataset, built from the ".resized.mp4" entries under data_dir and
# materialised as a list so later cells can index it and pipe it repeatedly.
data = (
    mp.get_datastream(data_dir, ext=".resized.mp4", classes=classes)
    | datasplit_by_pattern(test_pattern=test_names)
    | stratify_sample_tt(shuffle=True)
    | summary()
    # | mp.take(2000)
    # | mp.iter('filename', lambda x: print("Processing {}".format(x)))
    # The audio features are loaded from a file with the same path as the
    # video, with the '.resized.mp4' suffix swapped for '.audiofeatures.npy'.
    | mp.apply(
        'filename',
        'aud',
        lambda path: np.load(path.replace('.resized.mp4', '.audiofeatures.npy')),
    )
    | normalize_npy_value('aud', interval=(0, 1))
    # Average over axis 1 to get one value per row of 'aud'
    # (assumes 'aud' is laid out as (feature, frame) — TODO confirm).
    | mp.apply('aud', 'mean', lambda feats: np.mean(feats, axis=1))
    | summary()
    | mp.as_list
)

print("Using the following classes:\n%s" % classes)

## Explore sound features

In [4]:
# Display names for the audio features; entry i is used in the cells below as
# the label for row i of the 'aud' arrays and of the per-sample mean vectors.
# NOTE(review): the order looks like pyAudioAnalysis short-term features — confirm.
f_names = (
    [
        'zcr',
        'energy',
        'energy_entropy',
        'spectral_centroid',
        'spectral_spread',
        'spectral_entropy',
        'spectral_flux',
        'spectral_rolloff',
    ]
    + ['mfcc_{}'.format(k) for k in range(1, 14)]
    + ['chroma_{}'.format(k) for k in range(1, 13)]
    + ['chroma_std']
)

In [5]:
# Audio feature array of the first record in the dataset.
fts = data[0]['aud']


def plotfeatures(a):
    """Draw one line plot per requested feature of the global ``fts`` array.

    a: iterable of integer indices; each index selects ``fts[index]`` as the
       plotted series and ``f_names[index]`` as the figure title.

    Every feature gets its own figure because ``plt.show()`` is called inside
    the loop.
    """
    for feature_idx in a:
        plt.plot(fts[feature_idx])
        plt.title(f_names[feature_idx])
        plt.show()


# Look at the first seven features (indices 0..6).
plotfeatures(range(7))

## Compare audio feature means for shots vs. no-shots

In [6]:
# Collect the per-sample 'mean' vectors of two classes as NumPy arrays.
# NOTE(review): class_id 2 is taken to be "shots" and class_id 1 "no shots",
# going by the variable names — confirm against the printed class mapping.
shots_mean = (
    data
    | mp.filter('class_id', lambda cid: cid == 2)
    | mp.select_field('mean')
    | mp.as_npy
)
no_shots_mean = (
    data
    | mp.filter('class_id', lambda cid: cid == 1)
    | mp.select_field('mean')
    | mp.as_npy
)
print(shots_mean.shape, no_shots_mean.shape)

In [7]:
# Per-feature difference between the class averages: a positive bar means the
# feature's average is higher in shots_mean than in no_shots_mean.
mean_diff = np.mean(shots_mean, axis=0) - np.mean(no_shots_mean, axis=0)

plt.figure(figsize=(15, 8))
plt.bar(f_names, mean_diff)
# The feature names are long, so they are drawn vertically.
plt.xticks(rotation='vertical', fontsize=15)
plt.yticks(fontsize=15)
plt.show()

## Trying to classify using scikit-learn

#### Classify using correct train-test split (by matches) - 2 classes

In [16]:
# Two-class setup: drop records with class_id 0, store class_id - 1 in a new
# 'new_class_id' field, then split into train and test parts.
# (The split presumably follows the assignment made by datasplit_by_pattern
# when `data` was built — TODO confirm.)
train, test = (
    data
    | mp.filter('class_id', lambda cid: cid != 0)
    | mp.apply('class_id', 'new_class_id', lambda cid: cid - 1)
    | mp.make_train_test_split
)

In [17]:
def make_xy(data,field_name_x,field_name_y):
    """Split a datastream into parallel lists of inputs and targets.

    data: datastream (or list of records) to read from.
    field_name_x: name of the field collected into the first list.
    field_name_y: name of the field collected into the second list.

    Returns a tuple ``(xs, ys)`` of two lists of equal length, in stream
    order. An empty stream yields ``([], [])``.
    """
    pairs = data | mp.select_field([field_name_x, field_name_y]) | mp.as_list
    if not pairs:
        # zip(*[]) produces nothing, so the two-way unpacking below would
        # fail with "not enough values to unpack" on an empty stream.
        return [], []
    xs, ys = zip(*pairs)
    return list(xs), list(ys)

In [10]:
# Inputs and labels for the two-class experiment: X comes from each record's
# 'mean' field, Y from its 'new_class_id' field.
X_train,Y_train = make_xy(train,'mean','new_class_id')
X_test,Y_test = make_xy(test,'mean','new_class_id')

In [0]:
from sklearn import metrics
from sklearn import tree, ensemble


# expects flattened data going in
def ID3(xtr,xte,ytr,yte, title="confusion", random_state=None):
    """Fit a random forest on the training data and print test metrics.

    Despite its name, this does not build an ID3 decision tree: the model is
    scikit-learn's ``ensemble.RandomForestClassifier`` with default settings.

    xtr, ytr: training samples (flat feature vectors) and their labels.
    xte, yte: test samples and their labels.
    title: text printed before the metrics.
    random_state: seed passed to the classifier. The default ``None`` keeps
        the original unseeded behaviour; pass an int to get the same forest,
        and therefore the same report, on every run.

    Prints the classification report and the confusion matrix for the test
    set, then returns ``(predictions, fitted_classifier)``.
    """
    print(title)
    # A single tree.DecisionTreeClassifier() can be swapped in here to
    # compare against the forest.
    classifier = ensemble.RandomForestClassifier(random_state=random_state)
    classifier.fit(xtr, ytr)
    predictions = classifier.predict(xte)
    print(metrics.classification_report(yte, predictions))
    print(metrics.confusion_matrix(yte, predictions))
    return predictions, classifier



In [12]:
# Train and evaluate on the two-class split. The metrics are printed by ID3
# and the returned (predictions, classifier) pair is the cell's value.
ID3(X_train, X_test, Y_train, Y_test)

#### Classify using correct train-test split (by matches) - 3 classes

In [0]:
# Three-class setup: split the full dataset (no class filtering) and use the
# original 'class_id' field as the label. This rebinds train/test and the
# X_*/Y_* variables from the two-class experiment.
train,test = data | mp.make_train_test_split
X_train,Y_train = make_xy(train,'mean','class_id')
X_test,Y_test = make_xy(test,'mean','class_id')

In [14]:
# Same evaluation as before, now with the unfiltered 'class_id' labels.
ID3(X_train, X_test, Y_train, Y_test)

#### Classify using random split - 3 classes

In [15]:
# Inputs and labels taken from the whole dataset at once, without going
# through mp.make_train_test_split.
X,Y = make_xy(data,'mean','class_id')

In [171]:
from sklearn.model_selection import train_test_split
# Random split with a fixed seed so the partition is repeatable. Samples are
# assigned without regard to the match they came from.
# NOTE: this rebinds X_train from the earlier cells; the labels go into the
# lower-case y_train / y_val, so Y_train / Y_test keep their old values.
X_train, X_val, y_train, y_val = train_test_split(X, Y, random_state=0)

In [172]:
# Evaluate on the random split (X_train here is the one produced by
# train_test_split, not the match-based one).
ID3(X_train, X_val, y_train, y_val)