This notebook collects notes on different models, leading up to a decision on which model to use in production.

This notebook incorporates the dataloader defined in https://github.com/axelth/us8kdata, and will also incorporate the feature extraction pipeline prototyped in notebooks/... and implemented in hasp/{features.py, pipeline.py}, etc.

Imports for individual models are performed in their respective sections, to make it easier to see which code to transfer to the library after model selection.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from us8kdata.loader import UrbanSound8K


In [32]:
## temporary imports
## remove when feature pipeline library is done
# from sklearn.pipeline import make_pipeline
# Instead of make_pipeline, we can use the Pipeline class directly.
# It involves a bit more typing, but we can name each step, and understand the params dictionary more easily
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import FunctionTransformer
# for wrapping the stft, mel filterbank and bandpass filter, mfcc

from sklearn.preprocessing import StandardScaler
# for scaling the features, not sure in which stage to apply this

from librosa.feature import mfcc

from sklearn.metrics import classification_report, confusion_matrix

In [5]:
def samples_to_mean_mfcc(examples, sr=16000, n_fft=512, hop_length=128, fmin=0.0, fmax=8000, n_mels=100, **kwargs):
    """Summarise each audio clip as the time-average of its MFCCs.

    Every clip is passed to ``librosa.feature.mfcc`` and the resulting
    coefficient matrix is averaged along axis 1, so a clip of any length
    becomes one fixed-size feature vector.

    Parameters
    ----------
    examples : iterable
        Audio clips; each item is handed to ``mfcc`` as ``y``.
        Presumably 1-D mono signals sampled at ``sr`` -- TODO confirm
        against the data loader.
    sr : int
        Sampling rate forwarded to ``mfcc``.
    n_fft : int
        FFT window length forwarded to ``mfcc``.
    hop_length : int
        Hop between successive frames, forwarded to ``mfcc``.
    fmin, fmax : float or None
        Frequency range of the mel filterbank, forwarded to ``mfcc``.
    n_mels : int
        Number of mel bands, forwarded to ``mfcc``.
    **kwargs
        Any further keyword arguments for ``mfcc`` (e.g. ``n_mfcc``).

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one row per clip.
    """
    # NOTE(review): the original comment here was truncated ("to prevent
    # trying to take the lo..."). It may have referred to guarding against a
    # log of zero on silent clips; no such guard exists -- confirm if needed.
    mean_vectors = []
    for sample in examples:
        coefficients = mfcc(
            y=sample,
            sr=sr,
            n_fft=n_fft,
            n_mels=n_mels,
            # These three used to be hard-coded in the call, which silently
            # ignored whatever the caller passed in.
            hop_length=hop_length,
            fmin=fmin,
            fmax=fmax,
            **kwargs,
        )
        mean_vectors.append(coefficients.mean(axis=1))
    return np.array(mean_vectors, dtype=np.float32)

# Stateless transformer: wraps samples_to_mean_mfcc so it can be used as a
# pipeline step. `kw_args` is forwarded to the function on every call.
mean_mfcc_feat = FunctionTransformer(
    func=samples_to_mean_mfcc,
    kw_args=dict(sr=16000, n_mfcc=20, n_fft=512, hop_length=128, fmin=0.0, fmax=None),
)

# Feature extraction followed by per-feature standardisation. The steps are
# named so they can be addressed individually through the params dictionary.
feature_pipe = Pipeline(steps=[
    ('mean_mfcc', mean_mfcc_feat),
    ('scaler', StandardScaler()),
])

In [18]:
# Fold assignment for the train / validation / test split. Defined once so the
# audio and the labels are always selected with exactly the same fold lists.
TRAIN_FOLDS = [1, 2, 3, 6, 7, 8, 9, 10]
VAL_FOLDS = [4]
TEST_FOLDS = [5]

data = UrbanSound8K('../hasp/data')

# Audio for each split, materialised from the loader's generator.
train_fold = list(data.fold_audio_generator(fold=TRAIN_FOLDS))
val_fold = list(data.fold_audio_generator(fold=VAL_FOLDS))
test_fold = list(data.fold_audio_generator(fold=TEST_FOLDS))

# Matching class IDs.
# NOTE(review): this assumes filter_metadata returns rows in the same order
# that fold_audio_generator yields clips -- confirm against us8kdata.
train_y = data.filter_metadata(fold=TRAIN_FOLDS).classID
val_y = data.filter_metadata(fold=VAL_FOLDS).classID
test_y = data.filter_metadata(fold=TEST_FOLDS).classID

In [33]:
# Show the numeric class IDs next to their human-readable names.
data.metadata.loc[:, ['classID', 'class']]

Unnamed: 0,classID,class
0,3,dog_bark
1,2,children_playing
2,2,children_playing
3,2,children_playing
4,2,children_playing
...,...,...
8727,1,car_horn
8728,1,car_horn
8729,1,car_horn
8730,1,car_horn


In [19]:
# Fit the pipeline (i.e. the scaler statistics) on the training split only...
train_pre = feature_pipe.fit_transform(train_fold)
# ...then apply the already-fitted pipeline to the held-out splits, in order.
val_pre, test_pre = (feature_pipe.transform(split) for split in (val_fold, test_fold))

# Single sample classification
Model architectures for treating each audio file as a single example and classifying it in one go.

## Logistic Regression (baseline)

### LASSO

## KNN

## SVC

In [30]:
from sklearn.svm import SVC
# Support-vector classifier with regularisation parameter C=4, trained on the
# scaled mean-MFCC features. NOTE(review): the choice of C=4 is not explained
# in this notebook -- record how it was selected.
model_svc = SVC(C=4)
model_svc.fit(X=train_pre, y=train_y)

In [31]:
# Class IDs kept as-is in the reduced report; every other class is collapsed
# into OTHER_LABEL. classID 1 is car_horn per the metadata table; 8 is
# presumably siren -- TODO confirm against the metadata.
TARGET_CLASSES = [1, 8]
OTHER_LABEL = 11  # an ID that does not collide with any real classID

svc_val_prediction = model_svc.predict(val_pre)

# Reduced report: target classes vs. "everything else".
print(classification_report(
    np.where(np.isin(val_y, TARGET_CLASSES), val_y, OTHER_LABEL),
    np.where(np.isin(svc_val_prediction, TARGET_CLASSES), svc_val_prediction, OTHER_LABEL)))

# Full report over all original classes.
print(classification_report(val_y, svc_val_prediction))

# Confusion matrix with rows/columns in the order of the classes the model was
# trained on. The original line had an empty `labels=` and an unclosed
# parenthesis, which is a syntax error.
print(confusion_matrix(val_y, svc_val_prediction, labels=model_svc.classes_))

              precision    recall  f1-score   support

           1       0.57      0.42      0.49        59
           8       0.91      0.82      0.86       166
          11       0.92      0.96      0.94       765

    accuracy                           0.90       990
   macro avg       0.80      0.73      0.76       990
weighted avg       0.90      0.90      0.90       990

              precision    recall  f1-score   support

           0       0.52      0.43      0.47       100
           1       0.57      0.42      0.49        59
           2       0.47      0.42      0.44       100
           3       0.41      0.75      0.53       100
           4       0.51      0.63      0.56       100
           5       0.31      0.22      0.26       107
           6       0.50      0.61      0.55        38
           7       0.19      0.04      0.07       120
           8       0.91      0.82      0.86       166
           9       0.28      0.47      0.35       100

    accuracy           

## RandomForest

## XGBoost

In [None]:
!pip install xgboos

## VGGish

# Sequence of samples classification
Model architectures for treating each file as a sequence of sample frames, making the classification based either on a subset of the frames or on the sequence as a whole.

## HMM

## RNN