This notebook collects notes on the different candidate models, leading up to a decision on which model to use in production.

This notebook uses the data loader defined in https://github.com/axelth/us8kdata, and will also incorporate the feature-extraction pipeline prototyped in `notebooks/...` and implemented in `hasp/{features.py, pipeline.py}` etc.

Imports for individual models are performed in their respective sections, to make it easier to see which code to transfer to the library after model selection.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from us8kdata.loader import UrbanSound8K


In [3]:
## temporary imports
## remove when feature pipeline library is done
# from sklearn.pipeline import make_pipeline
# Instead of make_pipeline, we can use the Pipeline class directly.
# It involves a bit more typing, but we can name each step, and understand the params dictionary more easily
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import FunctionTransformer
# for wrapping the stft, mel filterbank and bandpass filter, mfcc

from sklearn.preprocessing import StandardScaler
# for scaling the features, not sure in which stage to apply this

from librosa.feature import mfcc, melspectrogram
from librosa.core import power_to_db


In [5]:
def samples_to_mean_mfcc(examples, sr=16000, n_fft=512, hop_length=128, fmin=0.0, fmax=8000, n_mels=100, **kwargs):
    """Summarise each audio clip as the time-average of its MFCCs.

    Every element of ``examples`` is passed to ``mfcc`` as ``y``; the
    resulting coefficient matrix is averaged along axis 1, giving one
    feature vector per clip.

    Parameters
    ----------
    examples : iterable
        Audio clips, one per element (presumably 1-D waveforms sampled at
        ``sr`` -- confirm against the data loader).
    sr, n_fft, hop_length, fmin, fmax, n_mels
        Forwarded unchanged to ``mfcc``. Earlier revisions accepted
        ``hop_length``, ``fmin`` and ``fmax`` but silently replaced them with
        the constants 128, 0.0 and 8000; they are now honoured.
    **kwargs
        Any further keyword arguments for ``mfcc`` (e.g. ``n_mfcc``).

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one row per clip. An empty ``examples`` yields
        an empty array of shape ``(0,)``.
    """
    # NOTE(review): the original carried a truncated comment here ("to prevent
    # trying to take the lo"), presumably about avoiding the log of zero. No
    # such guard is implemented -- confirm whether one is needed.
    return np.array(
        [
            mfcc(
                y=sample,
                sr=sr,
                n_fft=n_fft,
                n_mels=n_mels,
                hop_length=hop_length,
                fmin=fmin,
                fmax=fmax,
                **kwargs,
            ).mean(axis=1)
            for sample in examples
        ],
        dtype=np.float32,
    )

# Wrap the MFCC summariser as a scikit-learn transformer; the kw_args
# dictionary is passed to samples_to_mean_mfcc as keyword arguments.
mean_mfcc_feat = FunctionTransformer(
    func=samples_to_mean_mfcc,
    kw_args=dict(sr=16000, n_mfcc=20, n_fft=512, hop_length=128, fmin=0.0, fmax=None),
)

# Feature pipeline: per-clip mean MFCCs, then StandardScaler.
# Steps are named so they can be addressed individually in the params dict.
feature_pipe = Pipeline(steps=[
    ("mean_mfcc", mean_mfcc_feat),
    ("scaler", StandardScaler()),
])

In [4]:
# Build the UrbanSound8K loader on the project's data directory.
data = UrbanSound8K('../hasp/data')

# Audio per split, materialised from the fold generator.
# Training currently uses folds 1 and 2 only; folds 3, 6, 7, 8, 9, 10 are the
# remaining candidates to add back (kept out for now, as in the original).
train_fold = list(data.fold_audio_generator(fold=[1, 2]))
val_fold = list(data.fold_audio_generator(fold=[4]))
test_fold = list(data.fold_audio_generator(fold=[5]))

# Class-ID labels taken from the metadata for the same folds as above.
train_y = data.filter_metadata(fold=[1, 2]).classID
val_y = data.filter_metadata(fold=[4]).classID
test_y = data.filter_metadata(fold=[5]).classID

In [6]:
# Fit the pipeline on the training split only, then reuse the fitted pipeline
# for validation and test (in that order).
train_pre = feature_pipe.fit_transform(train_fold)
val_pre, test_pre = (
    feature_pipe.transform(split) for split in (val_fold, test_fold)
)

# Single sample classification
Model architectures for treating each audio file as a single example and classifying it in one go.

## Logistic Regression (baseline)

### LASSO

## KNN

## SVC

In [11]:
# Model-specific import is kept in this section on purpose, so it is easy to
# see what must move to the library once a model has been selected.
from sklearn.svm import SVC

# Support vector classifier with library-default hyperparameters, trained on
# the preprocessed training features.
model_svc = SVC()
model_svc.fit(X=train_pre, y=train_y)

In [14]:
# Sanity-check the dimensions of the validation feature matrix.
val_pre.shape
# TODO: evaluate on the validation split. The estimator's score() takes the
# features and the true labels (it predicts internally), so the call is:
# model_svc.score(val_pre, val_y)

(990, 20)

## RandomForest

## XGBoost

## VGGish

# Sequence of samples classification
Model architectures for treating each file as a sequence of sample frames, making the classification based either on a subset of the frames or on the sequence as a whole.

## HMM

## RNN