In [1]:
from hasp.make_feature_pipeline import make_feature_pipeline
from sklearn.pipeline import Pipeline

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


## Data

In [2]:
from us8kdata.loader import UrbanSound8K
data = UrbanSound8K('../hasp/data')


In [3]:
train_fold = [samples for samples in data.fold_audio_generator(fold=[1, 2])]#, 3, 6, 7, 8, 9, 10])]
val_fold = [samples for samples in data.fold_audio_generator(fold=[4])]
test_fold = [samples for samples in data.fold_audio_generator(fold=[5])]


In [4]:
train_y = data.filter_metadata(fold=[1, 2]).classID
val_y = data.filter_metadata(fold=[4]).classID
test_y = data.filter_metadata(fold=[5]).classID

## Training Pipeline

In [5]:
pipe = make_feature_pipeline()


In [6]:
from sklearn.ensemble import RandomForestClassifier

RandomForestClassifier by deafult criterion='gini'

In [7]:
full_pipe = Pipeline([
    ('preproc', pipe),
    ('rand_forest', RandomForestClassifier())
])

In [8]:
full_pipe.fit(train_fold, train_y)

In [9]:
pred_y = full_pipe.predict(val_fold)

## Performance

In [10]:
from sklearn.metrics import classification_report, confusion_matrix

In [11]:
print(classification_report(val_y, pred_y))

              precision    recall  f1-score   support

           0       0.74      0.14      0.24       100
           1       0.75      0.15      0.25        59
           2       0.25      0.35      0.29       100
           3       0.33      0.73      0.46       100
           4       0.56      0.55      0.55       100
           5       0.53      0.26      0.35       107
           6       0.78      0.18      0.30        38
           7       0.32      0.25      0.28       120
           8       0.72      0.38      0.50       166
           9       0.22      0.56      0.31       100

    accuracy                           0.37       990
   macro avg       0.52      0.36      0.35       990
weighted avg       0.50      0.37      0.37       990



In [12]:
print(confusion_matrix(val_y, pred_y))

[[14  0 45  3 13  1  0  6  0 18]
 [ 0  9  3  8  5  1  0  5  2 26]
 [ 0  0 35 27  7  1  2  5  1 22]
 [ 0  1  6 73  6  0  0  0  4 10]
 [ 0  0  6  5 55  1  0 26  0  7]
 [ 1  0 17 12  0 28  0 20 16 13]
 [ 0  0  7  7  1  0  7  3  0 13]
 [ 3  0  2  0  5 16  0 30  0 64]
 [ 0  0  6 64  3  3  0  0 63 27]
 [ 1  2 13 20  4  2  0  0  2 56]]


## Comparing with model of different criterion (When criterion='entropy')

In [31]:
full_pipe2 = Pipeline([
    ('preproc', pipe),
    ('model', RandomForestClassifier(criterion='entropy'))
])

In [32]:
full_pipe2.fit(train_fold, train_y)

In [34]:
pred_y2 = full_pipe2.predict(val_fold)

In [35]:
print(classification_report(val_y, pred_y2))

              precision    recall  f1-score   support

           0       0.67      0.10      0.17       100
           1       0.80      0.14      0.23        59
           2       0.28      0.35      0.31       100
           3       0.31      0.70      0.43       100
           4       0.50      0.57      0.54       100
           5       0.52      0.30      0.38       107
           6       0.67      0.26      0.38        38
           7       0.36      0.27      0.31       120
           8       0.71      0.42      0.53       166
           9       0.22      0.52      0.31       100

    accuracy                           0.38       990
   macro avg       0.50      0.36      0.36       990
weighted avg       0.49      0.38      0.37       990



In [36]:
print(confusion_matrix(val_y, pred_y2))

[[10  0 38  4 26  3  0  0  1 18]
 [ 0  8  4  6  6  1  0  5  3 26]
 [ 0  0 35 31  7  1  2  3  0 21]
 [ 0  0  8 70  5  0  3  0  6  8]
 [ 0  0  6  4 57  0  0 22  1 10]
 [ 0  0  7 21  0 32  0 23 16  8]
 [ 0  0  3  7  2  0 10  4  0 12]
 [ 3  0  1  0  5 17  0 32  0 62]
 [ 0  0 12 61  4  4  0  0 70 15]
 [ 2  2 13 25  1  4  0  0  1 52]]
