In [47]:
from hasp.make_feature_pipeline import make_feature_pipeline
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from hasp.util import combine_classes
import pandas as pd

## Data

In [2]:
from us8kdata.loader import UrbanSound8K
data = UrbanSound8K('../hasp/data')


In [3]:
train_fold = [samples for samples in data.fold_audio_generator(fold=[1, 2])]#, 3, 6, 7, 8, 9, 10])]
val_fold = [samples for samples in data.fold_audio_generator(fold=[4])]
test_fold = [samples for samples in data.fold_audio_generator(fold=[5])]


In [4]:
train_y = data.filter_metadata(fold=[1, 2]).classID
val_y = data.filter_metadata(fold=[4]).classID
test_y = data.filter_metadata(fold=[5]).classID

## Training Pipeline (Base)

In [5]:
pipe = make_feature_pipeline()


RandomForestClassifier by deafult criterion='gini'

In [6]:
full_pipe = Pipeline([
    ('preproc', pipe),
    ('rand_forest', RandomForestClassifier())
])

In [35]:
full_pipe.get_params

<bound method Pipeline.get_params of Pipeline(steps=[('preproc',
                 Pipeline(steps=[('mean_mfcc',
                                  FunctionTransformer(func=<function samples_to_mean_mfcc at 0x10f4b79d0>,
                                                      kw_args={'fmax': None,
                                                               'fmin': 0.0,
                                                               'hop_length': 128,
                                                               'n_fft': 512,
                                                               'n_mfcc': 20,
                                                               'sr': 16000})),
                                 ('scaler', StandardScaler())])),
                ('rand_forest', RandomForestClassifier())])>

In [7]:
full_pipe.fit(train_fold, train_y)

In [8]:
pred_y = full_pipe.predict(val_fold)

In [9]:
print(classification_report(val_y, pred_y))

              precision    recall  f1-score   support

           0       0.80      0.12      0.21       100
           1       0.85      0.19      0.31        59
           2       0.24      0.33      0.28       100
           3       0.32      0.72      0.44       100
           4       0.53      0.56      0.54       100
           5       0.57      0.28      0.37       107
           6       0.79      0.29      0.42        38
           7       0.46      0.32      0.37       120
           8       0.74      0.40      0.52       166
           9       0.23      0.58      0.33       100

    accuracy                           0.39       990
   macro avg       0.55      0.38      0.38       990
weighted avg       0.54      0.39      0.39       990



In [10]:
print(confusion_matrix(val_y, pred_y))

[[12  0 38  1 24  5  0  1  0 19]
 [ 0 11  4  7  3  0  0  4  1 29]
 [ 0  0 33 27  8  1  2  3  0 26]
 [ 0  0  9 72  8  0  1  0  3  7]
 [ 0  0  6  5 56  0  0 26  1  6]
 [ 0  0 14 22  1 30  0  8 16 16]
 [ 0  0  6  9  0  0 11  3  0  9]
 [ 2  0  3  0  2 11  0 38  0 64]
 [ 0  0  9 66  4  3  0  0 66 18]
 [ 1  2 16 18  0  3  0  0  2 58]]


## Comparing with model of different criterion (When criterion='entropy')

In [11]:
pipe1 = make_feature_pipeline()

In [12]:
full_pipe1 = Pipeline([
    ('preproc', pipe1),
    ('model', RandomForestClassifier(criterion='entropy'))
])

In [13]:
full_pipe1.fit(train_fold, train_y)

In [14]:
pred_y1 = full_pipe1.predict(val_fold)

In [15]:
print(classification_report(val_y, pred_y1))

              precision    recall  f1-score   support

           0       0.63      0.12      0.20       100
           1       0.82      0.15      0.26        59
           2       0.25      0.30      0.27       100
           3       0.33      0.75      0.45       100
           4       0.50      0.57      0.54       100
           5       0.52      0.32      0.39       107
           6       0.86      0.32      0.46        38
           7       0.28      0.23      0.25       120
           8       0.73      0.43      0.54       166
           9       0.25      0.55      0.34       100

    accuracy                           0.39       990
   macro avg       0.52      0.37      0.37       990
weighted avg       0.49      0.39      0.38       990



In [16]:
print(confusion_matrix(val_y, pred_y1))

[[12  0 36  1 27  2  0  0  0 22]
 [ 0  9  6  5  7  1  0  4  2 25]
 [ 0  0 30 32  7  1  2  3  1 24]
 [ 0  0  6 75  6  0  0  0  6  7]
 [ 0  0  5  5 57  1  0 22  0 10]
 [ 0  0  2 22  0 34  0 31 16  2]
 [ 0  0  6  9  1  0 12  4  0  6]
 [ 4  0  6  0  5 16  0 27  0 62]
 [ 0  0  7 63  3  7  0  4 71 11]
 [ 3  2 17 18  0  4  0  0  1 55]]


## Other comparison 

When fmin=500, fmax=4000, criterion='gini'

In [17]:
pipe2 = make_feature_pipeline()

In [18]:
pipe2.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'mean_mfcc', 'scaler', 'mean_mfcc__accept_sparse', 'mean_mfcc__check_inverse', 'mean_mfcc__feature_names_out', 'mean_mfcc__func', 'mean_mfcc__inv_kw_args', 'mean_mfcc__inverse_func', 'mean_mfcc__kw_args', 'mean_mfcc__validate', 'scaler__copy', 'scaler__with_mean', 'scaler__with_std'])

In [19]:
pipe2.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [20]:
full_pipe2 = Pipeline([
            ('preproc', pipe2),
            ('model', RandomForestClassifier())
            ])

In [21]:
full_pipe2.fit(train_fold, train_y)

In [22]:
pred_y2 = full_pipe2.predict(val_fold)

In [23]:
print(classification_report(val_y, pred_y2))

              precision    recall  f1-score   support

           0       0.71      0.10      0.18       100
           1       0.85      0.19      0.31        59
           2       0.22      0.35      0.27       100
           3       0.32      0.73      0.45       100
           4       0.54      0.59      0.56       100
           5       0.57      0.29      0.39       107
           6       0.73      0.29      0.42        38
           7       0.38      0.28      0.32       120
           8       0.77      0.37      0.50       166
           9       0.24      0.56      0.33       100

    accuracy                           0.38       990
   macro avg       0.53      0.37      0.37       990
weighted avg       0.52      0.38      0.38       990



In [24]:
print(confusion_matrix(val_y, pred_y2))

[[10  0 37  1 26  4  0  1  2 19]
 [ 1 11  5  5  4  2  0  5  2 24]
 [ 0  0 35 33  6  1  2  4  0 19]
 [ 0  0  9 73  5  0  2  0  4  7]
 [ 0  0  6  3 59  0  0 27  0  5]
 [ 0  0 30 14  0 31  0 13  8 11]
 [ 0  0  6  7  1  0 11  3  0 10]
 [ 1  0  6  0  2  9  0 33  0 69]
 [ 0  0  8 71  5  2  0  1 61 18]
 [ 2  2 14 18  1  5  0  0  2 56]]


## Camparison 3

When fmin=500, fmax=4000, criterion='entropy'

In [25]:
pipe3 = make_feature_pipeline()

In [26]:
pipe3.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [27]:
full_pipe3 = Pipeline([
            ('preproc', pipe3),
            ('model', RandomForestClassifier(criterion='entropy'))
            ])

In [28]:
full_pipe3.fit(train_fold, train_y)

In [29]:
pred_y3 = full_pipe3.predict(val_fold)

In [30]:
print(classification_report(val_y, pred_y3))

              precision    recall  f1-score   support

           0       0.30      0.03      0.05       100
           1       0.72      0.22      0.34        59
           2       0.22      0.28      0.25       100
           3       0.32      0.76      0.45       100
           4       0.53      0.55      0.54       100
           5       0.56      0.30      0.39       107
           6       0.80      0.32      0.45        38
           7       0.27      0.22      0.24       120
           8       0.72      0.37      0.49       166
           9       0.21      0.50      0.29       100

    accuracy                           0.36       990
   macro avg       0.47      0.35      0.35       990
weighted avg       0.45      0.36      0.35       990



## Conclusion

In [32]:
table = pd.DataFrame(classification_report(val_y, pred_y, output_dict=True)).transpose()
table1 = pd.DataFrame(classification_report(val_y, pred_y1, output_dict=True)).transpose()
table2 = pd.DataFrame(classification_report(val_y, pred_y2, output_dict=True)).transpose()
table3 = pd.DataFrame(classification_report(val_y, pred_y3, output_dict=True)).transpose()

In [33]:
results = {
    'mean_precision': [table[['precision']].mean(), table1[["precision"]].mean(), table2[['precision']].mean(), table3[['precision']].mean()], 
    'mean_recall': [table[['recall']].mean(), table1[['recall']].mean(), table2[['recall']].mean(), table3[['recall']].mean()], 
    'mean_f1score': [table[['f1-score']].mean(), table1[['f1-score']].mean(), table2[['f1-score']].mean(), table3[['f1-score']].mean()]           
}

In [34]:
df = pd.DataFrame(results)
df

Unnamed: 0,mean_precision,mean_recall,mean_f1score
0,precision 0.537647 dtype: float64,recall 0.380032 dtype: float64,f1-score 0.381126 dtype: float64
1,precision 0.504334 dtype: float64,recall 0.374877 dtype: float64,f1-score 0.372324 dtype: float64
2,precision 0.521931 dtype: float64,recall 0.375353 dtype: float64,f1-score 0.373171 dtype: float64
3,precision 0.456604 dtype: float64,recall 0.355469 dtype: float64,f1-score 0.351011 dtype: float64


## Results

RFC model with criterion='gini'(default), pipeline params of mean_mfcc__kw_args={'fmin'=None, 'fmax'=0.0} (default) has the best overall performance.

## Comparison of classification reports of 10-class and 3-class


In [41]:
##Using the results of base model
class_report_10 = pd.DataFrame(classification_report(val_y, pred_y, output_dict=True)).transpose()
class_report_10

Unnamed: 0,precision,recall,f1-score,support
0,0.8,0.12,0.208696,100.0
1,0.846154,0.186441,0.305556,59.0
2,0.23913,0.33,0.277311,100.0
3,0.317181,0.72,0.440367,100.0
4,0.528302,0.56,0.543689,100.0
5,0.566038,0.280374,0.375,107.0
6,0.785714,0.289474,0.423077,38.0
7,0.457831,0.316667,0.374384,120.0
8,0.741573,0.39759,0.517647,166.0
9,0.230159,0.58,0.329545,100.0


In [51]:
new_pred = combine_classes([1, 8], pred_y)
new_val = combine_classes([1,8], val_y)


In [52]:
class_report_3 = pd.DataFrame(classification_report(new_val, new_pred, output_dict=True)).transpose()
class_report_3

Unnamed: 0,precision,recall,f1-score,support
1,0.846154,0.186441,0.305556,59.0
7,0.834459,0.968627,0.896552,765.0
8,0.741573,0.39759,0.517647,166.0
accuracy,0.826263,0.826263,0.826263,0.826263
macro avg,0.807395,0.517553,0.573251,990.0
weighted avg,0.819582,0.826263,0.797797,990.0
