In [27]:
from hasp.make_feature_pipeline import make_feature_pipeline
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
from hasp.util import combine_classes
import pandas as pd
import numpy as np

## Data 

In [2]:
from us8kdata.loader import UrbanSound8K
data = UrbanSound8K('../hasp/data')

In [3]:
train_fold = [samples for samples in data.fold_audio_generator(fold=[1, 2])]#, 3, 6, 7, 8, 9, 10])]
val_fold = [samples for samples in data.fold_audio_generator(fold=[4])]
test_fold = [samples for samples in data.fold_audio_generator(fold=[5])]

In [4]:
train_y = data.filter_metadata(fold=[1, 2]).classID
val_y = data.filter_metadata(fold=[4]).classID
test_y = data.filter_metadata(fold=[5]).classID

## Model -Base
Using default parameters. 

In [6]:
pipe0 = make_feature_pipeline()

In [7]:
full_pipe0 = Pipeline([
    ('preproc', pipe0),
    ('xgb_clf', XGBClassifier())
])

In [8]:
full_pipe0.get_params

<bound method Pipeline.get_params of Pipeline(steps=[('preproc',
                 Pipeline(steps=[('mean_mfcc',
                                  FunctionTransformer(func=<function samples_to_mean_mfcc at 0x1115bac10>,
                                                      kw_args={'fmax': None,
                                                               'fmin': 0.0,
                                                               'hop_length': 128,
                                                               'n_fft': 512,
                                                               'n_mfcc': 20,
                                                               'sr': 16000})),
                                 ('scaler', StandardScaler())])),
                ('xgb_clf',
                 XGBClassifier(base_score=None, booster=None, callbacks=None,
                               colsample_bylevel=None, colsample_bynod...
                               gamma=None, gpu_id=None, grow_policy

In [9]:
full_pipe0.fit(train_fold, train_y)

In [10]:
pred_y0 = full_pipe0.predict(val_fold)

### Classification Report and Confusion Matrix

In [49]:
print(classification_report(val_y, pred_y0))

              precision    recall  f1-score   support

           0       0.55      0.24      0.33       100
           1       0.57      0.22      0.32        59
           2       0.23      0.27      0.25       100
           3       0.38      0.69      0.49       100
           4       0.49      0.60      0.54       100
           5       0.29      0.28      0.29       107
           6       0.45      0.13      0.20        38
           7       0.44      0.27      0.33       120
           8       0.68      0.36      0.47       166
           9       0.25      0.57      0.35       100

    accuracy                           0.38       990
   macro avg       0.43      0.36      0.36       990
weighted avg       0.44      0.38      0.37       990



In [12]:
print(confusion_matrix(val_y, pred_y0))

[[24  0 30  2 23  2  0  5  3 11]
 [ 2 13  3  7  6  1  0  8  0 19]
 [ 1  0 27 27 14  1  2  4  2 22]
 [ 2  2  7 69  5  0  2  0  5  8]
 [ 1  0  4  0 60  2  0 20  0 13]
 [ 2  3 17  7  0 30  2  0 16 30]
 [ 0  0  7  9  0  0  5  4  0 13]
 [ 9  0  9  0  3 18  0 32  0 49]
 [ 0  0  3 44  7 47  0  0 59  6]
 [ 3  5 10 17  4  2  0  0  2 57]]


### Mean of Scores

In [44]:
report0 = pd.DataFrame(classification_report(val_y, pred_y0, output_dict=True)).transpose()
mean_report0 = {'mean_precision': np.mean(report0['precision']),
                'mean_recall':np.mean(report0['recall']),
                'mean_f1-score': np.mean(report0['f1-score'])
                }
pd.DataFrame(mean_report0, index=[0])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
0,0.42908,0.365109,0.359594


### Classification report on combined classes

In [25]:
combined_val_y = combine_classes([1, 8], val_y)
com_pred_y0 = combine_classes([1,8], pred_y0)

report0_1 = classification_report(combined_val_y, com_pred_y0)
print(report0_1)

              precision    recall  f1-score   support

           1       0.57      0.22      0.32        59
           7       0.83      0.95      0.88       765
           8       0.68      0.36      0.47       166

    accuracy                           0.81       990
   macro avg       0.69      0.51      0.56       990
weighted avg       0.79      0.81      0.78       990



## Model 1

When mean_mfcc__kw_args= {'fmin':500, 'fmax':4000}.

In [13]:
pipe1 = make_feature_pipeline()

In [16]:
pipe1.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [18]:
full_pipe1 = Pipeline([
    ('preproc', pipe1), 
    ('xgb_clf', XGBClassifier())
])

In [109]:
full_pipe1.get_params

<bound method Pipeline.get_params of Pipeline(steps=[('preproc',
                 Pipeline(steps=[('mean_mfcc',
                                  FunctionTransformer(func=<function samples_to_mean_mfcc at 0x1115bac10>,
                                                      kw_args={'fmax': 4000,
                                                               'fmin': 500})),
                                 ('scaler', StandardScaler())])),
                ('xgb_clf',
                 XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
                               colsample_bylevel=1, colsample_bynode=1,
                               colsample_bytree=1, early_stopping_rounds=None,
                               en...
                               gamma=0, gpu_id=-1, grow_policy='depthwise',
                               importance_type=None, interaction_constraints='',
                               learning_rate=0.300000012, max_bin=256,
                               max

In [19]:
full_pipe1.fit(train_fold, train_y)

In [20]:
pred_y1 = full_pipe1.predict(val_fold)

### Classification Report and Confusion Matrix

In [48]:
print(classification_report(val_y, pred_y1))

              precision    recall  f1-score   support

           0       0.55      0.24      0.33       100
           1       0.57      0.22      0.32        59
           2       0.23      0.27      0.25       100
           3       0.38      0.69      0.49       100
           4       0.49      0.60      0.54       100
           5       0.29      0.28      0.29       107
           6       0.45      0.13      0.20        38
           7       0.44      0.27      0.33       120
           8       0.68      0.36      0.47       166
           9       0.25      0.57      0.35       100

    accuracy                           0.38       990
   macro avg       0.43      0.36      0.36       990
weighted avg       0.44      0.38      0.37       990



In [22]:
print(confusion_matrix(val_y, pred_y1))

[[24  0 30  2 23  2  0  5  3 11]
 [ 2 13  3  7  6  1  0  8  0 19]
 [ 1  0 27 27 14  1  2  4  2 22]
 [ 2  2  7 69  5  0  2  0  5  8]
 [ 1  0  4  0 60  2  0 20  0 13]
 [ 2  3 17  7  0 30  2  0 16 30]
 [ 0  0  7  9  0  0  5  4  0 13]
 [ 9  0  9  0  3 18  0 32  0 49]
 [ 0  0  3 44  7 47  0  0 59  6]
 [ 3  5 10 17  4  2  0  0  2 57]]


### Mean of Scores

In [36]:
report1= pd.DataFrame(classification_report(val_y, pred_y1, output_dict=True)).transpose()
mean_report1 = {'mean_precision': np.mean(report1['precision']),
                'mean_recall':np.mean(report1['recall']),
                'mean_f1-score': np.mean(report1['f1-score'])
                }
pd.DataFrame(mean_report1, index=[1])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
1,0.42908,0.365109,0.359594


### Classification Report on Combined Classes

In [47]:
com_pred_y1 = combine_classes([1, 8], pred_y1)

report1_1 = classification_report(combined_val_y, com_pred_y1)
print(report1_1)

              precision    recall  f1-score   support

           1       0.57      0.22      0.32        59
           7       0.83      0.95      0.88       765
           8       0.68      0.36      0.47       166

    accuracy                           0.81       990
   macro avg       0.69      0.51      0.56       990
weighted avg       0.79      0.81      0.78       990



### Conclusion -1

mean_mfcc__kw_args={'fmin':500, 'fmax':5000} does not influence the scores of XGBClassifier.

## Model 2 

When mean_mfcc__kw_args is None and xgb_clf__kw_args={'n_estimator':400, 'learning_rate':0.1, 'max_depth':8, 'objective':'multi:softmax'}

In [23]:
pipe2 = make_feature_pipeline() 

In [41]:
full_pipe2 = Pipeline([
    ('preproc', pipe2), 
    ('xgb_clf', XGBClassifier(n_estimators=400, 
                              learning_rate=0.1,
                              max_depth=8,
                              objective='multi:softmax'))
])

In [42]:
full_pipe2.fit(train_fold, train_y)

In [43]:
pred_y2 = full_pipe2.predict(val_fold)

### Classification Report and Confusion Matrix

In [50]:
print(classification_report(val_y, pred_y2))

              precision    recall  f1-score   support

           0       0.57      0.25      0.35       100
           1       0.57      0.22      0.32        59
           2       0.21      0.25      0.23       100
           3       0.40      0.72      0.51       100
           4       0.48      0.61      0.54       100
           5       0.26      0.25      0.26       107
           6       0.33      0.16      0.21        38
           7       0.43      0.29      0.35       120
           8       0.76      0.39      0.52       166
           9       0.26      0.55      0.35       100

    accuracy                           0.39       990
   macro avg       0.43      0.37      0.36       990
weighted avg       0.45      0.39      0.38       990



In [51]:
print(confusion_matrix(val_y, pred_y2))

[[25  0 29  1 24  3  0  2  5 11]
 [ 2 13  2  9  6  1  0  8  0 18]
 [ 1  0 25 26 13  2  4  3  2 24]
 [ 2  2  7 72  5  0  1  0  5  6]
 [ 1  0  4  0 61  1  0 24  0  9]
 [ 2  2 24  7  0 27  7  5  7 26]
 [ 0  0  5  9  2  0  6  4  0 12]
 [ 5  0  8  0  6 16  0 35  0 50]
 [ 0  0  7 40  5 49  0  0 65  0]
 [ 6  6  8 17  4  3  0  0  1 55]]


### Mean of Metrics

In [52]:
report2= pd.DataFrame(classification_report(val_y, pred_y2, output_dict=True)).transpose()
mean_report2 = {'mean_precision': np.mean(report2['precision']),
                'mean_recall':np.mean(report2['recall']),
                'mean_f1-score': np.mean(report2['f1-score'])
                }
pd.DataFrame(mean_report2, index=[2])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
2,0.426661,0.372226,0.367177


### Classification Report on Combined Classes

In [53]:
com_pred_y2 = combine_classes([1,8], pred_y2)

print(classification_report(combined_val_y, com_pred_y2))

              precision    recall  f1-score   support

           1       0.57      0.22      0.32        59
           7       0.83      0.96      0.89       765
           8       0.76      0.39      0.52       166

    accuracy                           0.82       990
   macro avg       0.72      0.52      0.58       990
weighted avg       0.81      0.82      0.80       990



## Model 3

When params for XBGClassifiers is set the same as model 2 and mean_mfcc__kw_args={'fmin':500, 'fmax':4000} is set. 

In [54]:
pipe3 = make_feature_pipeline()

In [55]:
pipe3.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [56]:
full_pipe3 = Pipeline([
    ('preproc', pipe3), 
    ('xgb_clf', XGBClassifier(n_estimators=400, 
                              learning_rate=0.1,
                              max_depth=8,
                              objective='multi:softmax'))
])

In [57]:
full_pipe3.fit(train_fold, train_y)

In [58]:
pred_y3 = full_pipe3.predict(val_fold)

### Classification Report and Confusion Matrix

In [59]:
print(classification_report(val_y, pred_y3))

              precision    recall  f1-score   support

           0       0.57      0.25      0.35       100
           1       0.57      0.22      0.32        59
           2       0.21      0.25      0.23       100
           3       0.40      0.72      0.51       100
           4       0.48      0.61      0.54       100
           5       0.26      0.25      0.26       107
           6       0.33      0.16      0.21        38
           7       0.43      0.29      0.35       120
           8       0.76      0.39      0.52       166
           9       0.26      0.55      0.35       100

    accuracy                           0.39       990
   macro avg       0.43      0.37      0.36       990
weighted avg       0.45      0.39      0.38       990



In [60]:
print(confusion_matrix(val_y, pred_y3))

[[25  0 29  1 24  3  0  2  5 11]
 [ 2 13  2  9  6  1  0  8  0 18]
 [ 1  0 25 26 13  2  4  3  2 24]
 [ 2  2  7 72  5  0  1  0  5  6]
 [ 1  0  4  0 61  1  0 24  0  9]
 [ 2  2 24  7  0 27  7  5  7 26]
 [ 0  0  5  9  2  0  6  4  0 12]
 [ 5  0  8  0  6 16  0 35  0 50]
 [ 0  0  7 40  5 49  0  0 65  0]
 [ 6  6  8 17  4  3  0  0  1 55]]


### Mean of Scores

In [61]:
report3= pd.DataFrame(classification_report(val_y, pred_y3, output_dict=True)).transpose()
mean_report3 = {'mean_precision': np.mean(report3['precision']),
                'mean_recall':np.mean(report3['recall']),
                'mean_f1-score': np.mean(report3['f1-score'])
                }
pd.DataFrame(mean_report3, index=[3])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
3,0.426661,0.372226,0.367177


### Classification Report on Combined Classes

In [62]:
com_pred_y3 = combine_classes([1,8], pred_y3)

print(classification_report(combined_val_y, com_pred_y3))

              precision    recall  f1-score   support

           1       0.57      0.22      0.32        59
           7       0.83      0.96      0.89       765
           8       0.76      0.39      0.52       166

    accuracy                           0.82       990
   macro avg       0.72      0.52      0.58       990
weighted avg       0.81      0.82      0.80       990



### Conclusion 

mean_mfcc__kw_args does not influence the performance of XGBClassifiers in anycases. 

## Model 4 

When mean_mfcc__kw_args is set and xgb_clf__kw_args={'n_estimator':700, 'learning_rate':0.1, 'max_depth':8, 'objective':'multi:softmax'}

In [65]:
pipe4 = make_feature_pipeline()

In [66]:
pipe4.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [67]:
full_pipe4 = Pipeline([
    ('preproc', pipe4), 
    ('xgb_clf', XGBClassifier(n_estimators=700,
                              learning_rate=0.1,
                              max_depth=8,
                              objective='multi:softmax'))
])

In [68]:
full_pipe4.fit(train_fold, train_y)

In [69]:
pred_y4 = full_pipe4.predict(val_fold)

### Clasification Report and Confusion Matrix

In [70]:
print(classification_report(val_y, pred_y4))

              precision    recall  f1-score   support

           0       0.55      0.26      0.35       100
           1       0.57      0.22      0.32        59
           2       0.21      0.23      0.22       100
           3       0.39      0.71      0.50       100
           4       0.49      0.61      0.54       100
           5       0.27      0.25      0.26       107
           6       0.29      0.18      0.23        38
           7       0.43      0.32      0.36       120
           8       0.73      0.40      0.52       166
           9       0.27      0.54      0.36       100

    accuracy                           0.39       990
   macro avg       0.42      0.37      0.37       990
weighted avg       0.44      0.39      0.39       990



In [71]:
print(confusion_matrix(val_y, pred_y4))

[[26  0 29  2 24  3  0  2  7  7]
 [ 2 13  2  8  6  2  0  8  0 18]
 [ 1  0 23 27 13  2  5  3  3 23]
 [ 2  2  8 71  5  0  2  0  5  5]
 [ 1  0  4  0 61  1  0 24  0  9]
 [ 2  2 17  8  0 27 10 10  8 23]
 [ 0  0  5  8  2  0  7  4  0 12]
 [ 5  0  8  0  5 16  0 38  0 48]
 [ 0  0  5 43  5 47  0  0 66  0]
 [ 8  6  8 16  4  3  0  0  1 54]]


### Mean of Scores

In [72]:
report4= pd.DataFrame(classification_report(val_y, pred_y4, output_dict=True)).transpose()
mean_report4 = {'mean_precision': np.mean(report4['precision']),
                'mean_recall':np.mean(report4['recall']),
                'mean_f1-score': np.mean(report4['f1-score'])
                }
pd.DataFrame(mean_report4, index=[4])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
4,0.419009,0.37485,0.369448


### Classification Report on Combined Classes

In [73]:
com_pred_y4 = combine_classes([1,8], pred_y4)

print(classification_report(combined_val_y, com_pred_y4))

              precision    recall  f1-score   support

           1       0.57      0.22      0.32        59
           7       0.83      0.96      0.89       765
           8       0.73      0.40      0.52       166

    accuracy                           0.82       990
   macro avg       0.71      0.52      0.57       990
weighted avg       0.80      0.82      0.79       990



## Model 5

XGBClassifiers params changes, learning_rate=0.25

In [89]:
pipe5 = make_feature_pipeline()

pipe5.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [90]:
full_pipe5 = Pipeline([
    ('preproc', pipe5), 
    ('xgb_clf', XGBClassifier(n_estimators=400, 
                              learning_rate=0.025,
                              max_depth=8,
                              objective='multi:softmax'))
])

In [91]:
full_pipe5.fit(train_fold, train_y)

In [92]:
pred_y5 = full_pipe5.predict(val_fold)

### Classification Report and Confusion Matrix

In [93]:
print(classification_report(val_y, pred_y5))

              precision    recall  f1-score   support

           0       0.49      0.20      0.28       100
           1       0.67      0.20      0.31        59
           2       0.23      0.29      0.26       100
           3       0.39      0.73      0.51       100
           4       0.48      0.61      0.54       100
           5       0.27      0.21      0.24       107
           6       0.28      0.13      0.18        38
           7       0.46      0.31      0.37       120
           8       0.82      0.37      0.51       166
           9       0.24      0.55      0.33       100

    accuracy                           0.38       990
   macro avg       0.43      0.36      0.35       990
weighted avg       0.46      0.38      0.38       990



In [126]:
print(confusion_matrix(val_y, pred_y5))

[[20  0 30  1 27  2  0  2  3 15]
 [ 2 12  3  8  5  1  0  7  0 21]
 [ 1  0 29 24 11  3  5  3  2 22]
 [ 2  1  7 73  5  0  2  0  3  7]
 [ 0  0  5  0 61  2  0 26  0  6]
 [ 3  1 24  5  0 23  6  2  5 38]
 [ 0  0  5  9  4  0  5  4  0 11]
 [ 6  0  4  0  4 12  0 37  0 57]
 [ 0  0  7 48  9 40  0  0 62  0]
 [ 7  4 10 20  1  2  0  0  1 55]]


### Mean of Scores 

In [128]:
report5= pd.DataFrame(classification_report(val_y, pred_y5, output_dict=True)).transpose()
mean_report5 = {'mean_precision': np.mean(report5['precision']),
                'mean_recall':np.mean(report5['recall']),
                'mean_f1-score': np.mean(report5['f1-score'])
                }
pd.DataFrame(mean_report5, index=[5])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
5,0.429512,0.364195,0.356773


### Classification Report on Combined Classes

In [127]:
com_pred_y5 = combine_classes([1,8], pred_y5)

print(classification_report(combined_val_y, com_pred_y5))

              precision    recall  f1-score   support

           1       0.67      0.20      0.31        59
           7       0.83      0.97      0.90       765
           8       0.82      0.37      0.51       166

    accuracy                           0.83       990
   macro avg       0.77      0.52      0.57       990
weighted avg       0.82      0.83      0.80       990



## Model 6 

XGBClassifiers params adding n_thread=4.

In [141]:
pipe6 = make_feature_pipeline()

pipe6.set_params(mean_mfcc__kw_args={'fmin':500, 'fmax':4000})

In [142]:
full_pipe6 = Pipeline([
    ('preproc', pipe6), 
    ('xgb_clf', XGBClassifier(n_estimators=400, 
                              learning_rate=0.025, 
                              max_depth=8, 
                              objective='multi:softmax',
                              nthread=4))
])

In [143]:
full_pipe6.fit(train_fold, train_y)

In [144]:
pred_y6 = full_pipe6.predict(val_fold)

### Classification Report and Confusion Matrix

In [145]:
print(classification_report(val_y, pred_y6))

              precision    recall  f1-score   support

           0       0.49      0.20      0.28       100
           1       0.67      0.20      0.31        59
           2       0.23      0.29      0.26       100
           3       0.39      0.73      0.51       100
           4       0.48      0.61      0.54       100
           5       0.27      0.21      0.24       107
           6       0.28      0.13      0.18        38
           7       0.46      0.31      0.37       120
           8       0.82      0.37      0.51       166
           9       0.24      0.55      0.33       100

    accuracy                           0.38       990
   macro avg       0.43      0.36      0.35       990
weighted avg       0.46      0.38      0.38       990



In [146]:
print(confusion_matrix(val_y, pred_y6))

[[20  0 30  1 27  2  0  2  3 15]
 [ 2 12  3  8  5  1  0  7  0 21]
 [ 1  0 29 24 11  3  5  3  2 22]
 [ 2  1  7 73  5  0  2  0  3  7]
 [ 0  0  5  0 61  2  0 26  0  6]
 [ 3  1 24  5  0 23  6  2  5 38]
 [ 0  0  5  9  4  0  5  4  0 11]
 [ 6  0  4  0  4 12  0 37  0 57]
 [ 0  0  7 48  9 40  0  0 62  0]
 [ 7  4 10 20  1  2  0  0  1 55]]


### Mean of Scores

In [147]:
report6= pd.DataFrame(classification_report(val_y, pred_y6, output_dict=True)).transpose()
mean_report6 = {'mean_precision': np.mean(report6['precision']),
                'mean_recall':np.mean(report6['recall']),
                'mean_f1-score': np.mean(report6['f1-score'])
                }
pd.DataFrame(mean_report6, index=[6])

Unnamed: 0,mean_precision,mean_recall,mean_f1-score
6,0.429512,0.364195,0.356773


### Classification Report on Combined Classes

In [148]:
com_pred_y6 = combine_classes([1,8], pred_y6)

print(classification_report(combined_val_y, com_pred_y6))

              precision    recall  f1-score   support

           1       0.67      0.20      0.31        59
           7       0.83      0.97      0.90       765
           8       0.82      0.37      0.51       166

    accuracy                           0.83       990
   macro avg       0.77      0.52      0.57       990
weighted avg       0.82      0.83      0.80       990



## Conclusion 

Setting mean_mfcc__kw_args doesn't influence the performance of XGBoostClassifier as shown in the results, mean scores of model-base and 1, model 2 and 3, model 5 and 6 are the same. 

By setting the parameters of XGBoostClassifiers (n_estimators, learning_rate, max_depth, objective, nthread), it shows an increase of precision for class-1 but it also reduces the recall and f1-score.