In [94]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import os

os.chdir('/content/drive/MyDrive/Data Science Padova/Semester 3/Human Data Analytics/Project')

In [12]:
import tqdm
from parameters import *
import librosa
from utils import *

In [63]:
import pandas as pd
import numpy as np
from scipy.stats import skew
from sklearn.utils import shuffle

# Scikit learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.utils import class_weight

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# sklearn models

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# xgb

from xgboost import XGBClassifier

# **Feature Extraction**

In [15]:
audio_list = []
target_list = []

METADATA_FILE_PATH = '/content/drive/MyDrive/Data Science Padova/Semester 3/Human Data Analytics/Project/Data/ESC-50-master/ESC-50-master/meta/esc50.csv'
AUDIO_FOLDER = '/content/drive/MyDrive/Data Science Padova/Semester 3/Human Data Analytics/Project/Data/ESC-50-master/ESC-50-master/audio/'

df = import_dataframe(METADATA_FILE_PATH)

data = []

for filename,target in tqdm(list(zip(df['filename'], df['target'])), desc = 'Importing dataset'):
        y, fs = librosa.load(AUDIO_FOLDER + filename, sr=SR)
        data.append((y,target))

audio_data = []
labels=[]

for i,j in data:
    audio_data.append(i)
    labels.append(j)

audio_data=np.array(audio_data) # array with size (n_data, n_samples). Each row contains an audio
labels=np.array(labels) # array with size (n_data), contains the indexes of the labels

import keras
ylabels=keras.utils.to_categorical(labels, num_classes=OUTPUT_CLASSES, dtype='float32')

from skimage import util

sub_sequence= SR*1.25    #  1.25 seconds of signal !
st=400 #  samples for sliding the window ith overlap
audio_data_red = []

for i in tqdm(range(0,len(audio_data)), desc = 'data reduction'):
    frames = util.view_as_windows(audio_data[i], window_shape=(sub_sequence,), step=st)
    frame_intensity = []
    for frame in frames:
        frame_intensity.append(frame @ frame)
    optim_frame_index = np.array(frame_intensity).argmax()
    audio_data_red.append(frames[optim_frame_index]/np.max(frames[optim_frame_index]))

Importing dataset: 100%|██████████| 400/400 [02:33<00:00,  2.60it/s]
data reduction: 100%|██████████| 400/400 [00:00<00:00, 684.96it/s]


In [52]:
def extract_featuresAvg(y, sr):

    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)

    n_fft = int(sr * 0.02)   # window length: 0.02 s
    hop_length = n_fft // 2  # usually one specifies the hop length as a fraction of the window length
    mfccs = librosa.feature.mfcc(y, sr=sr, n_mfcc=13, hop_length=hop_length, n_fft=n_fft)

    vector = np.empty
    vector = np.append(vector, np.mean(chroma_stft))
    vector = np.append(vector, np.mean(spec_cent))
    vector = np.append(vector,np.mean(spec_bw))
    vector = np.append(vector,np.mean(rolloff))
    vector = np.append(vector,np.mean(zcr))
    for e in mfccs:
            vector = np.append(vector,np.mean(e))
    vector = np.delete(vector,0,0)


    return vector

#Extract MFCC from spectogram
def extract_featuresMCC(y, sr):
    S = librosa.feature.melspectrogram(y, sr=sr, n_fft=2048,
                                       hop_length=512,
                                       n_mels=128)
    mfccs = librosa.feature.mfcc(S=librosa.power_to_db(S), n_mfcc=40)

    return mfccs


def get_mfcc(data, SAMPLE_RATE):
    try:
      # data = librosa.to_mono(data)

      ft1 = librosa.feature.mfcc(y=data, sr = SAMPLE_RATE, n_mfcc=30)
      ft2 = librosa.feature.zero_crossing_rate(y=data)[0]
      ft3 = librosa.feature.spectral_rolloff(y=data)[0]
      ft4 = librosa.feature.spectral_centroid(y=data)[0]
      ft5 = librosa.feature.spectral_contrast(y=data)[0]
      ft6 = librosa.feature.spectral_bandwidth(y=data)[0]
      ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.median(ft1, axis = 1), np.min(ft1, axis = 1)))
      ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.median(ft2), np.min(ft2)))
      ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.median(ft3), np.min(ft3)))
      ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.median(ft4), np.min(ft4)))
      ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.median(ft5), np.min(ft5)))
      ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.median(ft6), np.max(ft6)))
      return np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc))
    except Exception as e:
        print('naughty example')
        return pd.Series([0]*210)

In [29]:
labels[0]

0

In [54]:
X = []
dataset_dict = {}

print("Getting features ..")
for i,y in enumerate(audio_data_red):
    vector = get_mfcc(y, SR)
    X.append(vector)
    dataset_dict[i] = {'features' : vector,
                        'label' : labels[i]}


Getting features ..


In [60]:
dataset = pd.DataFrame.from_dict(dataset_dict, orient='index')
dataset = shuffle(dataset, random_state=42)
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 400 entries, 209 to 102
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   features  400 non-null    object
 1   label     400 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 9.4+ KB


In [61]:
dataset.head()

Unnamed: 0,features,label
209,"[-91.96038818359375, 225.06915283203125, -72.3...",8
280,"[-248.68446350097656, 228.56875610351562, -0.5...",8
33,"[31.999588012695312, 103.43012237548828, -8.01...",3
210,"[-192.52627563476562, -14.060248374938965, -33...",1
93,"[-115.32003021240234, 109.61209106445312, -28....",0


In [78]:
from sklearn.model_selection import train_test_split

X = np.array(dataset['features'].tolist())
y = dataset['label']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **SVM**

In [64]:
## Support Vector Classifier

svm_model = SVC()

svm_pipe = Pipeline([
       ('scale', StandardScaler()),
      ('reduce_dims', PCA()),
        ('model', svm_model)])

In [65]:
svm_pipe.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'scale', 'reduce_dims', 'model', 'scale__copy', 'scale__with_mean', 'scale__with_std', 'reduce_dims__copy', 'reduce_dims__iterated_power', 'reduce_dims__n_components', 'reduce_dims__n_oversamples', 'reduce_dims__power_iteration_normalizer', 'reduce_dims__random_state', 'reduce_dims__svd_solver', 'reduce_dims__tol', 'reduce_dims__whiten', 'model__C', 'model__break_ties', 'model__cache_size', 'model__class_weight', 'model__coef0', 'model__decision_function_shape', 'model__degree', 'model__gamma', 'model__kernel', 'model__max_iter', 'model__probability', 'model__random_state', 'model__shrinking', 'model__tol', 'model__verbose'])

In [66]:
param_grid = dict(reduce_dims__n_components=[100,150,210],
                  model__kernel = ['linear', 'rbf', 'poly'],
                  model__gamma = [0.01, 0.1, 0.5, 1],
                  model__degree = [1,2,3,4])

In [67]:
svm_grid = GridSearchCV(svm_pipe, n_jobs=1, param_grid=param_grid, cv=3, verbose=2,
                    return_train_score=True,
                    scoring= 'accuracy')

In [82]:
svm_grid.fit(X_train, y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=100; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=100; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=100; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=150; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=150; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=150; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=210; total time=   0.1s
[CV] END model__degree=1, model__gamma=0.01, model__kernel=linear, reduce_dims__n_components=210; total time=   0.1s
[

In [83]:
print(f'Best parameters: {svm_grid.best_params_}')

Best parameters: {'model__degree': 1, 'model__gamma': 0.01, 'model__kernel': 'rbf', 'reduce_dims__n_components': 210}


In [84]:
svm_results = svm_grid.best_estimator_.predict(X_test)

In [85]:
print(classification_report(y_test, svm_results))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.88      1.00      0.93         7
           2       0.60      0.33      0.43         9
           3       0.89      0.89      0.89         9
           4       0.79      1.00      0.88        11
           5       0.80      1.00      0.89         8
           6       1.00      0.57      0.73         7
           7       0.50      0.40      0.44         5
           8       0.60      0.75      0.67         8
           9       0.60      0.75      0.67         4

    accuracy                           0.79        80
   macro avg       0.76      0.76      0.75        80
weighted avg       0.79      0.79      0.77        80



In [86]:
svm_grid.best_score_

0.7844589431611121

# **Bayes Classifier**

In [87]:
bayes_model = GaussianNB()
bayes_pipe = Pipeline([
       ('scale', StandardScaler()),
      ('reduce_dims', PCA()),
        ('model', bayes_model)])

In [88]:
param_grid = dict(reduce_dims__n_components=[100,150,210])

In [89]:
bayes_grid = GridSearchCV(bayes_pipe, n_jobs=1, param_grid=param_grid, cv=3, verbose=2,
                    return_train_score=True,
                    scoring= 'accuracy')

In [90]:
bayes_grid.fit(X_train, y_train)

Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] END ......................reduce_dims__n_components=100; total time=   0.0s
[CV] END ......................reduce_dims__n_components=100; total time=   0.1s
[CV] END ......................reduce_dims__n_components=100; total time=   0.0s
[CV] END ......................reduce_dims__n_components=150; total time=   0.0s
[CV] END ......................reduce_dims__n_components=150; total time=   0.1s
[CV] END ......................reduce_dims__n_components=150; total time=   0.0s
[CV] END ......................reduce_dims__n_components=210; total time=   0.1s
[CV] END ......................reduce_dims__n_components=210; total time=   0.0s
[CV] END ......................reduce_dims__n_components=210; total time=   0.1s


In [91]:
print(f'Best parameters: {bayes_grid.best_params_}')

Best parameters: {'reduce_dims__n_components': 100}


In [92]:
bayes_results = bayes_grid.best_estimator_.predict(X_test)

In [93]:
print(classification_report(y_test, bayes_results))

              precision    recall  f1-score   support

           0       0.89      0.67      0.76        12
           1       0.50      0.14      0.22         7
           2       0.71      0.56      0.63         9
           3       0.89      0.89      0.89         9
           4       0.85      1.00      0.92        11
           5       0.53      1.00      0.70         8
           6       0.62      0.71      0.67         7
           7       0.40      0.40      0.40         5
           8       0.57      0.50      0.53         8
           9       0.80      1.00      0.89         4

    accuracy                           0.70        80
   macro avg       0.68      0.69      0.66        80
weighted avg       0.70      0.70      0.68        80



# **Random Forests**

In [95]:
rf_model = RandomForestClassifier()

rf_pipe = Pipeline([
       ('scale', StandardScaler()),
      ('reduce_dims', PCA()),
        ('model', rf_model)])

In [96]:
param_grid = dict(reduce_dims__n_components=[100,150,210],
                  model__bootstrap= [True, False],
                  model__max_depth= [20, 30, 50, 70, None],
                  model__max_features= ['auto', 'sqrt'],
                  model__min_samples_leaf= [1, 2, 4],
                  model__min_samples_split= [2, 5, 10],
                  model__n_estimators= [50,100,200,300])

In [97]:
rf_grid = GridSearchCV(rf_pipe, n_jobs=1, param_grid=param_grid, cv=3, verbose=2,
                    return_train_score=True,
                    scoring= 'accuracy')

In [98]:
rf_grid.fit(X_train, y_train)

Output hidden; open in https://colab.research.google.com to view.

In [99]:
print(f'Best parameters: {rf_grid.best_params_}')

Best parameters: {'model__bootstrap': False, 'model__max_depth': 50, 'model__max_features': 'sqrt', 'model__min_samples_leaf': 4, 'model__min_samples_split': 5, 'model__n_estimators': 200, 'reduce_dims__n_components': 100}


In [100]:
rf_results = rf_grid.best_estimator_.predict(X_test)

In [101]:
print(classification_report(y_test, rf_results))

              precision    recall  f1-score   support

           0       1.00      0.75      0.86        12
           1       1.00      0.71      0.83         7
           2       0.38      0.33      0.35         9
           3       0.80      0.89      0.84         9
           4       0.82      0.82      0.82        11
           5       0.70      0.88      0.78         8
           6       0.67      0.86      0.75         7
           7       0.75      0.60      0.67         5
           8       0.62      0.62      0.62         8
           9       0.67      1.00      0.80         4

    accuracy                           0.74        80
   macro avg       0.74      0.75      0.73        80
weighted avg       0.75      0.74      0.74        80



# **K-NN**

In [102]:
knn_model = KNeighborsClassifier()

knn_pipe = Pipeline([
       ('scale', StandardScaler()),
      ('reduce_dims', PCA()),
        ('model', knn_model)])

In [103]:
param_grid = dict(reduce_dims__n_components=[100,150,210],
                  model__weights = ['uniform', 'distance'],
                  model__metric = ['euclidean', 'manhattan'],
                  model__n_neighbors = [3, 5, 10, 15])

In [104]:
knn_grid = GridSearchCV(knn_pipe, n_jobs=1, param_grid=param_grid, cv=3, verbose=2,
                    return_train_score=True,
                    scoring= 'accuracy')

In [105]:
knn_grid.fit(X_train, y_train)

Fitting 3 folds for each of 48 candidates, totalling 144 fits
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=100; total time=   0.2s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=100; total time=   0.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=100; total time=   0.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=150; total time=   0.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=150; total time=   0.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=150; total time=   0.1s
[CV] END model__metric=euclidean, model__n_neighbors=3, model__weights=uniform, reduce_dims__n_components=210; total time=   0.1s
[CV] END model__metric=eucli

In [106]:
print(f'Best parameters: {knn_grid.best_params_}')

Best parameters: {'model__metric': 'euclidean', 'model__n_neighbors': 5, 'model__weights': 'distance', 'reduce_dims__n_components': 100}


In [107]:
knn_results = knn_grid.best_estimator_.predict(X_test)

In [108]:
print(classification_report(y_test, knn_results))

              precision    recall  f1-score   support

           0       0.83      0.83      0.83        12
           1       1.00      0.57      0.73         7
           2       0.42      0.56      0.48         9
           3       0.53      1.00      0.69         9
           4       0.67      0.73      0.70        11
           5       0.89      1.00      0.94         8
           6       1.00      0.71      0.83         7
           7       0.00      0.00      0.00         5
           8       0.50      0.50      0.50         8
           9       0.00      0.00      0.00         4

    accuracy                           0.66        80
   macro avg       0.58      0.59      0.57        80
weighted avg       0.64      0.66      0.63        80



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# **XGBoost**

In [109]:
xgb_model = XGBClassifier()

xgb_pipe = Pipeline([
       ('scale', StandardScaler()),
      ('reduce_dims', PCA()),
        ('model', xgb_model)])

In [110]:
param_grid = dict(reduce_dims__n_components=[100,150,210],
                  model__learning_rate = [0.05, 0.1, 0.2],
                  model__max_depth= [10, 20, 30, None],
                  model__n_estimators = [50, 100, 200],
                  model__subsample = [0.8, 0.9, 1.0])

In [111]:
xgb_grid = GridSearchCV(xgb_pipe, n_jobs=1, param_grid=param_grid, cv=3, verbose=2,
                    return_train_score=True,
                    scoring= 'accuracy')

In [112]:
xgb_grid.fit(X_train, y_train)

Fitting 3 folds for each of 324 candidates, totalling 972 fits
[CV] END model__learning_rate=0.05, model__max_depth=10, model__n_estimators=50, model__subsample=0.8, reduce_dims__n_components=100; total time=   6.6s
[CV] END model__learning_rate=0.05, model__max_depth=10, model__n_estimators=50, model__subsample=0.8, reduce_dims__n_components=100; total time=   2.3s
[CV] END model__learning_rate=0.05, model__max_depth=10, model__n_estimators=50, model__subsample=0.8, reduce_dims__n_components=100; total time=   2.3s
[CV] END model__learning_rate=0.05, model__max_depth=10, model__n_estimators=50, model__subsample=0.8, reduce_dims__n_components=150; total time=   5.5s
[CV] END model__learning_rate=0.05, model__max_depth=10, model__n_estimators=50, model__subsample=0.8, reduce_dims__n_components=150; total time=   3.9s
[CV] END model__learning_rate=0.05, model__max_depth=10, model__n_estimators=50, model__subsample=0.8, reduce_dims__n_components=150; total time=   3.3s
[CV] END model__lea

In [113]:
print(f'Best parameters: {xgb_grid.best_params_}')

Best parameters: {'model__learning_rate': 0.05, 'model__max_depth': 10, 'model__n_estimators': 200, 'model__subsample': 0.8, 'reduce_dims__n_components': 100}


In [114]:
xgb_results = xgb_grid.best_estimator_.predict(X_test)

In [115]:
print(classification_report(y_test, xgb_results))

              precision    recall  f1-score   support

           0       0.90      0.75      0.82        12
           1       0.71      0.71      0.71         7
           2       0.44      0.44      0.44         9
           3       0.89      0.89      0.89         9
           4       0.82      0.82      0.82        11
           5       0.62      0.62      0.62         8
           6       0.56      0.71      0.63         7
           7       1.00      0.60      0.75         5
           8       0.62      0.62      0.62         8
           9       0.50      0.75      0.60         4

    accuracy                           0.70        80
   macro avg       0.71      0.69      0.69        80
weighted avg       0.72      0.70      0.70        80

