**Feature Engineering**

In [2]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

E:\etc\conda\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
E:\etc\conda\lib\site-packages\numpy\.libs\libopenblas.TXA6YQSD3GCQQC22GEQ54J2UDCXDXHWN.gfortran-win_amd64.dll
  stacklevel=1)
Using TensorFlow backend.


In [3]:
genres = ['Acid Techno','Ambient Techno','Detroit Techno','Industiral Techno','Minimal Techno','Tech House']
for g in genres:
    pathlib.Path(f'E:/techno/img_data/{g}').mkdir(parents=True, exist_ok=True)   #create path to store spectrograms

In [25]:
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = ['Acid Techno','Ambient Techno','Detroit Techno','Industiral Techno','Minimal Techno','Tech House']
for g in genres:  
    for filename in os.listdir(f'E:/techno/{g}'):
        songname = f'E:/techno/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'E:/techno/img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf() #store spectrograms for every audio data

<Figure size 720x720 with 0 Axes>

In [36]:
header = 'chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()  #create headers for the feature data file

In [38]:
file = open('E:/techno/data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = ['Acid Techno','Ambient Techno','Detroit Techno','Industiral Techno','Minimal Techno','Tech House']
for g in genres:
    for filename in os.listdir(f'E:/techno/{g}'):
        songname = f'E:/techno/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('E:/techno/data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split()) #get all the relevant features and write in a csv file

*Data Preprocessing*

In [9]:
data = pd.read_csv('E:/techno/data.csv')
data.shape

(602, 27)

In [10]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list) 

In [11]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float)) #standardize the data

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1) #train, test split

*Suport Vector Machine*

In [13]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV 
  
# defining parameter range 
param_grid = {'C': [0.1, 1, 10, 100, 1000],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['rbf','linear']}  
  
grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 0) 
  
# fitting the model for grid search 
grid.fit(X_train, y_train)  

GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf', 'linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [14]:
print(grid.best_params_) 

{'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}


In [15]:
from sklearn.metrics import classification_report, confusion_matrix 
grid_predictions = grid.predict(X_test) 
  
# print classification report 
print(classification_report(y_test, grid_predictions))

              precision    recall  f1-score   support

           0       0.23      0.27      0.25        11
           1       0.36      0.40      0.38        10
           2       0.00      0.00      0.00         5
           3       0.55      0.55      0.55        11
           4       0.62      0.36      0.45        14
           5       0.00      0.00      0.00        10

    accuracy                           0.30        61
   macro avg       0.29      0.26      0.27        61
weighted avg       0.34      0.30      0.31        61



*Random Forest*

In [16]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score

In [17]:
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [18]:
#Perform grid search to find the best hyperparameters 
rf = RandomForestClassifier()
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)

In [19]:
rf_random.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  8.8min finished


RandomizedSearchCV(cv=3, error_score='raise-deprecating',
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
                                                    n_estimators='warn',
                                                    n_jobs=None

In [20]:
rf_random.best_params_

{'n_estimators': 600,
 'min_samples_split': 10,
 'min_samples_leaf': 2,
 'max_features': 'auto',
 'max_depth': 100,
 'bootstrap': True}

In [26]:
parameters = {'bootstrap': True,
              'min_samples_leaf': 2,
              'n_estimators': 600, 
              'min_samples_split': 10,
              'max_features': 'auto',
              'max_depth': 60,
              'max_leaf_nodes': None}
RF_model = RandomForestClassifier(**parameters)
RF_model.fit(X_train, y_train)
RF_predictions = RF_model.predict(X_test)
print(classification_report(y_test, RF_predictions)) #Build random forest model under best parameter set

              precision    recall  f1-score   support

           0       0.50      0.18      0.27        11
           1       0.36      0.50      0.42        10
           2       0.33      0.60      0.43         5
           3       0.53      0.73      0.62        11
           4       0.55      0.43      0.48        14
           5       0.00      0.00      0.00        10

    accuracy                           0.39        61
   macro avg       0.38      0.41      0.37        61
weighted avg       0.40      0.39      0.37        61



*Extreme Gradient Boosting*

In [31]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

In [150]:
xg_train = xgb.DMatrix(X_train, label=y_train)
xg_test = xgb.DMatrix(X_test, label=y_test)
# setup parameters for xgboost
params = {
    # Parameters that we are going to tune.
    'objective': 'multi:softmax',
    'max_depth': 3,
    'min_child_weight': 1,
    'n_estimators': 200,
    'num_class': 6,
    'learning_rate': .01,
    'eta':.3,
    'subsample': 0.8,
    'colsample_bytree': 0.5
}
params['eval_metric'] = "mae"
num_boost_round = 10
watchlist = [(xg_train, 'train'), (xg_test, 'test')]
model = xgb.train(params, xg_train, num_boost_round=num_boost_round)

In [151]:
# get prediction
pred = model.predict(xg_test)
error_rate = np.sum(pred != y_test) / y_test.shape[0]
print('Test error using softmax = {}'.format(error_rate))

Test error using softmax = 0.6065573770491803


In [152]:
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.29      0.18      0.22        11
           1       0.50      0.50      0.50        10
           2       0.25      0.60      0.35         5
           3       0.55      0.55      0.55        11
           4       0.60      0.43      0.50        14
           5       0.18      0.20      0.19        10

    accuracy                           0.39        61
   macro avg       0.39      0.41      0.39        61
weighted avg       0.42      0.39      0.40        61

