In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_selection import SelectKBest, f_classif

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
from ipynb.fs.full.data_loading import get_usable_data

In [3]:
data = get_usable_data()

In [4]:
new_data = data.copy()

In [5]:
df = pd.DataFrame(columns=["label1", "label2", "label3", "DTaccuracy", "RFaccuracy", "MLPaccuracy", "GSaccuracy"])

In [6]:
labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
for i in range(len(labels)):
    for j in range (len(labels) - i - 1):
        for k in range(len(labels) - i - j - 2):
            print(labels[i], labels[i + j + 1], labels[i + j + k + 2])
            
            new_data_3genre = new_data[(new_data.label == labels[i]) | (new_data.label == labels[i + j + 1]) | (new_data.label == labels[i + j + k + 2])]

            y = new_data_3genre['label']
            X = new_data_3genre.drop(labels=['label', 'filename', 'audio_data', 'audio_sr'],axis=1)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

            dt = DecisionTreeClassifier()
            dt = dt.fit(X_train, y_train)
            y_pred_dt = dt.predict(X_test)
            dtaccuracy = accuracy_score(y_test, y_pred_dt)
            print("DT accuracy:", dtaccuracy)

            rf = RandomForestClassifier()
            rf = rf.fit(X_train, y_train)
            y_pred_rf = dt.predict(X_test)
            rfaccuracy = accuracy_score(y_test, y_pred_rf)
            print("RF accuracy:", rfaccuracy)

            mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
            mlp.fit(X_train, y_train)
            y_pred_mlp = mlp.predict(X_test)
            mlpaccuracy = accuracy_score(y_test, y_pred_mlp)
            print("MLP accuracy:", mlpaccuracy)


            param_grid = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
                    'solver': ['lbgfs', 'sgd', 'adam'],
                    'learning_rate': ['constant', 'invscaling', 'adaptive']}  
            grid = GridSearchCV(MLPClassifier(), param_grid, refit = True, verbose = 0, cv=5)
            grid.fit(X_train, y_train)
            y_pred_gs = grid.predict(X_test) 
            gsaccuracy = accuracy_score(y_test, y_pred_gs)
            print("Grid accuracy:", gsaccuracy) 
            #print("Grid classification:", classification_report(y_test, grid_predictions)) 

            row = {"label1" : labels[i], "label2" : labels[i + j + 1], "label3" : labels[i + j + k + 2], 
                   "DTaccuracy" : dtaccuracy, "RFaccuracy" : rfaccuracy, "MLPaccuracy" : mlpaccuracy, "GSaccuracy" : gsaccuracy}
            df = df.append(row, ignore_index=True)

            print()

blues classical country
DT accuracy: 0.7272727272727273
RF accuracy: 0.7272727272727273
MLP accuracy: 0.36363636363636365
Grid accuracy: 0.29292929292929293

blues classical disco
DT accuracy: 0.8484848484848485
RF accuracy: 0.8484848484848485
MLP accuracy: 0.36363636363636365
Grid accuracy: 0.35353535353535354

blues classical hiphop
DT accuracy: 0.8383838383838383
RF accuracy: 0.8383838383838383
MLP accuracy: 0.36363636363636365
Grid accuracy: 0.47474747474747475

blues classical jazz
DT accuracy: 0.7878787878787878
RF accuracy: 0.7878787878787878
MLP accuracy: 0.37373737373737376
Grid accuracy: 0.42424242424242425

blues classical metal
DT accuracy: 0.8585858585858586
RF accuracy: 0.8585858585858586
MLP accuracy: 0.36363636363636365
Grid accuracy: 0.3434343434343434

blues classical pop
DT accuracy: 0.9191919191919192
RF accuracy: 0.9191919191919192
MLP accuracy: 0.36363636363636365
Grid accuracy: 0.6565656565656566

blues classical reggae
DT accuracy: 0.898989898989899
RF accuracy:

In [7]:
df.head()

Unnamed: 0,label1,label2,label3,DTaccuracy,RFaccuracy,MLPaccuracy,GSaccuracy
0,blues,classical,country,0.727273,0.727273,0.363636,0.292929
1,blues,classical,disco,0.848485,0.848485,0.363636,0.353535
2,blues,classical,hiphop,0.838384,0.838384,0.363636,0.474747
3,blues,classical,jazz,0.787879,0.787879,0.373737,0.424242
4,blues,classical,metal,0.858586,0.858586,0.363636,0.343434


In [8]:
df.to_csv('classification_data_3_genre.csv', index=False)