In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_selection import SelectKBest, f_classif

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
from ipynb.fs.full.data_loading import get_usable_data

In [3]:
data = get_usable_data()

In [4]:
new_data = data.copy()

In [5]:
df = pd.DataFrame(columns=["label1", "label2", "label3", "label4", "DTaccuracy", "RFaccuracy", "MLPaccuracy", "GSaccuracy"])

In [6]:
labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
for i in range(len(labels)):
    for j in range (len(labels) - i - 1):
        for k in range(len(labels) - i - j - 2):
            for l in range(len(labels) - i - j - k - 3):
                print(labels[i], labels[i + j + 1], labels[i + j + k + 2], labels[i + j + k + l + 3])

                new_data_4genre = new_data[(new_data.label == labels[i]) | (new_data.label == labels[i + j + 1]) | (new_data.label == labels[i + j + k + 2]) | (new_data.label == labels[i + j + k + l + 3])]

                y = new_data_4genre['label']
                X = new_data_4genre.drop(labels=['label', 'filename', 'audio_data', 'audio_sr'],axis=1)
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

                dt = DecisionTreeClassifier()
                dt = dt.fit(X_train, y_train)
                y_pred_dt = dt.predict(X_test)
                dtaccuracy = accuracy_score(y_test, y_pred_dt)
                print("DT accuracy:", dtaccuracy)

                rf = RandomForestClassifier()
                rf = rf.fit(X_train, y_train)
                y_pred_rf = dt.predict(X_test)
                rfaccuracy = accuracy_score(y_test, y_pred_rf)
                print("RF accuracy:", rfaccuracy)

                mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
                mlp.fit(X_train, y_train)
                y_pred_mlp = mlp.predict(X_test)
                mlpaccuracy = accuracy_score(y_test, y_pred_mlp)
                print("MLP accuracy:", mlpaccuracy)


                param_grid = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
                        'solver': ['lbgfs', 'sgd', 'adam'],
                        'learning_rate': ['constant', 'invscaling', 'adaptive']}  
                grid = GridSearchCV(MLPClassifier(), param_grid, refit = True, verbose = 0, cv=5)
                grid.fit(X_train, y_train)
                y_pred_gs = grid.predict(X_test) 
                gsaccuracy = accuracy_score(y_test, y_pred_gs)
                print("Grid accuracy:", gsaccuracy) 
                #print("Grid classification:", classification_report(y_test, grid_predictions)) 

                row = {"label1" : labels[i], "label2" : labels[i + j + 1], "label3" : labels[i + j + k + 2], "label4": labels[i + j + k + l + 3], 
                       "DTaccuracy" : dtaccuracy, "RFaccuracy" : rfaccuracy, "MLPaccuracy" : mlpaccuracy, "GSaccuracy" : gsaccuracy}
                df = df.append(row, ignore_index=True)

                print()

blues classical country disco
DT accuracy: 0.6515151515151515
RF accuracy: 0.6515151515151515
MLP accuracy: 0.22727272727272727
Grid accuracy: 0.3181818181818182

blues classical country hiphop
DT accuracy: 0.7954545454545454
RF accuracy: 0.7954545454545454
MLP accuracy: 0.22727272727272727
Grid accuracy: 0.4772727272727273

blues classical country jazz
DT accuracy: 0.5909090909090909
RF accuracy: 0.5909090909090909
MLP accuracy: 0.22727272727272727
Grid accuracy: 0.4090909090909091

blues classical country metal
DT accuracy: 0.7727272727272727
RF accuracy: 0.7727272727272727
MLP accuracy: 0.22727272727272727
Grid accuracy: 0.5227272727272727

blues classical country pop
DT accuracy: 0.7727272727272727
RF accuracy: 0.7727272727272727
MLP accuracy: 0.22727272727272727
Grid accuracy: 0.4393939393939394

blues classical country reggae
DT accuracy: 0.6590909090909091
RF accuracy: 0.6590909090909091
MLP accuracy: 0.22727272727272727
Grid accuracy: 0.42424242424242425

blues classical countr

In [7]:
df.head()

Unnamed: 0,label1,label2,label3,label4,DTaccuracy,RFaccuracy,MLPaccuracy,GSaccuracy
0,blues,classical,country,disco,0.651515,0.651515,0.227273,0.318182
1,blues,classical,country,hiphop,0.795455,0.795455,0.227273,0.477273
2,blues,classical,country,jazz,0.590909,0.590909,0.227273,0.409091
3,blues,classical,country,metal,0.772727,0.772727,0.227273,0.522727
4,blues,classical,country,pop,0.772727,0.772727,0.227273,0.439394


In [8]:
df.to_csv('classification_data_4_genre.csv', index=False)