In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_selection import SelectKBest, f_classif

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
from ipynb.fs.full.data_loading import load_data_from_csv

In [3]:
data = load_data_from_csv()

In [4]:
new_data = data.copy()

In [5]:
df = pd.DataFrame(columns=["label1", "label2", "label3", "label4", "DTaccuracy", "RFaccuracy", "MLPaccuracy", "GSaccuracy"])

In [9]:
labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
for i in range(len(labels)):
    for j in range (len(labels) - i - 1):
        for k in range(len(labels) - i - j - 2):
            for l in range(len(labels) - i - j - k - 3):
                print(labels[i], labels[i + j + 1], labels[i + j + k + 2], labels[i + j + k + l + 3])

                new_data_4genre = new_data[(new_data.label == labels[i]) | (new_data.label == labels[i + j + 1]) | (new_data.label == labels[i + j + k + 2]) | (new_data.label == labels[i + j + k + l + 3])]

                y = new_data_4genre['label']
                X = new_data_4genre.drop(labels=['label', 'filename', 'audio_data', 'audio_sr'],axis=1)
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=0)

                param_grid_dt = {'splitter': ['best', 'random'], 'max_depth': ['None', 2, 4, 8],
                                 'random_state': [0], 'min_samples_leaf': [1, 2, 4, 8]}
                grid_dt = GridSearchCV(DecisionTreeClassifier(), param_grid_dt, refit = True, verbose = 0, cv = 5)
                grid_dt.fit(X_train, y_train)
                y_pred_dt = grid_dt.predict(X_test)
                dt_accuracy = accuracy_score(y_test, y_pred_dt)
                print("DT accuracy:", dt_accuracy)

                param_grid_rf = {'n_estimators': [100, 500, 1000], 'max_depth' : [None, 2, 4, 8],
                                'random_state': [0], 'min_samples_leaf': [1, 2, 4, 8]}
                grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, refit = True, verbose = 0, cv = 5)
                grid_rf.fit(X_train, y_train)
                y_pred_rf = grid_rf.predict(X_test)
                rf_accuracy = accuracy_score(y_test, y_pred_rf)
                print("RF accuracy:", rf_accuracy)

                param_grid_mlp = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
                        'solver': ['lbgfs', 'sgd', 'adam'],
                        'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
                        'alpha': [0.0001, 0.05],
                        'learning_rate': ['constant', 'invscaling', 'adaptive']}  
                grid_mlp = GridSearchCV(MLPClassifier(), param_grid_mlp, refit = True, verbose = 0, cv=5)
                grid_mlp.fit(X_train, y_train)
                y_pred_mlp = grid_mlp.predict(X_test) 
                mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
                print("MLP accuracy:", mlp_accuracy) 

                row = {"label1" : labels[i], "label2" : labels[i + j + 1], "label3" : labels[i + j + k + 2], "label4": labels[i + j + k + l + 3], 
                       "DTaccuracy" : dt_accuracy, "RFaccuracy" : rf_accuracy, "MLPaccuracy" : mlp_accuracy}
                df = df.append(row, ignore_index=True)

                print()

blues classical country disco
DT accuracy: 0.615625
RF accuracy: 0.803125
MLP accuracy: 0.353125

blues classical country hiphop
DT accuracy: 0.640625
RF accuracy: 0.8625
MLP accuracy: 0.4

blues classical country jazz
DT accuracy: 0.55625
RF accuracy: 0.759375
MLP accuracy: 0.284375

blues classical country metal
DT accuracy: 0.709375
RF accuracy: 0.821875
MLP accuracy: 0.428125

blues classical country pop
DT accuracy: 0.715625
RF accuracy: 0.821875
MLP accuracy: 0.35625

blues classical country reggae
DT accuracy: 0.646875
RF accuracy: 0.79375
MLP accuracy: 0.3125

blues classical country rock
DT accuracy: 0.60625
RF accuracy: 0.728125
MLP accuracy: 0.415625

blues classical disco hiphop
DT accuracy: 0.634375
RF accuracy: 0.771875
MLP accuracy: 0.471875

blues classical disco jazz
DT accuracy: 0.609375
RF accuracy: 0.771875
MLP accuracy: 0.325

blues classical disco metal
DT accuracy: 0.659375
RF accuracy: 0.859375
MLP accuracy: 0.4

blues classical disco pop
DT accuracy: 0.66875
RF

In [10]:
df.head()

Unnamed: 0,label1,label2,label3,label4,DTaccuracy,RFaccuracy,MLPaccuracy,GSaccuracy
0,blues,classical,country,disco,0.6125,0.8125,0.25625,0.403125
1,blues,classical,country,hiphop,0.5875,0.828125,0.25625,0.4875
2,blues,classical,country,jazz,0.546875,0.78125,0.25625,0.3
3,blues,classical,country,metal,0.7,0.853125,0.25625,0.465625
4,blues,classical,country,pop,0.690625,0.81875,0.25625,0.503125


In [11]:
df.to_csv('classification_data_4_genre.csv', index=False)