In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_selection import SelectKBest, f_classif

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
from ipynb.fs.full.data_loading import load_data_from_csv

In [3]:
data = load_data_from_csv()

In [4]:
new_data = data.copy()

In [5]:
df = pd.DataFrame(columns=["label1", "label2", "label3", "DTaccuracy", "RFaccuracy", "MLPaccuracy", "GSaccuracy"])

In [9]:
labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
for i in range(len(labels)):
    for j in range (len(labels) - i - 1):
        for k in range(len(labels) - i - j - 2):
            print(labels[i], labels[i + j + 1], labels[i + j + k + 2])
            
            new_data_3genre = new_data[(new_data.label == labels[i]) | (new_data.label == labels[i + j + 1]) | (new_data.label == labels[i + j + k + 2])]

            y = new_data_3genre['label']
            X = new_data_3genre.drop(labels=['label', 'filename', 'audio_data', 'audio_sr'],axis=1)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=0)

            param_grid_dt = {'splitter': ['best', 'random'], 'max_depth': ['None', 2, 4, 8],
                             'random_state': [0], 'min_samples_leaf': [1, 2, 4, 8]}
            grid_dt = GridSearchCV(DecisionTreeClassifier(), param_grid_dt, refit = True, verbose = 0, cv = 5)
            grid_dt.fit(X_train, y_train)
            y_pred_dt = grid_dt.predict(X_test)
            dt_accuracy = accuracy_score(y_test, y_pred_dt)
            print("DT accuracy:", dt_accuracy)

            param_grid_rf = {'n_estimators': [100, 500, 1000], 'max_depth' : [None, 2, 4, 8],
                            'random_state': [0], 'min_samples_leaf': [1, 2, 4, 8]}
            grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, refit = True, verbose = 0, cv = 5)
            grid_rf.fit(X_train, y_train)
            y_pred_rf = grid_rf.predict(X_test)
            rf_accuracy = accuracy_score(y_test, y_pred_rf)
            print("RF accuracy:", rf_accuracy)

            param_grid_mlp = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
                    'solver': ['lbgfs', 'sgd', 'adam'],
                    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
                    'alpha': [0.0001, 0.05],
                    'learning_rate': ['constant', 'invscaling', 'adaptive']}  
            grid_mlp = GridSearchCV(MLPClassifier(), param_grid_mlp, refit = True, verbose = 0, cv=5)
            grid_mlp.fit(X_train, y_train)
            y_pred_mlp = grid_mlp.predict(X_test) 
            mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
            print("MLP accuracy:", mlp_accuracy) 

            row = {"label1" : labels[i], "label2" : labels[i + j + 1], "label3" : labels[i + j + k + 2], 
                   "DTaccuracy" : dt_accuracy, "RFaccuracy" : rf_accuracy, "MLPaccuracy" : mlp_accuracy}
            df = df.append(row, ignore_index=True)

            print()

blues classical country
DT accuracy: 0.6958333333333333
RF accuracy: 0.7916666666666666
MLP accuracy: 0.49583333333333335

blues classical disco
DT accuracy: 0.6625
RF accuracy: 0.875
MLP accuracy: 0.6541666666666667

blues classical hiphop
DT accuracy: 0.775
RF accuracy: 0.8833333333333333
MLP accuracy: 0.4666666666666667

blues classical jazz
DT accuracy: 0.75
RF accuracy: 0.8416666666666667
MLP accuracy: 0.35

blues classical metal
DT accuracy: 0.8041666666666667
RF accuracy: 0.8875
MLP accuracy: 0.475

blues classical pop
DT accuracy: 0.85
RF accuracy: 0.9708333333333333
MLP accuracy: 0.6333333333333333

blues classical reggae
DT accuracy: 0.6875
RF accuracy: 0.8875
MLP accuracy: 0.6041666666666666

blues classical rock
DT accuracy: 0.7041666666666667
RF accuracy: 0.8125
MLP accuracy: 0.45416666666666666

blues country disco
DT accuracy: 0.5833333333333334
RF accuracy: 0.675
MLP accuracy: 0.4375

blues country hiphop
DT accuracy: 0.5625
RF accuracy: 0.7666666666666667
MLP accuracy:

In [10]:
df.head()

Unnamed: 0,label1,label2,label3,DTaccuracy,RFaccuracy,MLPaccuracy,GSaccuracy
0,blues,classical,country,0.695833,0.820833,0.341667,0.358333
1,blues,classical,disco,0.729167,0.85,0.341667,0.479167
2,blues,classical,hiphop,0.854167,0.8875,0.341667,0.6375
3,blues,classical,jazz,0.6125,0.808333,0.341667,0.566667
4,blues,classical,metal,0.845833,0.895833,0.341667,0.591667


In [11]:
df.to_csv('classification_data_3_genre.csv', index=False)