In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_selection import SelectKBest, f_classif

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
from ipynb.fs.full.data_loading import get_usable_data

In [3]:
data = get_usable_data()

In [4]:
new_data = data.copy()

In [5]:
df = pd.DataFrame(columns=["label1", "label2", "DTaccuracy", "RFaccuracy", "MLPaccuracy", "GSaccuracy"])

In [6]:
labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
for i in range(len(labels)):
    for j in range(len(labels) - i - 1):
                
        print(labels[i], labels[i + j + 1])
        
        new_data_2genre = new_data[(new_data.label == labels[i]) | (new_data.label == labels[i + j + 1])]

        y = new_data_2genre['label']
        X = new_data_2genre.drop(labels=['label', 'filename', 'audio_data', 'audio_sr'],axis=1)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
        
        dt = DecisionTreeClassifier()
        dt = dt.fit(X_train, y_train)
        y_pred_dt = dt.predict(X_test)
        dtaccuracy = accuracy_score(y_test, y_pred_dt)
        print("DT accuracy:", dtaccuracy)
                
        rf = RandomForestClassifier()
        rf = rf.fit(X_train, y_train)
        y_pred_rf = dt.predict(X_test)
        rfaccuracy = accuracy_score(y_test, y_pred_rf)
        print("RF accuracy:", rfaccuracy)
        
        mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
        mlp.fit(X_train, y_train)
        y_pred_mlp = mlp.predict(X_test)
        mlpaccuracy = accuracy_score(y_test, y_pred_mlp)
        print("MLP accuracy:", mlpaccuracy)
        
        
        param_grid = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
                'solver': ['lbgfs', 'sgd', 'adam'],
                'learning_rate': ['constant', 'invscaling', 'adaptive']}  
        grid = GridSearchCV(MLPClassifier(), param_grid, refit = True, verbose = 0, cv=5)
        grid.fit(X_train, y_train)
        y_pred_gs = grid.predict(X_test) 
        gsaccuracy = accuracy_score(y_test, y_pred_gs)
        print("Grid accuracy:", gsaccuracy) 
                
        row = {"label1" : labels[i], "label2" : labels[i + j + 1], "DTaccuracy" : dtaccuracy, "RFaccuracy" : rfaccuracy, "MLPaccuracy" : mlpaccuracy, "GSaccuracy" : gsaccuracy}
        df = df.append(row, ignore_index=True)
        
        print()
        

blues classical
DT accuracy: 0.8333333333333334
RF accuracy: 0.8333333333333334
MLP accuracy: 0.48484848484848486
Grid accuracy: 0.5757575757575758

blues country
DT accuracy: 0.7727272727272727
RF accuracy: 0.7727272727272727
MLP accuracy: 0.48484848484848486
Grid accuracy: 0.5303030303030303

blues disco
DT accuracy: 0.7878787878787878
RF accuracy: 0.7878787878787878
MLP accuracy: 0.48484848484848486
Grid accuracy: 0.5606060606060606

blues hiphop
DT accuracy: 0.8333333333333334
RF accuracy: 0.8333333333333334
MLP accuracy: 0.48484848484848486
Grid accuracy: 0.6060606060606061

blues jazz
DT accuracy: 0.8484848484848485
RF accuracy: 0.8484848484848485
MLP accuracy: 0.5
Grid accuracy: 0.5606060606060606

blues metal
DT accuracy: 0.8333333333333334
RF accuracy: 0.8333333333333334
MLP accuracy: 0.48484848484848486
Grid accuracy: 0.5151515151515151

blues pop
DT accuracy: 0.9848484848484849
RF accuracy: 0.9848484848484849
MLP accuracy: 0.48484848484848486
Grid accuracy: 0.621212121212121

In [7]:
df.head()

Unnamed: 0,label1,label2,DTaccuracy,RFaccuracy,MLPaccuracy,GSaccuracy
0,blues,classical,0.833333,0.833333,0.484848,0.575758
1,blues,country,0.772727,0.772727,0.484848,0.530303
2,blues,disco,0.787879,0.787879,0.484848,0.560606
3,blues,hiphop,0.833333,0.833333,0.484848,0.606061
4,blues,jazz,0.848485,0.848485,0.5,0.560606


In [9]:
df.to_csv('classification_data_2_genre.csv', index=False)