In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2; this import fails on newer releases.
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Root directory of the MSD-I data. Later cells build file paths by string-concatenating
# onto this value, so the trailing slash must be kept.
msdi_path = "../data/msdi/"

In [2]:
def load_deep_audio_features(entry, msdi_path, X_train, X_val, X_test):
    """Return the feature row for one mapping entry as a plain Python list.

    Args:
        entry: Mapping-like record supporting ``entry['set']`` (split name) and
            ``entry['deep_features']`` (row position inside that split's array).
        msdi_path: Unused by this function; kept for signature compatibility.
        X_train: 2-D array indexed when the split is ``'train'``.
        X_val: 2-D array indexed when the split is ``'val'``.
        X_test: 2-D array indexed for every other split value.

    Returns:
        ``source[row, :].tolist()`` for the array selected by the entry's split.
    """
    split = entry['set']
    row = entry['deep_features']

    # Anything that is neither 'train' nor 'val' is looked up in the test array.
    if split == 'train':
        source = X_train
    elif split == 'val':
        source = X_val
    else:
        source = X_test

    return source[row, :].tolist()

In [3]:
# Read the MSD-I mapping table and discard the "Unnamed: 0" column that comes in with the CSV
# (presumably an index written out when the file was saved — verify against the file).
df_entry = pd.read_csv(msdi_path + "msdi_mapping.csv").drop(columns=["Unnamed: 0"])

In [4]:
# Show the raw values of the first mapping row.
df_entry.iloc[0].values

array(['TRABKJU128F422A7FE', 'Metal', 0, 'train', 'ARBD4QW1187FB42153',
       'http://artwork-cdn.7static.com/static/img/sleeveart/00/008/487/0000848744_200.jpg',
       'img/0000848744_200.jpg', 'mfcc/mfcc_A.npz', 0], dtype=object)

In [5]:
# Load the three per-split feature arrays (train / val / test) from the deep_features folder.
X_train, X_val, X_test = (
    np.load(msdi_path + relative_path)
    for relative_path in (
        "deep_features/X_train_audio_MSD-I.npy",
        "deep_features/X_val_audio_MSD-I.npy",
        "deep_features/X_test_audio_MSD-I.npy",
    )
)

In [6]:
# Index labels of the df_entry rows belonging to each split, as plain Python lists.
train_idx, test_idx, val_idx = (
    df_entry.index[df_entry['set'] == split_name].tolist()
    for split_name in ("train", "test", "val")
)

In [7]:
# Assemble one feature row per mapping entry by looking each entry up in its split's array;
# result_type='expand' spreads the returned list over columns.
# Rows must reach the helper as labelled Series: `raw=True` is documented to hand over bare
# ndarrays, on which the `entry['set']` / `entry['deep_features']` lookups cannot work,
# so it is deliberately not passed here.
X = df_entry.apply(
    load_deep_audio_features,
    axis=1,
    result_type='expand',
    args=(msdi_path, X_train, X_val, X_test),
)

In [9]:
# Check the size of the assembled matrix (rows = mapping entries, columns = feature length).
X.shape

(30712, 2048)

In [8]:
label = df_entry['genre'].array

# Encode genre names as integer class ids; `le` is kept so predictions can be decoded later.
le = LabelEncoder()
y_classes = le.fit_transform(label)
class_names = le.classes_

print("Classes : ", class_names, "\n")
print(y_classes.shape)

Classes :  ['Blues' 'Country' 'Electronic' 'Folk' 'Jazz' 'Latin' 'Metal' 'New Age'
 'Pop' 'Punk' 'Rap' 'Reggae' 'RnB' 'Rock' 'World'] 

(30712,)


In [10]:
# Switch from DataFrame to a plain ndarray so rows can be selected by position below.
# np.asarray (rather than `.values`) keeps this cell safe to re-run once X is already an array.
X = np.asarray(X)

In [11]:
# Slice the combined feature matrix and the encoded labels into the three splits.
# NOTE(review): this rebinds X_train / X_val / X_test, which previously held the arrays loaded
# from disk and are read by the feature-assembly cell; re-running that cell after this one
# would index into these slices instead. Consider distinct names.
X_train, X_test, X_val = X[train_idx], X[test_idx], X[val_idx]
y_train, y_test, y_val = y_classes[train_idx], y_classes[test_idx], y_classes[val_idx]


In [97]:
# Genre classifier on the training split (LogisticRegression is imported in the top import cell).
# warm_start=True means any later clf.fit(...) call starts from the coefficients found here
# instead of from scratch.
clf = LogisticRegression(random_state=42, max_iter=1000, verbose=2, warm_start=True)
clf.fit(X_train, y_train)

# Predictions are kept in y_pred for the classification report; score() prints test accuracy.
y_pred = clf.predict(X_test)
print(clf.score(X_test, y_test))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0.4902129490212949


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.7min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.7min finished


In [98]:
# Per-genre precision / recall / F1 for the test-split predictions.
print(classification_report(y_test, y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.31      0.09      0.14       190
     Country       0.35      0.39      0.37       194
  Electronic       0.57      0.69      0.63       708
        Folk       0.13      0.12      0.13       136
        Jazz       0.48      0.43      0.45       461
       Latin       0.11      0.04      0.06        81
       Metal       0.68      0.67      0.67       374
     New Age       0.00      0.00      0.00        38
         Pop       0.35      0.41      0.38       463
        Punk       0.22      0.07      0.11        95
         Rap       0.67      0.74      0.71       379
      Reggae       0.53      0.46      0.49       263
         RnB       0.49      0.31      0.38       395
        Rock       0.46      0.62      0.53       827
       World       0.05      0.02      0.03        45

    accuracy                           0.49      4649
   macro avg       0.36      0.34      0.34      4649
weighted avg       0.47   

In [99]:
# NOTE(review): clf was built with warm_start=True, so this fit starts from the coefficients
# learned on X_train rather than from scratch, and it uses only the validation rows.
# X holds every split, so the score printed below includes rows the model has been fitted on;
# it is not a held-out estimate. Confirm this is the intended final model.
clf.fit(X_val, y_val)
y_pred = clf.predict(X)
print(clf.score(X,y_classes))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.3s finished


0.49446470435009116


In [100]:
# Per-genre report over all entries (train, val and test together).
print(classification_report(y_classes, y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.24      0.10      0.14       825
     Country       0.46      0.37      0.41      1778
  Electronic       0.56      0.67      0.61      4801
        Folk       0.24      0.15      0.19      1144
        Jazz       0.47      0.48      0.48      2671
       Latin       0.28      0.09      0.14       550
       Metal       0.62      0.69      0.65      2617
     New Age       0.22      0.13      0.16       265
         Pop       0.37      0.48      0.42      3429
        Punk       0.28      0.20      0.23       713
         Rap       0.76      0.73      0.75      2674
      Reggae       0.58      0.50      0.53      1699
         RnB       0.42      0.27      0.33      1836
        Rock       0.46      0.53      0.49      5213
       World       0.15      0.09      0.12       497

    accuracy                           0.49     30712
   macro avg       0.41      0.37      0.38     30712
weighted avg       0.48   

In [101]:
# Class-probability matrix for every entry: one row per entry, one column per class known to clf.
Y_proba = clf.predict_proba(X)
print(Y_proba.shape)

(30712, 15)


In [112]:
# Label each probability column with its genre name. predict_proba orders its columns by
# clf.classes_, so the names are decoded from that rather than assumed to equal the full
# class list. The frame reuses df_entry's index so an index-based join lines the rows up.
df_proba = pd.DataFrame(
    data=Y_proba,
    index=df_entry.index,
    columns=le.inverse_transform(clf.classes_),
)

In [113]:
# Attach the per-genre probability columns to the mapping table (joined on the index).
df_final = df_entry.join(df_proba)

In [115]:
# Hard class predictions (encoded label ids) for every entry.
Y_pred = clf.predict(X)


In [116]:
# Decode the predicted class ids back to genre names and store them with the results.
# Predicting afresh here (instead of decoding Y_pred in place) keeps the cell re-runnable:
# decoding an already-decoded Y_pred would fail.
Y_pred = le.inverse_transform(clf.predict(X))
df_final["deep_audio_features_genre"] = Y_pred

In [117]:
# Remove these six mapping columns from df_final in place before it is written out.
df_final.drop(
    columns=['album_index', 'msd_artist_id', 'image_url', 'img', 'mfcc', 'deep_features'],
    inplace=True,
)

In [119]:
# Write the results table with a header row and without the index column.
# to_csv returns None when given a path, so its result is not kept; index=False is the
# documented boolean form of "do not write the index".
df_final.to_csv(r'../data/deep_audio_features_results.csv', index=False, header=True)