# **Music Theme Recognition**

Chuang Caleb
20204134

## **Package Imports**

In [490]:
import pandas as pd
import numpy as np

from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import ClusterCentroids

from skmultilearn.adapt import MLkNN
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, hamming_loss, f1_score


## **Choosing Parameters**

In [491]:
target_label = 'risk'
feature_start = 'Number_of_Pitches'
feature_end = 'Metrical_Diversity'
k_value = 3
rand_state = 14

## **Loading Data**

### Importing Dataset

#### Features

In [492]:
# Access song_theme_feature_database
song_theme_feature_database_path = '../data/features/song_theme_feature_database.csv'
raw_feature_df = pd.read_csv(song_theme_feature_database_path)


#### Labels

In [493]:
# Access song_theme_labels_database
song_theme_label_database_path = '../data/song_theme_label_database.xlsx'
raw_label_df = pd.read_excel(song_theme_label_database_path)

### Prepare datasets

Select the features for this iteration

In [494]:
# Feature Selection
feature_df = raw_feature_df.loc[:,feature_start:feature_end]


Select the labels for this iteration

In [495]:
# Filter for only recognized samples in labels_df
main_label_df = raw_label_df[raw_label_df.recognizable == 1]

# Filter out unused labels and metadata
main_label_df.drop(
    columns=main_label_df.columns[19:], axis=1, inplace=True)  # unused labels
main_label_df.drop(
    columns=main_label_df.columns[0:4], axis=1, inplace=True)  # metadata

# label_df = main_label_df[target_label]
label_df = main_label_df[[target_label]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Convert to numpy type

In [496]:
feature_np = feature_df.to_numpy()
label_np = label_df.to_numpy().astype(int)


Get ids of samples

In [497]:
ids_df = raw_feature_df['sample']


### Prepare variables

Splitting dataset

In [498]:
(x_train, x_test, y_train, y_test, ids_train, ids_test) = train_test_split(
    feature_np, label_np, ids_df, test_size=0.2, random_state=rand_state)

Sampling data for balanced dataset

In [499]:
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=0,shrinkage= 0.4)
X_resampled, y_resampled = ros.fit_resample(x_train, y_train)

from collections import Counter
print(sorted(Counter(y_resampled).items()))

[(0, 83), (1, 83)]


In [500]:
# from imblearn.under_sampling import RandomUnderSampler
# rus = RandomUnderSampler(random_state=0)
# X_resampled, y_resampled = rus.fit_resample(x_train, y_train)

# from collections import Counter
# print(sorted(Counter(y_resampled).items()))

## Preparing Model

In [501]:
# parameters = {'k': range(3,5), 's': [0.5, 0.7, 1.0]}
# score = 'f1_macro'
# from imblearn.ensemble import BalancedRandomForestClassifier

# classifier = GridSearchCV(MLkNN(), parameters, scoring=score)
classifier = MLkNN()
# classifier = BalancedRandomForestClassifier(n_estimators=500, random_state=rand_state)
classifier.fit(x_train, y_train)

# print (classifier.best_params_, classifier.best_score_)
# classifier.fit(X_resampled, y_resampled)

# predict
# predictions = classifier.predict(x_test)
predictions = classifier.predict(x_test).asformat('array')

In [502]:
# scores = cross_val_score(classifier, x_test, y_test, cv=3, scoring='accuracy')

print('Accuracy: ' + str(accuracy_score(y_test, predictions)))
print('Haming Loss: ' +  str(hamming_loss(y_test, predictions)))
print('F1-score: ' +  str(f1_score(y_test, predictions)))


flat_predictions = [val for sublist in predictions for val in sublist]
flat_y_true = [val for sublist in y_test for val in sublist]
# print(flat_predictions)
# print(flat_y_true)



Accuracy: 0.8846153846153846
Haming Loss: 0.11538461538461539
F1-score: 0.0


## Evaluation

In [503]:
results = pd.DataFrame(
    {'sample': ids_test, 'y_true': flat_y_true, 'y_pred': flat_predictions})
    # {'sample': ids_test, 'y_true': y_test, 'y_pred': predictions})

results = results.sort_index()

results

Unnamed: 0,sample,y_true,y_pred
2,A-Whole-New-World-(Theme-From-'Aladdin')_bitmi...,0,0
7,ABBA.Dancing queen K_bitmidi.mid,0,0
14,adele-someone_like_you_bitmidi.mid,0,0
15,Ain't-No-Mountain-High-Enough_bitmidi.mid,0,0
18,Aladdin(ArabianNights)_bitmidi.mid,1,0
20,All-I-Ask-Of-You-(From-'The-Phantom-Of-The-Ope...,0,0
23,AMRINGER.Can't help falling in love_bitmidi.mid,0,0
29,Avatar: The Last Airbender - Leaves From The V...,0,0
34,B.TYLER.HoldinG out for hero K_bitmidi.mid,0,0
42,BENSON.Nothing's gonna change my love for you_...,0,0
