In [1]:
import pandas as pd
import numpy as np
import plotly.figure_factory as ff

import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

In [44]:
rhythmic_patterns_all = pd.read_csv('data/tsv/os_patterns.tsv', sep = '\t', index_col = 0).fillna(0)
rhythmic_patterns_all.drop('no', axis = 1, inplace = True)
rhythmic_patterns_all.rename(lambda x: x+'_all', axis = 'columns', inplace = True)
both_hands = rhythmic_patterns_all.columns.to_list()

In [45]:
rhythmic_patterns_left = pd.read_csv('data/tsv/os_patterns_left.tsv', sep = '\t', index_col = 0).fillna(0)
rhythmic_patterns_left.drop('no', axis = 1, inplace = True)
rhythmic_patterns_left.rename(lambda x: x+'_left', axis = 'columns', inplace = True)
left_hand = rhythmic_patterns_all.columns.to_list()

In [46]:
rhythmic_patterns_right = pd.read_csv('data/tsv/os_patterns_right.tsv',sep = '\t', index_col = 0).fillna(0)
rhythmic_patterns_right.drop('no', axis = 1, inplace = True)
rhythmic_patterns_right.rename(lambda x: x+'_right', axis = 'columns', inplace = True)
right_hand = rhythmic_patterns_all.columns.to_list()

In [47]:
files = pd.read_csv('data/tsv/files.tsv', sep='\t')

In [48]:
files.head()

Unnamed: 0,id,D,no,dance,path,gt_mode,entropy,duration_entropy,onset_entropy,num_keys,...,TitiTitiTiti_right,TitiTitino_right,TitigiTa_right,TitigiTi_right,TitigiTigitigi_right,TitigiTiti_right,TitigiTitigi_right,Triole_right,nogiTimgi_right,nogiTiti_right
0,1,41,1,menuett,041/D041menuett01a.mscx,major,0.456823,1.547558,1.693202,3,...,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,41,1,trio,041/D041trio01b.mscx,major,0.578475,1.18684,2.01186,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,41,2,menuett,041/D041menuett02a.mscx,major,0.564445,1.41572,1.521429,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,41,2,trio,041/D041trio02b.mscx,major,0.6127,0.947097,2.14705,7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,41,3,menuett,041/D041menuett03a.mscx,major,0.588911,1.508821,1.763593,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Select the features

In [49]:
features = files.dropna()

Galopp and Cotillon appear only few times -> drop.
Ecossaise is easy to identify by meter -> drop

In [50]:
indexNames = features[(features['dance'] == 'galopp') | (features['dance'] == 'cotillon') | (features['dance'] == 'ecossaise')].index
features.drop(indexNames , inplace=True)

Add a numerical label because it is needed for the classification

In [51]:
dance_dict = {'deutscher':0, 'ländler':2, 'menuett':3, 'trio':4, 'walzer':5}

def label_dance(row):
    return dance_dict[row]

features['dance_num_label'] = features['dance'].apply (lambda row: label_dance(row))

Visualize the discriminativity of the features

In [52]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from matplotlib import colors

def tSNE(feature_matrix, number_of_dances=8):
    tsne_features = TSNE(n_components=2).fit_transform(feature_matrix)    
    features['TSNE1'] = tsne_features[:, 0]
    features['TSNE2'] = tsne_features[:, 1]

Make features a matrix. Save labels as an array.

In [53]:
selected_features = ['ratio_downbeat_non_downbeat', 'ratio_downbeat_non_downbeat_onset', 
                     'ratio_downbeat_non_downbeat_strictly','num_keys','event_density',
                     'sixths_count', 'thirds_count',
                     'downbeat_dur', 'entropy',
                     'duration_entropy', 'onset_entropy',
                     'maj-min-first', 'frac_zumpapa', 'frac_one_two',
                     'interval_downbeat_offbeat', 'eigth_notes_frac',
                     'maj_min_first_abs', 'start_end_key', 'onset_density']


In [112]:
feature_matrix = features[selected_features + right_hand + left_hand].to_numpy()
#feature_matrix = features.drop(columns=['dance', 'dance_num_label', 'path', 'gt_mode', 'id', 'D', 'no']).to_numpy()
labels = features[['dance_num_label']].to_numpy()
labels = np.ravel(labels)

Standardize the features

In [113]:
scaler = StandardScaler()
scaler.fit(feature_matrix)
feature_matrix_scaled = scaler.transform(feature_matrix)

Split into train and test

In [114]:
n_splits = 5

In [115]:
kf = KFold(n_splits=n_splits, shuffle=True)

In [116]:
n_classes = len(np.unique(labels))
global_confusion = np.zeros((n_classes, n_classes))
kappa_global = 0

for train_index, test_index in kf.split(feature_matrix_scaled):
    features_train, features_test = feature_matrix_scaled[train_index], feature_matrix_scaled[test_index]
    labels_train, labels_test = labels[train_index], labels[test_index]
    
    clf = svm.SVC(gamma='scale', C=100, decision_function_shape='ovr')
    clf.fit(features_train, labels_train)
    labels_predicted = clf.predict(features_test)
   
    cm = confusion_matrix(labels_test, labels_predicted)
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] # Normalize confusion map
    cm = np.around(cm, decimals=2)
    
    global_confusion = global_confusion + cm
    kappa_global = kappa_global + cohen_kappa_score(labels_test, labels_predicted)
    
kappa_global = kappa_global / n_splits
global_confusion = global_confusion / n_splits
global_confusion = np.around(global_confusion, decimals=2)

In [117]:
dance_list = list(dance_dict.keys())

fig = ff.create_annotated_heatmap(z=global_confusion, x=dance_list, y=dance_list)

#fig = go.Figure(data=go.Heatmap(z=cm, x=dance_list, y=dance_list))
fig.update_layout(title='Confusion Map For Dance Classification',
                  xaxis_title="Predicted labels", yaxis_title="True labels")
fig.data[0].update(zmin=0, zmax=1)
fig.show()

In [118]:
kappa_global

0.2746790044055003