# Objective Evaluation Metrics (ver. 2023.03.03)

In [1]:
import muspy as mu
import os, glob, mido
import numpy as np
from mido import MidiFile
import pretty_midi as pr
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
import seaborn as sns
from tqdm import tqdm
from math import log10,floor
from pathlib import Path
import pandas as pd

In [2]:
# Definitions
def round_sig(x, sig=3):
    """Round ``x`` to ``sig`` significant figures.

    Args:
        x: Real number to round (Python or NumPy scalar).
        sig: Number of significant figures to keep (default 3).

    Returns:
        ``x`` rounded to ``sig`` significant figures. Zero, NaN and +/-inf
        have no order of magnitude and are returned unchanged.
    """
    # log10(0) raises "math domain error" and floor() rejects NaN/inf, so
    # these values are passed through instead of crashing the caller
    # (e.g. a metric whose standard deviation is exactly 0).
    if x == 0 or x != x or abs(x) == float('inf'):
        return x
    # Exponent of the leading digit: 1234.5 -> 3, 0.0012 -> -3
    magnitude = int(floor(log10(abs(x))))
    return round(x, sig - magnitude - 1)

### Single Midi File Metrics: muspy

In [None]:
# Choose the MIDI file to inspect (alternative inputs kept for reference)
# file="/data/data1/users/astais/Unprocessed-Datasets/Primers/elise_format0.mid"
# file="/data/data1/users/astais/Unprocessed-Datasets/classic-piano/clementi_opus36_6_2_format0"+".mid"
# file="/data/data1/users/astais/Midi-Outputs/perceiver-ar/GiantMIDI-Piano_128_256_0.7_20221115_190435_3*.mid"
file = "/data/data1/users/astais/Unprocessed-Datasets/Rock-Piano-MIDI-Dataset/Rock-Piano-01581.mid"

# Parse the file with mido, print its `length`, then convert it to a muspy
# object and print that object's summary
mid = MidiFile(file)
print(mid.length)
mus = mu.from_mido(mid, duplicate_note_mode='fifo')
mus.print()

# Pitch-related muspy metrics, reported one per line as "<label>: <value>"
print('\nObjective Metrics - Pitch-related')
pitch_metrics = [
    ("Pitch Range", mu.pitch_range),
    ("Pitches Used", mu.n_pitches_used),
    ("Pitch Classes Used", mu.n_pitch_classes_used),
    ("Polyphony", mu.polyphony),
    # polyphony_rate is called with a second argument of 2, as in the dataset loop
    ("Polyphonic Rate", lambda music: mu.polyphony_rate(music, 2)),
    ("Scale Consistency", mu.scale_consistency),
    ("Pitch Entropy", mu.pitch_entropy),
    ("Pitch Class Entropy", mu.pitch_class_entropy),
]
for label, metric_fn in pitch_metrics:
    print(f"{label}: {metric_fn(mus)}")

# Rhythm-related muspy metrics
print('\nObjective Metrics - Rhythm-related')
print(f"Empty Beat Rate: {mu.empty_beat_rate(mus)}")

### Single File Metrics: pretty-midi

In [None]:
# Load the file chosen in the previous cell with pretty_midi and show the
# muspy object built from it
midi = pr.PrettyMIDI(file)
musi = mu.from_pretty_midi(midi)
print(musi)

# Tempo estimate in bpm; the pretty_midi estimate is halved before reporting
tempo_estimate = midi.estimate_tempo() / 2
print(f"Tempo Estimation: {tempo_estimate}")

# Relative amount of each semitone across the entire song, a proxy for key
# total_velocity = sum(sum(midi.get_chroma()))
# print([sum(semitone)/total_velocity for semitone in midi.get_chroma()])

# Cannot be used in output midi
# print("Tempo Changes: "+str(midi.get_tempo_changes()))

# Empirical estimate of tempos and each tempo's probability
# print(midi.estimate_tempi())

# print(midi.get_beats())

# Pitch-class histogram followed by the estimated beat start
print(midi.get_pitch_class_histogram())
print(midi.estimate_beat_start())

# for i in range(12):
#     print(pr.key_number_to_key_name(i))

# Pitch-class names indexed by pitch-class number (0 = C ... 11 = B)
pitch_classes = "C C# D D# E F F# G G# A A# B".split()

## Calculating Metrics

### Set Basic Directories (Input/Output Dataset)

In [None]:
# # Directories for Input Dataset
# dataset='GiantMIDI-Piano'
# # Input dataset midi files directory
# basic_dir='/data/data1/users/astais/Unprocessed-Datasets/'
# directory=basic_dir+dataset
# # Basic Directory for saving arrays and plots
# array_save_dir="/data/data1/users/astais/Objective-Evaluation/metrics-files/"
# plot_save_dir='/data/data1/users/astais/Objective-Evaluation/plots/single-dataset/'+dataset+'/'

# Directories for Output Dataset
model='music-transformer'
dataset=model+'_'+"adl-piano-midi"
# Output dataset midi files directory
directory='/data/data1/users/astais/Midi-Outputs/'+model+'/'+dataset+'/no_primer/'
# Basic Directory for saving arrays and plots
array_save_dir="/data/data1/users/astais/Objective-Evaluation/metrics-files/"
plot_save_dir='/data/data1/users/astais/Objective-Evaluation/plots/single-output-dataset/'+dataset+'/'

# Create the output directories. exist_ok=True makes this safe to re-run and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
# array_save_dir is created too, because the metrics array is written there.
os.makedirs(plot_save_dir, exist_ok=True)
os.makedirs(array_save_dir, exist_ok=True)

### Set Metrics

In [4]:
# Display names of the ten metrics. The position of each name is used by the
# plotting loops as a row index into the transposed metrics array, so the
# order must match columns 1-10 filled in by the metrics-calculation loop.
# The names are also used in plot titles, output file names and as dataframe
# column names.
# NOTE(review): the column labelled 'Empty Bar Rate' is filled with
# mu.empty_beat_rate(...) - confirm whether the label or the metric is intended.
metrics=['Pitch Range','Pitches Used','Pitch Classes',
         'Polyphony','Polyphony Rate','Scale Consistency',
         'Pitch Entropy','Pitch Class Entropy',
         'Empty Bar Rate','Tempo Estimation']

### Calculate Muspy Metrics on given Input/Output Dataset

In [None]:
# Collect the MIDI files once, so the array size, the progress total and the
# iteration all refer to the same files. Sorting makes the file numbering
# reproducible between runs.
midi_paths = sorted(Path(directory).rglob('*.mid*'))
mid_num = len(midi_paths)

# Metrics array: one row per file. Column 0 is the 1-based file number and
# columns 1-10 hold the ten metrics. Rows start as NaN so that files which
# fail, or are never reached after an interrupt, are removed by the NaN
# clean-up cell instead of entering the statistics as zeros.
mus_metr = np.full((mid_num, 11), np.nan)

# Song Num-Name Dictionary: file number -> file path
num_name = {}

# Iterate over the files and store the muspy / pretty_midi metrics
count = -1
for count, f in enumerate(tqdm(midi_paths,
                               desc="Processing "+str(mid_num)+" midi files")):
    mus_metr[count][0] = count+1
    # Update num-name song dictionary
    num_name[count+1] = str(f)
    try:
        midi = pr.PrettyMIDI(str(f))
        mus = mu.from_mido(MidiFile(str(f)), duplicate_note_mode='fifo')

        # Compute every metric before storing, so a failure part-way through
        # cannot leave a half-filled row
        mus_metr[count][1:] = [
            mu.pitch_range(mus),
            mu.n_pitches_used(mus),
            mu.n_pitch_classes_used(mus),
            mu.polyphony(mus),
            mu.polyphony_rate(mus, 2),
            mu.scale_consistency(mus),
            mu.pitch_entropy(mus),
            mu.pitch_class_entropy(mus),
            mu.empty_beat_rate(mus),
            midi.estimate_tempo()/2,
        ]
    except KeyboardInterrupt:
        print('Keyboard Interrupt.')
        break
    except Exception as err:
        # Keep going, but mark the row as invalid and report the cause
        mus_metr[count][1:] = np.nan
        print("Error happened. File: "+num_name.get(count+1)
              + " ("+type(err).__name__+": "+str(err)+")")

print('Done!')


### Remove NaN Rows from the Muspy Metrics Array

In [None]:
# Rows (files) with at least one NaN metric are treated as corrupted.
# Column 0 of each row is the file number, which is the key into num_name.
nan_rows = np.unique(np.argwhere(np.isnan(mus_metr))[:, 0])
# Print the actual row indices
print(nan_rows)

# Remove all flagged rows in a single call; np.delete returns a new array,
# so mus_metr is left untouched
mus_del = np.delete(mus_metr, nan_rows, axis=0)

# Sanity check: this should print an empty array
print(np.argwhere(np.isnan(mus_del)))
    

### Save Muspy Metrics Array on File

In [None]:
# Write the cleaned metrics array to <array_save_dir>metrics_<dataset>.npy
metrics_path = f"{array_save_dir}metrics_{dataset}.npy"
with open(metrics_path, 'wb') as npy_file:
    np.save(npy_file, mus_del)

### Load Muspy Metrics Array from File

In [None]:
# Read the metrics array back from <array_save_dir>metrics_<dataset>.npy
metrics_path = f"{array_save_dir}metrics_{dataset}.npy"
with open(metrics_path, 'rb') as npy_file:
    mus_del = np.load(npy_file)
print(mus_del)

## Single Dataset Histogram Plots

In [None]:
# Transpose the metrics matrix (one row per metric) and drop the file-index row
mus_metr_t = np.delete(mus_del.T, 0, 0)
# print(mus_metr_t)
mus_mean = np.mean(mus_metr_t, axis=1)
mus_std = np.std(mus_metr_t, axis=1)
# print(np.ptp(mus_metr_t, axis=1)) # Largest spread between two elements; of little interest

# TODO: print the titles of the x best pieces for a given metric, e.g. with
# percentile(a, q[, axis, out, ...]) or
# quantile(a, q[, axis, out, overwrite_input, ...])

# Creating plots
matplotlib.rc_file_defaults()
# sns.set_theme()
for metric_num, metric in enumerate(tqdm(metrics)):
    # 'Pitch Classes' is plotted as a discrete probability histogram; every
    # other metric is a density histogram with a KDE curve
    discrete = metric in ['Pitch Classes']

    # The locals are named mean_val / std_val / ax on purpose: calling them
    # mu / plt would overwrite the muspy and pyplot imports and break the
    # earlier cells when they are re-run.
    mean_val = round_sig(mus_mean[metric_num])
    if discrete:
        std_val = round_sig(mus_std[metric_num], 1)
    else:
        std_val = round_sig(mus_std[metric_num])

    sns.set(rc={'figure.figsize': (8, 6)})
    if discrete:
        ax = sns.histplot(mus_metr_t[metric_num],
                          color='darkblue',
                          stat="probability", discrete=True, kde_kws=dict(cut=3),
                          alpha=.4, edgecolor=(1, 1, 1, .4))
        plot_kind = "Probability"
        legend_labels = ['Histogram Bars']
    else:
        ax = sns.histplot(mus_metr_t[metric_num], kde=True,
                          color='darkblue',
                          stat="density", kde_kws=dict(cut=3),
                          alpha=.4, edgecolor=(1, 1, 1, .4))
        plot_kind = "Density"
        legend_labels = ['Kernel Density Estimation', 'Histogram Bars']

    ax.set(xlabel=metric, title=dataset+': '+metric+" "+plot_kind)
    ax.xaxis.set_minor_locator(AutoMinorLocator(5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
    ax.grid(which='minor', alpha=0.3)
    ax.grid(which='major', alpha=0.6)
    # Display mu, sigma in the legend title (raw strings keep the LaTeX
    # backslashes without invalid-escape warnings)
    ax.legend(legend_labels, loc='best',
              title=r'$\mu='+str(mean_val)+r',\ \sigma=$'+str(std_val))
    ax.figure.savefig(plot_save_dir+dataset+"_"+metric+"_"+plot_kind.lower()+".png", dpi=300)
    ax.figure.show()
    # Clear the figure so the next metric starts from an empty canvas
    ax.figure.clf()
print("Done!")
        

## Dataset Trained Models Histogram Plots

### Set Directories

In [None]:
# Dataset whose training data and model outputs are compared
dataset="ailabs1k7"
basic_directory="/data/data1/users/astais/Objective-Evaluation/"
# Basic Directory for saving arrays and plots
# NOTE(review): array_save_dir points into the plots tree here and replaces
# the value set in the earlier directory cell - confirm this is intended.
array_save_dir=basic_directory+"plots/dataset-trained-models/"
plot_save_dir=basic_directory+"plots/dataset-trained-models/"+dataset+'/'

# Create dir for saving plots; exist_ok=True makes the cell safe to re-run
os.makedirs(plot_save_dir, exist_ok=True)

# Open the training dataset metrics
with open(basic_directory+"metrics-files/"+'metrics_'+dataset+'.npy', 'rb') as f:
    mus_dataset = np.load(f)
# Open the Music Transformer output metrics
with open(basic_directory+"metrics-files/"+'metrics_'+"music-transformer_"+dataset+'.npy', 'rb') as f:
    mus_model_1 = np.load(f)
# Open the Perceiver-AR output metrics
with open(basic_directory+"metrics-files/"+'metrics_'+"perceiver-ar_"+dataset+'.npy', 'rb') as f:
    mus_model_2 = np.load(f)

### Statistical Processing Muspy Metrics

In [None]:
# Transpose each metrics matrix (one row per metric) and drop the file-index row
mus_dataset_t = np.delete(mus_dataset.T, 0, 0)
mus_model_1_t = np.delete(mus_model_1.T, 0, 0)
mus_model_2_t = np.delete(mus_model_2.T, 0, 0)

# Per-metric mean / standard deviation: training dataset, then model 1
# (Music Transformer), then model 2 (Perceiver-AR)
mus_mean=np.mean(mus_dataset_t,axis=1)
mus_std=np.std(mus_dataset_t,axis=1)
mus_mean_2=np.mean(mus_model_1_t,axis=1)
mus_std_2=np.std(mus_model_1_t,axis=1)
mus_mean_3=np.mean(mus_model_2_t,axis=1)
mus_std_3=np.std(mus_model_2_t,axis=1)

# Creating plots
metrics=['Pitch Range','Pitches Used','Pitch Classes',
         'Polyphony','Polyphony Rate','Scale Consistency',
         'Pitch Entropy','Pitch Class Entropy',
         'Empty Bar Rate','Tempo Estimation']

# Legend location per metric
legend_pos={'Pitch Range':'best','Pitches Used':'best','Pitch Classes':'upper left',
         'Polyphony':'best','Polyphony Rate':'best','Scale Consistency':'upper left',
         'Pitch Entropy':'upper left','Pitch Class Entropy':'upper left',
         'Empty Bar Rate':'best','Tempo Estimation':'upper left'}

# One entry per plotted series, in legend order:
# (legend label, transposed data, means, standard deviations, colour)
series=[('Training Dataset',mus_dataset_t,mus_mean,mus_std,'r'),
        ('Music-Transformer Outputs',mus_model_1_t,mus_mean_2,mus_std_2,'g'),
        ('Perceiver-AR Outputs',mus_model_2_t,mus_mean_3,mus_std_3,'b')]

matplotlib.rc_file_defaults()
for metric_num, metric in enumerate(tqdm(metrics)):
    # 'Pitch Classes' is drawn as overlaid discrete histograms; every other
    # metric as overlaid kernel density estimates
    discrete = metric in ['Pitch Classes']

    sns.set(rc={'figure.figsize':(8,6)})

    # The locals are named mean_val / std_val / ax on purpose: calling them
    # mu / plt would overwrite the muspy and pyplot imports and break the
    # earlier cells when they are re-run.
    legend_labels=[]
    for series_idx,(label,data,means,stds,color) in enumerate(series):
        mean_val=round_sig(means[metric_num])
        # NOTE(review): for the discrete metric only the training dataset's
        # sigma is rounded to 1 significant figure (the model outputs use 3).
        # Kept as in the original notebook - confirm whether it is intended.
        std_sig=1 if (discrete and series_idx==0) else 3
        std_val=round_sig(stds[metric_num],std_sig)
        # Raw strings keep the LaTeX backslashes without invalid-escape warnings
        legend_labels.append(label+':\n'+r'$\mu='+str(mean_val)+r',\ \sigma=$'+str(std_val))

        if discrete:
            ax=sns.histplot(data[metric_num], color=color, stat="probability",
                            discrete=True, kde_kws=dict(cut=3),
                            alpha=.4, element="step")
        else:
            # clip=(0, inf): do not evaluate the density below zero
            ax=sns.kdeplot(data[metric_num], fill=True, color=color,
                           clip=(0,float('inf')))

    plot_kind="Probability" if discrete else "Density"
    legend_title="Histogram Bars" if discrete else "Kernel Density Estimation"

    ax.set(xlabel=metric, title=dataset+' Trained Models: '+metric+" "+plot_kind)
    ax.xaxis.set_minor_locator(AutoMinorLocator(5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))
    ax.grid(which='minor', alpha=0.3)
    ax.grid(which='major', alpha=0.6)
    # Display mu, sigma of every series in the legend
    ax.legend(legend_labels, loc=legend_pos.get(metric), title=legend_title)
    ax.figure.savefig(plot_save_dir+dataset+"_trained-models_"+metric+"_"+plot_kind.lower()+".png",dpi=300)
    ax.figure.show()
    # Clear the figure so the next metric starts from an empty canvas
    ax.figure.clf()
print("Done!")
        

## Creating Dataframe

### Set Directories

In [None]:
# Root directory of the objective-evaluation results
basic_directory = "/data/data1/users/astais/Objective-Evaluation/"
# Plots are saved directly under <basic_directory>plots/
plot_save_dir = f"{basic_directory}plots/"

### Open Metrics Files

In [None]:
datasets=['adl-piano-midi','ailabs1k7','GiantMIDI-Piano',
          'Los-Angeles-MIDI-Dataset-segment','maestro-v3.0.0','Rock-Piano-MIDI-Dataset']

# File-name prefix per group of metrics files, in loading order:
# '' -> training datasets, then Music Transformer outputs, then Perceiver-AR
# outputs. The resulting lists hold entries 0-5, 6-11 and 12-17 respectively.
file_prefixes=['','music-transformer_','perceiver-ar_']

# mus_datasets_df keeps the arrays as loaded (file-index column included);
# mus_datasets keeps them transposed with the file-index row removed
mus_datasets=[]
mus_datasets_df=[]
for file_prefix in file_prefixes:
    for dataset in datasets:
        with open(basic_directory+"metrics-files/"+'metrics_'+file_prefix+dataset+'.npy', 'rb') as f:
            f2=np.load(f)
        mus_datasets_df.append(f2)
        mus_datasets.append(np.delete(f2.T,0,0))
        print(mus_datasets[-1])

### Create Dataframe

In [None]:
# Column names of every loaded metrics array ('Number' is the file index)
metric_columns=['Number','Pitch Range','Pitches Used','Pitch Classes','Polyphony',
                'Polyphony Rate','Scale Consistency','Pitch Entropy',
                'Pitch Class Entropy','Empty Bar Rate','Tempo Estimation']

# Dataset names stored in the 'Dataset' column, in the order the arrays were
# loaded. Two names contain '\n' - presumably to wrap long tick labels in
# the plots; later cells match on these exact strings.
dataset_labels=['adl-piano-midi','ailabs1k7','GiantMIDI-Piano',
                'Los-Angeles-MIDI-\nDataset-segment','maestro-v3.0.0',
                'Rock-Piano-\nMIDI-Dataset']

# Values of the 'Dataset Type' column. mus_datasets_df holds one array per
# (type, dataset) pair: entries 0-5 are the training datasets, 6-11 the
# Music Transformer outputs and 12-17 the Perceiver-AR outputs.
dataset_types=["Training Dataset","Music Transformer Outputs","Perceiver-AR Outputs"]

frames=[]
for type_idx,dataset_type in enumerate(dataset_types):
    for label_idx,dataset_label in enumerate(dataset_labels):
        values=mus_datasets_df[type_idx*len(dataset_labels)+label_idx]
        frame=pd.DataFrame(values, columns=metric_columns)
        # Identify the origin of each row in the two leading columns
        frame.insert(loc=0, column='Dataset', value=[dataset_label] * len(values))
        frame.insert(loc=1, column='Dataset Type', value=[dataset_type] * len(values))
        frames.append(frame)

# Each frame keeps its own 0-based index, so index values repeat after concat
df=pd.concat(frames)
print(df)

# Save dataframe to pickle file
df.to_pickle(basic_directory+'/metrics-files/dataframe.pkl')

## Creating Metrics Tables & Plots

In [6]:
# Load the combined metrics dataframe from its pickle file
basic_directory = "/data/data1/users/astais/Objective-Evaluation/"
dataframe_path = f"{basic_directory}/metrics-files/dataframe.pkl"
df = pd.read_pickle(dataframe_path)
print(df)

                       Dataset          Dataset Type  Number  Pitch Range  \
0               adl-piano-midi      Training Dataset     1.0         43.0   
1               adl-piano-midi      Training Dataset     2.0         58.0   
2               adl-piano-midi      Training Dataset     3.0         67.0   
3               adl-piano-midi      Training Dataset     4.0         31.0   
4               adl-piano-midi      Training Dataset     5.0         38.0   
..                         ...                   ...     ...          ...   
595  Rock-Piano-\nMIDI-Dataset  Perceiver-AR Outputs   596.0         37.0   
596  Rock-Piano-\nMIDI-Dataset  Perceiver-AR Outputs   597.0         34.0   
597  Rock-Piano-\nMIDI-Dataset  Perceiver-AR Outputs   598.0         48.0   
598  Rock-Piano-\nMIDI-Dataset  Perceiver-AR Outputs   599.0         34.0   
599  Rock-Piano-\nMIDI-Dataset  Perceiver-AR Outputs   600.0         33.0   

     Pitches Used  Pitch Classes  Polyphony  Polyphony Rate  \
0           

### Statistic Measures Tables

In [8]:
# Drop the per-file index column so it does not appear in the statistics.
# Assigning a new frame with errors='ignore' keeps the cell re-runnable:
# the in-place drop raised KeyError the second time the cell was executed.
df = df.drop(columns='Number', errors='ignore')
pd.set_option('display.precision', 3)
datasets=['adl-piano-midi','ailabs1k7','GiantMIDI-Piano',
          'Los-Angeles-MIDI-\nDataset-segment','maestro-v3.0.0','Rock-Piano-\nMIDI-Dataset']
dataset_types=["Training Dataset","Music Transformer Outputs","Perceiver-AR Outputs"]

# Write one Excel table of summary statistics per (dataset, dataset type)
for dataset in datasets:
    # Some dataset names contain '\n'; strip it so it does not end up in
    # the file name
    file_stem = dataset.replace('\n', '')
    for dataset_type in dataset_types:
        selection = df[(df["Dataset"] == dataset) & (df["Dataset Type"] == dataset_type)]
        # describe() statistics without the 'count' row
        summary = selection.describe().drop(index='count')
        summary.to_excel(file_stem+"_"+dataset_type+".xlsx")

### Kernel Density Plots

In [None]:
# mus_mean=[]
# mus_std=[]
# for i in range(6):
#     mus_mean.append(np.mean(mus_datasets[i],axis=1))
#     mus_std.append(np.std(mus_datasets[i],axis=1))

# #Creating plots
# metrics=['Pitch Range','Pitches Used','Pitch Classes',
#          'Polyphony','Polyphony Rate','Scale Consistency',
#          'Pitch Entropy','Pitch Class Entropy',
#          'Empty Bar Rate','Tempo Estimation']

# legend_pos={'Pitch Range':'best','Pitches Used':'best','Pitch Classes':'upper left',
#          'Polyphony':'best','Polyphony Rate':'best','Scale Consistency':'upper left',
#          'Pitch Entropy':'upper left','Pitch Class Entropy':'upper left',
#          'Empty Bar Rate':'best','Tempo Estimation':'upper left'}

# colors=['r','b','g','y','c','m']

# matplotlib.rc_file_defaults()
# metric_num=-1
# for metric in tqdm(metrics):
#     metric_num+=1
    
    
#     i=0
#     sns.set(rc={'figure.figsize':(8,6)})
    
#     mu,sigma=[],[]
#     for i in range(6): # for each dataset calculate mean and std
#         mu.append(round_sig(mus_mean[i][metric_num]))
#         sigma.append(round_sig(mus_std[i][metric_num]))
#         plt=sns.kdeplot(mus_datasets[i][metric_num],fill=True,color=colors[i],clip=(0,float('inf')))
        
        
    
# #     # Discrete Metrics 
# #     if metric in ['Pitch Classes']:
# #         mu, sigma = round_sig(mus_mean[metric_num]),round_sig(mus_std[metric_num],1)

# #         sns.set(rc={'figure.figsize':(8,6)})
# #         plt=sns.histplot(mus_metr_t[metric_num], 
# #                      color = 'darkblue', 
# #                      stat="probability", discrete=True, kde_kws=dict(cut=3),
# #             alpha=.4, edgecolor=(1, 1, 1, .4))
# #         plt.set(xlabel =metric, title =dataset+': '+metric+" Probability")
# #         plt.xaxis.set_minor_locator(AutoMinorLocator(5))
# #         plt.yaxis.set_minor_locator(AutoMinorLocator(5))
# #         plt.grid(which='minor', alpha=0.3)
# #         plt.grid(which='major', alpha=0.6)
# #         # Display mu,sigma
# # #         plt.text(.03, .95, r'$\mu='+str(mu)+',\ \sigma=$'+str(sigma),ha='left', va='top', transform=plt.transAxes)
# #         plt.legend(['Histogram Bars'],loc='best',title=r'$\mu='+str(mu)+',\ \sigma=$'+str(sigma))
# # #         sns.move_legend(plt, "best", bbox_to_anchor=(0.5, -0.1),fancybox=True, shadow=True)
# #         plt.figure.savefig(plot_save_dir+dataset+"_"+metric+"_probability.png",dpi=300)
# #         plt.figure.show()
# #         plt.figure.clf()

# #     # Continuous Metrics
# #     else:

#     plt.set(xlabel =metric, title ='Datasets: '+metric+" Density")
#     plt.xaxis.set_minor_locator(AutoMinorLocator(5))
#     plt.yaxis.set_minor_locator(AutoMinorLocator(5))
#     plt.grid(which='minor', alpha=0.3)
#     plt.grid(which='major', alpha=0.6)
#     # Display mu,sigma
#     plt.legend([datasets[0]+' :\n'+r'$\mu='+str(mu[0])+',\ \sigma=$'+str(sigma[0]),
#                 datasets[1]+' :\n'+r'$\mu='+str(mu[1])+',\ \sigma=$'+str(sigma[1]),
#                 datasets[2]+' :\n'+r'$\mu='+str(mu[2])+',\ \sigma=$'+str(sigma[2]),
#                 datasets[3]+' :\n'+r'$\mu='+str(mu[3])+',\ \sigma=$'+str(sigma[3]),
#                 datasets[4]+' :\n'+r'$\mu='+str(mu[4])+',\ \sigma=$'+str(sigma[4]),
#                 datasets[5]+' :\n'+r'$\mu='+str(mu[5])+',\ \sigma=$'+str(sigma[5])],
#                loc=legend_pos.get(metric),title="Kernel Density Estimation")
#     plt.figure.savefig(plot_save_dir+"datasets_"+metric+"_density.png",dpi=300)
#     plt.figure.show()
#     plt.figure.clf()
# print("Done!")        

In [None]:
def _finalize_plot(ax, title, out_path):
    """Title the axes, add minor ticks and grid lines, then save, show and clear the figure.

    ax       -- Axes object returned by the seaborn plotting call
    title    -- text handed to ``ax.set(title=...)``
    out_path -- destination passed to ``Figure.savefig`` (written at 300 dpi)
    """
    ax.set(title=title)
    # Five minor intervals between consecutive major ticks, on both axes.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_minor_locator(AutoMinorLocator(5))
    # Faint grid on the minor ticks, stronger grid on the major ticks.
    for tick_level, opacity in (('minor', 0.3), ('major', 0.6)):
        ax.grid(which=tick_level, alpha=opacity)
    fig = ax.figure
    fig.savefig(out_path, dpi=300)
    fig.show()
    # Wipe the figure once it has been written to disk.
    fig.clf()


for metric in tqdm(metrics):

    # Bar plot: one group per dataset, coloured by dataset type, "sd" error bars.
    sns.set(rc={'figure.figsize':(12,8)})
    g = sns.barplot(data=df, x="Dataset", y=metric, errorbar="sd", capsize=.1, hue='Dataset Type')
    _finalize_plot(g, metric+" Barplot", plot_save_dir+metric+"_barplot.png")

    # Box plot -- currently disabled; uncomment to export it as well.
    # sns.set(rc={'figure.figsize':(12,8)})
    # g = sns.boxplot(data=df, x="Dataset", y=metric, hue='Dataset Type')
    # g.set(xlabel=metric)
    # _finalize_plot(g, 'Datasets: '+metric+" Mean", plot_save_dir+metric+"_boxplot.png")

    # Violin plot: density clipped to the observed data range (cut=0), all violins equally wide.
    sns.set(rc={'figure.figsize':(12,8)})
    g = sns.violinplot(data=df, x="Dataset", y=metric, cut=0, scale='width', hue='Dataset Type')
    _finalize_plot(g, metric+" Violinplot", plot_save_dir+metric+"_violinplot.png")

In [12]:
# Keep every row whose "Dataset Type" is not "Training Dataset".
# A plain inequality is enough: any value equal to that (non-empty) string is
# truthy anyway, so an additional astype(bool) mask would not change the result.
df_sub = df[df["Dataset Type"] != "Training Dataset"].copy()
print(df_sub)

# One entry per figure that is exported for every metric:
# (seaborn plotting function, plot-specific keyword arguments, title suffix, file-name suffix)
plot_specs = [
    (sns.barplot, dict(errorbar="sd", capsize=.1),
     " Barplot", "_dataset-type(no-training)_barplot.png"),
    (sns.violinplot, dict(cut=0, scale='width'),
     " Violinplot", "_dataset-type(no-training)_violinplot.png"),
]

# The figure size is the same for every plot, so configure it once up front.
sns.set(rc={'figure.figsize':(12,8)})

for metric in tqdm(metrics):
    for plotter, plot_kwargs, title_suffix, file_suffix in plot_specs:
        g = plotter(data=df_sub, x="Dataset", y=metric, **plot_kwargs)
        g.set(title=metric+title_suffix)
        # Five minor intervals between consecutive major ticks, on both axes.
        g.xaxis.set_minor_locator(AutoMinorLocator(5))
        g.yaxis.set_minor_locator(AutoMinorLocator(5))
        g.grid(which='minor', alpha=0.3)
        g.grid(which='major', alpha=0.6)
        # NOTE(review): the file name carries no directory prefix, so the image is
        # written relative to the current working directory -- confirm this is intended.
        g.figure.savefig(metric+file_suffix, dpi=300, bbox_inches='tight')
        g.figure.show()
        # Wipe the figure once it has been written to disk.
        g.figure.clf()

                       Dataset               Dataset Type  Number  \
0               adl-piano-midi  Music Transformer Outputs     1.0   
1               adl-piano-midi  Music Transformer Outputs     2.0   
2               adl-piano-midi  Music Transformer Outputs     3.0   
3               adl-piano-midi  Music Transformer Outputs     4.0   
4               adl-piano-midi  Music Transformer Outputs     5.0   
..                         ...                        ...     ...   
595  Rock-Piano-\nMIDI-Dataset       Perceiver-AR Outputs   596.0   
596  Rock-Piano-\nMIDI-Dataset       Perceiver-AR Outputs   597.0   
597  Rock-Piano-\nMIDI-Dataset       Perceiver-AR Outputs   598.0   
598  Rock-Piano-\nMIDI-Dataset       Perceiver-AR Outputs   599.0   
599  Rock-Piano-\nMIDI-Dataset       Perceiver-AR Outputs   600.0   

     Pitch Range  Pitches Used  Pitch Classes  Polyphony  Polyphony Rate  \
0           35.0          15.0            8.0   2.354839        0.376344   
1           29.0   

100%|███████████████████████████████████████████| 10/10 [00:13<00:00,  1.34s/it]


<Figure size 1200x800 with 0 Axes>

In [14]:
# Load the metrics dataframe from its pickle file.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a trusted source.
basic_directory="/data/data1/users/astais/Objective-Evaluation/"
df = pd.read_pickle(basic_directory+'/metrics-files/dataframe.pkl')
# Drop the 'Number' column so it does not show up in the correlation matrix.
df = df.drop(columns='Number')
# Correlate the numeric metric columns only. The frame still holds the text columns
# "Dataset" and "Dataset Type"; calling corr() without numeric_only triggers a
# deprecation warning on pandas 1.5 and raises on pandas 2.x.
df2=df.corr(numeric_only=True)
print(df2)

                     Pitch Range  Pitches Used  Pitch Classes  Polyphony  \
Pitch Range             1.000000      0.858416       0.576720   0.580535   
Pitches Used            0.858416      1.000000       0.750763   0.531575   
Pitch Classes           0.576720      0.750763       1.000000   0.334636   
Polyphony               0.580535      0.531575       0.334636   1.000000   
Polyphony Rate          0.502813      0.431317       0.318260   0.809296   
Scale Consistency      -0.351587     -0.574713      -0.554998  -0.145240   
Pitch Entropy           0.793056      0.887345       0.774808   0.499357   
Pitch Class Entropy     0.472365      0.674362       0.802347   0.274008   
Empty Bar Rate         -0.061159     -0.087311      -0.081752  -0.071619   
Tempo Estimation        0.197740      0.154091       0.095040   0.090205   

                     Polyphony Rate  Scale Consistency  Pitch Entropy  \
Pitch Range                0.502813          -0.351587       0.793056   
Pitches Used     

  df2=df.corr()
