# Dataset statistics

In [1]:
import pandas as pd
import json

In [2]:
# Dataset identifiers, listed in the order their rows are emitted in the
# statistics table. Every id maps to a description string, all of which are
# currently empty.
ids = dict.fromkeys(
    [
        'creighton',
        'erk',
        'boehme',
        'han',
        'shanxi',
        'natmin',
        'liber-antiphons',
        'liber-alleluias',
        'liber-responsories',
    ],
    '',
)

In [7]:
# Results of the tolerance-parameter search. Downstream code indexes this by
# dataset id and reads the 'tolerance' and 'entropy' entries of each record.
# A context manager is used so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open('../results/huron-tolerance-parameter-search.json', 'r') as tolerance_file:
    tolerance_parameters = json.load(tolerance_file)

In [12]:
# Print a LaTeX `tabular*` with one row per dataset in `ids`: number of songs,
# number of phrases, mean (std) phrase length, mean (std) phrase duration, and
# the tolerance / entropy values looked up in `tolerance_parameters`.
# A final row reports the summed song and phrase counts.
#
# All LaTeX backslashes are written as explicit `\\` escapes; sequences such as
# `\e` or `\m` are invalid escapes in ordinary string literals and trigger
# warnings on recent Python versions.
print("""\\begin{tabular*}{\\textwidth}{
    l@{\\extracolsep{\\fill} }
    r@{\\extracolsep{\\fill} }
    r@{\\extracolsep{\\fill} }
    r@{\\extracolsep{\\fill} }
    r@{\\extracolsep{\\fill} }
    r@{\\extracolsep{\\fill} }
    r}""")
print('\\toprule')
print('id & songs & phrases & avg len & avg dur &$\\epsilon$ &$H_\\epsilon$ \\\\')
print('\\midrule')

total_phrases = 0
total_songs = 0

for dataset_id in ids:
    path = f'../data/{dataset_id}-phrase-contours.csv'
    # NOTE: `df` stays bound to the last dataset read once the loop finishes.
    df = pd.read_csv(path, index_col=0)
    num_phrases = len(df)
    num_songs = len(df['song_id'].unique())
    id_col = f'\\texttt{{{dataset_id}}}'
    avg_phrase_len = df['phrase_length'].mean()
    std_phrase_len = df['phrase_length'].std()

    # Totals are plain sums of the per-dataset counts.
    total_phrases += num_phrases
    total_songs += num_songs

    tol = tolerance_parameters[dataset_id]['tolerance']
    entropy = tolerance_parameters[dataset_id]['entropy']

    len_cell = f'{avg_phrase_len:.1f} ({std_phrase_len:.1f})'
    if dataset_id.startswith('liber'):
        # The duration column is deliberately replaced by a dash for the
        # liber-* datasets.
        dur_cell = '---'
    else:
        avg_phrase_dur = df['phrase_duration'].mean()
        std_phrase_dur = df['phrase_duration'].std()
        dur_cell = f'{avg_phrase_dur:.1f} ({std_phrase_dur:.1f})'

    # Adjacent string literals are concatenated by the parser, so (unlike a
    # backslash continuation inside a literal) no source indentation leaks
    # into the printed row. The LaTeX cells themselves are unchanged.
    row = (
        f'{id_col: <30}& {num_songs: >5} & {num_phrases: >5} '
        f'& {len_cell: >11} & {dur_cell: >11} '
        f'& {tol:.1f} &{entropy:.2f} \\\\'
    )
    print(row)
print(f'\\midrule Total: &{total_songs} & {total_phrases} &&&&\\\\')
print('\\bottomrule')
print('\\end{tabular*}')

\begin{tabular*}{\textwidth}{
    l@{\extracolsep{\fill} }
    r@{\extracolsep{\fill} }
    r@{\extracolsep{\fill} }
    r@{\extracolsep{\fill} }
    r@{\extracolsep{\fill} }
    r@{\extracolsep{\fill} }
    r}
\toprule
id & songs & phrases & avg len & avg dur &$\epsilon$ &$H_\epsilon$ \\
\midrule
\texttt{creighton}            &   152 &   883                 & 9.8 (2.9)                 & 8.6 (3.0)                 & 1.6 &2.16                 \\
\texttt{erk}                  &  1700 &  9782                 & 8.1 (2.0)                 & 6.6 (2.4)                 & 1.5 &2.11                 \\
\texttt{boehme}               &   704 &  4852                 & 8.3 (2.1)                 & 6.8 (2.2)                 & 1.5 &2.15                 \\
\texttt{han}                  &  1219 &  7504                 & 12.1 (4.7)                 & 7.0 (2.6)                 & 1.5 &2.01                 \\
\texttt{shanxi}               &   802 &  2618                 & 14.5 (4.3)                 & 8.5 (2.2)  

In [5]:
# Display the DataFrame currently bound to `df`. In this notebook `df` is only
# assigned inside the table-building loop, so this shows whichever dataset was
# read last when that loop ran.
# NOTE(review): this cell's execution count (5) precedes the table cell's (12),
# so the saved output may be stale relative to a fresh Restart & Run All — confirm.
df

Unnamed: 0_level_0,song_id,phrase_num,phrase_length,phrase_duration,huron_type,adams_type,0,1,2,3,...,40,41,42,43,44,45,46,47,48,49
contour_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
liber-antiphons-00001,30,0,9,9.0,convex,S2 D1 R2,57,57,57,57,...,59,59,59,59,57,57,57,57,57,57
liber-antiphons-00002,30,1,10,10.0,convex,S3 D1 R2,60,60,60,60,...,62,62,62,62,62,64,64,64,64,64
liber-antiphons-00003,30,2,9,9.0,horizontal,S3 D1 R2,60,60,60,60,...,62,62,62,62,62,62,62,62,62,62
liber-antiphons-00004,30,3,11,11.0,horizontal-ascending,S2 D2 R1,62,62,62,62,...,64,62,62,62,62,62,62,62,62,62
liber-antiphons-00005,30,4,16,16.0,ascending,S3 D1 R2,60,60,60,60,...,67,67,67,65,65,65,65,65,65,65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
liber-antiphons-05095,8220,5,2,2.0,horizontal,S2 D0 R0,67,67,67,67,...,67,67,67,67,67,67,67,67,67,67
liber-antiphons-05096,8717,0,15,15.0,ascending-horizontal,S3 D1 R2,64,64,64,64,...,67,67,67,67,67,67,67,67,67,67
liber-antiphons-05097,8717,1,16,16.0,convex,S3 D1 R2,65,65,65,65,...,65,65,65,67,67,67,67,67,67,67
liber-antiphons-05098,8717,2,15,15.0,concave,S2 D2 R1,67,67,67,67,...,69,69,69,67,67,67,67,67,67,67
