# BPSD: Get Durations

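- Compute and print duration statistics per performer (audio version) and per sonata, for both the original and the modified (cut) audio files, as plain-text tables and as LaTeX table rows.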

Johannes Zeitler (johannes.zeitler@audiolabs-erlangen.de), 2024

In [1]:
import os
import numpy as np
import audioread
import pandas as pd
from copy import deepcopy
import librosa

In [2]:
def getDur(filename):
    """Return the duration of an audio file as reported by librosa.

    Args:
        filename: Path of the audio file; handed to ``librosa.get_duration``.

    Returns:
        Whatever ``librosa.get_duration`` returns for that file (the rest of
        this notebook treats it as a number of seconds).
    """
    try:
        # librosa >= 0.10 expects the file via ``path``; the ``filename``
        # keyword is deprecated there.
        return librosa.get_duration(path=filename)
    except TypeError:
        # Older librosa releases do not know ``path`` and only accept ``filename``.
        return librosa.get_duration(filename=filename)

In [3]:
def sec_to_hMinSec(t):
    """Split a duration given in seconds into hours, minutes and seconds.

    Args:
        t: Duration in seconds (int or float). The argument is not modified.

    Returns:
        A 4-tuple ``(label, h, m, s)`` where ``label`` is the duration formatted
        as ``"HH:MM:SS"`` (each field formatted with ``%02i``, so fractional
        parts are dropped in the label) and ``h``, ``m``, ``s`` are the hours,
        minutes and remaining seconds as numbers.
    """
    seconds_per_hour, seconds_per_minute = 3600, 60

    h = t // seconds_per_hour
    after_hours = t - h * seconds_per_hour
    m = after_hours // seconds_per_minute
    s = after_hours - m * seconds_per_minute

    label = "%02i:%02i:%02i" % (h, m, s)
    return label, h, m, s

In [4]:
# Both audio folders live one level above this notebook.
dataset_root = "../"
# Raw ripped audio: one sub-folder per performer (see the loader cell below).
orig_dir = os.path.join(dataset_root, "0_RawData", "audio_ripped")
# Modified (cut) audio files, all in one flat folder.
final_dir = os.path.join(dataset_root, "1_Audio")

In [5]:
# Performer ID = last "_"-separated token of the file stem,
# collected from every file in `final_dir` whose name contains ".wav".
performers = sorted({
    wav_name.split(".")[0].split("_")[-1]
    for wav_name in os.listdir(final_dir)
    if ".wav" in wav_name
})

In [7]:
# Sonata ID = second "_"-separated token of the file stem,
# collected from every file in `final_dir` whose name contains ".wav".
sonatas = sorted({
    wav_name.split(".")[0].split("_")[1]
    for wav_name in os.listdir(final_dir)
    if ".wav" in wav_name
})

In [8]:
# One (initially empty) list of per-sonata durations for each performer.
durations_orig = {performer: [] for performer in performers}
durations_cut = {performer: [] for performer in performers}

In [9]:
# Duration of the original files, one entry per sonata in `sonatas` order.
# The list is rebuilt (not appended to), so re-running this cell cannot
# duplicate entries.
for performer in performers:
    durations_orig[performer] = [
        getDur(os.path.join(orig_dir, performer, "Beethoven_%s_%s.wav" % (sonata, performer)))
        for sonata in sonatas
    ]

In [10]:
# Duration of the modified files, one entry per sonata in `sonatas` order.
# The list is rebuilt (not appended to), so re-running this cell cannot
# duplicate entries.
for performer in performers:
    durations_cut[performer] = [
        getDur(os.path.join(final_dir, "Beethoven_%s_%s.wav" % (sonata, performer)))
        for sonata in sonatas
    ]

In [11]:
# Plain-text table: summed duration per performer, original vs. modified audio.
print("Performer \t Orig. Dur. \t Mod. Dur.")
for performer in performers:
    orig_label, cut_label = (
        sec_to_hMinSec(np.sum(durations[performer]))[0]
        for durations in (durations_orig, durations_cut)
    )
    print("%s    \t %s \t %s" % (performer, orig_label, cut_label))

Performer 	 Orig. Dur. 	 Mod. Dur.
AB96    	 03:54:34 	 03:52:28
AS35    	 03:31:03 	 03:33:35
DB84    	 03:58:37 	 03:58:37
FG58    	 03:34:00 	 03:34:00
FG67    	 03:25:02 	 03:25:02
FJ62    	 03:35:13 	 03:41:26
JJ90    	 03:41:06 	 03:39:14
MB97    	 03:52:23 	 03:46:08
MC22    	 04:08:22 	 04:05:11
VA81    	 03:48:16 	 03:46:27
WK64    	 03:18:26 	 03:45:31


In [12]:
# Grand totals over all performers; both sums walk the keys of `durations_cut`.
total_dur_cut = sum(np.sum(durations_cut[key]) for key in durations_cut)
total_dur_orig = sum(np.sum(durations_orig[key]) for key in durations_cut)

for label, total in (("orig", total_dur_orig), ("cut", total_dur_cut)):
    print("Total duration (%s): %s" % (label, sec_to_hMinSec(total)[0]))

Total duration (orig): 40:47:08
Total duration (cut): 41:07:45


In [13]:
# Same per-performer totals as above, emitted as LaTeX table rows ("&"-separated, "\\" at line end).
print("Performer \t Orig. Dur. \t Mod. Dur.")
for performer in performers:
    orig_label = sec_to_hMinSec(np.sum(durations_orig[performer]))[0]
    cut_label = sec_to_hMinSec(np.sum(durations_cut[performer]))[0]
    latex_row = "%s    \t&  %s  &  %s \\\\" % (performer, orig_label, cut_label)
    print(latex_row)

Performer 	 Orig. Dur. 	 Mod. Dur.
AB96    	&  03:54:34  &  03:52:28 \\
AS35    	&  03:31:03  &  03:33:35 \\
DB84    	&  03:58:37  &  03:58:37 \\
FG58    	&  03:34:00  &  03:34:00 \\
FG67    	&  03:25:02  &  03:25:02 \\
FJ62    	&  03:35:13  &  03:41:26 \\
JJ90    	&  03:41:06  &  03:39:14 \\
MB97    	&  03:52:23  &  03:46:08 \\
MC22    	&  04:08:22  &  04:05:11 \\
VA81    	&  03:48:16  &  03:46:27 \\
WK64    	&  03:18:26  &  03:45:31 \\


In [14]:
# Plain-text table: mean / shortest / longest modified-audio duration per sonata.
print("Nr \tOp\t\tMean\tMin\t\t\t Max")
for i, sonata in enumerate(sonatas):
    # Durations of this sonata, in the same order as `performers`.
    sonata_dur = [durations_cut[performer][i] for performer in performers]
    shortest = np.argmin(sonata_dur)
    longest = np.argmax(sonata_dur)

    # [3:] strips the leading "HH:" so only "MM:SS" is shown.
    mean_label = sec_to_hMinSec(np.mean(sonata_dur))[0][3:]
    min_label = sec_to_hMinSec(sonata_dur[shortest])[0][3:]
    max_label = sec_to_hMinSec(sonata_dur[longest])[0][3:]

    print("%02i\t%s\t%s\t%s (%s)    \t %s (%s)" % (
        i + 1,
        sonata,
        mean_label,
        min_label, performers[shortest],
        max_label, performers[longest],
    ))

Nr 	Op		Mean	Min			 Max
01	Op002No1-01	03:47	03:22 (AS35)    	 04:33 (WK64)
02	Op002No2-01	07:04	06:23 (FG67)    	 07:45 (MC22)
03	Op002No3-01	10:15	09:47 (FG58)    	 11:25 (MC22)
04	Op007-01	08:17	07:27 (AS35)    	 08:58 (MC22)
05	Op010No1-01	05:33	04:41 (AS35)    	 06:13 (MC22)
06	Op010No2-01	05:38	05:03 (FG67)    	 06:14 (VA81)
07	Op010No3-01	06:59	06:26 (FJ62)    	 07:53 (JJ90)
08	Op013-01	08:56	08:06 (FG58)    	 09:57 (MC22)
09	Op014No1-01	06:35	05:31 (VA81)    	 07:25 (AB96)
10	Op014No2-01	07:06	05:49 (AS35)    	 07:56 (AB96)
11	Op022-01	07:26	06:43 (AS35)    	 08:36 (MC22)
12	Op026-01	08:01	06:51 (FG67)    	 10:02 (AS35)
13	Op027No1-01	05:12	04:36 (AB96)    	 05:42 (FG58)
14	Op027No2-01	06:01	04:58 (AS35)    	 07:28 (FG58)
15	Op028-01	09:58	08:58 (FJ62)    	 11:39 (MC22)
16	Op031No1-01	06:23	05:44 (FG58)    	 07:19 (MC22)
17	Op031No2-01	08:27	06:49 (FG58)    	 09:52 (MC22)
18	Op031No3-01	08:29	07:53 (FG67)    	 09:07 (MB97)
19	Op049No1-01	04:35	03:41 (JJ90)    	 05:17 (MB97)
20	

In [15]:
# Annotation folders used by the overview table below.
annotations_root = os.path.join("../", "2_Annotations")
ann_score_coarseStructure_path = os.path.join(annotations_root, "ann_score_structureCoarse")
ann_audio_measure_dir = os.path.join(annotations_root, "ann_audio_measure")

# Print a LaTeX overview table (name, key, durations, number of measures, structure)

In [16]:
# LaTeX macro per performer, keyed by pianist name.
# NOTE: a later cell rebinds `performer_macros` to a mapping keyed by performer ID
# (e.g. "VA81"), which is what the overview-table cell actually looks up.
# Raw strings keep the backslash literal without relying on invalid escape sequences.
performer_macros = {"Ashkenazy": r"\Ash",
                    "Barenboim": r"\Brb",
                    "Brendel": r"\Brd",
                    "BilsonEtAl": r"\Bls",
                    "Chemin" : r"\Chm",
                    "Gulda1958": r"\GldFifty",
                    "Gulda1967": r"\GldSixty",
                    "Jando": r"\Jnd",
                    "Jank": r"\Jnk",
                    "Kempff": r"\Kmp",
                    "Schnabel": r"\Sch"}

In [22]:
# LaTeX macro per performer, keyed by the performer ID used in the audio file names.
# Raw strings keep the backslash literal without relying on invalid escape sequences.
performer_macros = {"VA81": r"\Ash",
                    "DB84": r"\Brb",
                    "AB96": r"\Brd",
                    "MB97": r"\Bls",
                    "MC22" : r"\Chm",
                    "FG58": r"\GldFifty",
                    "FG67": r"\GldSixty",
                    "JJ90": r"\Jnd",
                    "FJ62": r"\Jnk",
                    "WK64": r"\Kmp",
                    "AS35": r"\Sch"}

In [23]:
# Nickname per sonata ID as it appears in the file names; an empty string means
# no nickname is printed in the overview table.
sonata_names = dict([
    ("Op002No1-01", ""),
    ("Op002No2-01", ""),
    ("Op002No3-01", ""),
    ("Op007-01", "Grand Sonata"),
    ("Op010No1-01", ""),
    ("Op010No2-01", ""),
    ("Op010No3-01", ""),
    ("Op013-01", "Pathétique"),
    ("Op014No1-01", ""),
    ("Op014No2-01", ""),
    ("Op022-01", ""),
    ("Op026-01", ""),
    ("Op027No1-01", "Son. q. u. fant."),
    ("Op027No2-01", "Moonlight"),
    ("Op028-01", "Pastoral"),
    ("Op031No1-01", ""),
    ("Op031No2-01", "Tempest"),
    ("Op031No3-01", "The Hunt"),
    ("Op049No1-01", "Easy Sonata"),
    ("Op049No2-01", "Easy Sonata"),
    ("Op053-01", "Waldstein"),
    ("Op054-01", ""),
    ("Op057-01", "Appassionata"),
    ("Op078-01", "A Thérèse"),
    ("Op079-01", "Cuckoo"),
    ("Op081a-01", "Les adieux"),
    ("Op090-01", ""),
    ("Op101-01", ""),
    ("Op106-01", "Hammerklavier"),
    ("Op109-01", ""),
    ("Op110-01", ""),
    ("Op111-01", ""),
])


In [24]:
# Global-key annotations (semicolon-separated CSV); the overview table reads
# its `opus` and `globalkey` columns.
globalkey_csv = os.path.join(
    "../", "2_Annotations", "ann_score_globalkey", "Beethoven_piano_sonatas_globalkey.csv"
)
globalkey = pd.read_csv(globalkey_csv, sep=";")

In [25]:
# LaTeX overview table: one row per sonata with nickname, global key, mean /
# shortest / longest duration (modified audio), number of measures and a
# short-form string of the coarse structure annotation.

# Performer whose measure annotation file is used to read the last measure number.
reference_performer = "WK64"
# Roman-numeral suffixes of structure labels are written as digits in the short form.
roman_to_digit = {"I": "1", "II": "2", "III": "3"}

print("Nr \tOp\t\tName\tKey\tMean\t\tMin\t\t\t Max \t\t\t \\# measures \t structure")
for i, sonata in enumerate(sonatas):
    # Durations of this sonata, in the same order as `performers`.
    sonata_dur = [durations_cut[performer][i] for performer in performers]
    iMin = np.argmin(sonata_dur)
    iMax = np.argmax(sonata_dur)

    # Highest annotated measure number in the reference performer's annotation.
    df_meas = pd.read_csv(os.path.join(ann_audio_measure_dir,
                                       "Beethoven_" + sonata + "_" + reference_performer + ".csv"),
                          sep=";")
    meas_end = np.max(df_meas.measure)

    # Short form: first letter of each structure label, plus a trailing number
    # when the label's last word is numeric or ends in "I" (roman numeral).
    df_struct = pd.read_csv(os.path.join(ann_score_coarseStructure_path, "Beethoven_" + sonata + ".csv"), sep=";")
    shortForms = []
    for label in df_struct.structure:
        short = label[:1]
        suffix = label.split(" ")[-1]
        # endswith() is safe on an empty suffix (label ending in a space).
        if suffix.isnumeric() or suffix.endswith("I"):
            short += roman_to_digit.get(suffix, suffix)
        shortForms.append(short)
    shortForm = "-".join(shortForms)

    # "#" (sharp) must be escaped for LaTeX.
    key_latex = globalkey[globalkey.opus == sonata].globalkey.item().replace("#", "\\#")

    # [3:] strips the leading "HH:" so only "MM:SS" is shown.
    print("%02i  &  %s \t& %s &\t %s &\t  %s  &  %s (%s)       \t&   %s (%s)\t&  %i  &  %s\\\\" % (
        i + 1,
        sonata,
        sonata_names[sonata],
        key_latex,
        sec_to_hMinSec(np.mean(sonata_dur))[0][3:],
        sec_to_hMinSec(sonata_dur[iMin])[0][3:], performer_macros[performers[iMin]],
        sec_to_hMinSec(sonata_dur[iMax])[0][3:], performer_macros[performers[iMax]],
        np.floor(meas_end),
        shortForm,
    ))
    


Nr 	Op		Name	Key	Mean		Min			 Max 			 \# measures 	 structure
01  &  Op002No1-01 	&  &	 F:min &	  03:47  &  03:22 (\Sch)       	&   04:33 (\Kmp)	&  200  &  E-E-D-R\\
02  &  Op002No2-01 	&  &	 A:maj &	  07:04  &  06:23 (\GldSixty)       	&   07:45 (\Chm)	&  452  &  E-E-D-R\\
03  &  Op002No3-01 	&  &	 C:maj &	  10:15  &  09:47 (\GldFifty)       	&   11:25 (\Chm)	&  347  &  E-E-D-R-C\\
04  &  Op007-01 	& Grand Sonata &	 Eb:maj &	  08:17  &  07:27 (\Sch)       	&   08:58 (\Chm)	&  497  &  E-E-D-R-C\\
05  &  Op010No1-01 	&  &	 C:min &	  05:33  &  04:41 (\Sch)       	&   06:13 (\Chm)	&  388  &  E-E-D-R\\
06  &  Op010No2-01 	&  &	 F:maj &	  05:38  &  05:03 (\GldSixty)       	&   06:14 (\Ash)	&  268  &  E-E-D-R\\
07  &  Op010No3-01 	&  &	 D:maj &	  06:59  &  06:26 (\Jnk)       	&   07:53 (\Jnd)	&  467  &  E-E-D-R-C\\
08  &  Op013-01 	& Pathétique &	 C:min &	  08:56  &  08:06 (\GldFifty)       	&   09:57 (\Chm)	&  431  &  I-E-E-D-R-C\\
09  &  Op014No1-01 	&  &	 E:maj &	  06:35  &  05:31 (\Ash) 