In [2]:
from music21 import note, chord, converter
import pandas as pd

def extract_voice_data(score, apply_strip_ties=False):
    """Return one row per sounding pitch found in ``score``.

    Args:
        score: A music21 stream, e.g. the result of ``converter.parse``.
        apply_strip_ties: When true, ``score.stripTies()`` is called first and
            the rows are extracted from its result instead of from ``score``.

    Returns:
        A list of ``(measure_number, local_onset, duration, pitch_name, midi)``
        tuples, in the order the notes and chords appear in the flattened
        stream. A chord contributes one tuple per pitch; rests are skipped.
        ``local_onset`` is the offset from the start of the containing
        measure and ``duration`` is the element's ``quarterLength``.
    """
    if apply_strip_ties:
        score = score.stripTies()

    # Elements of a flattened stream report their offset from the start of the
    # whole stream, not from the start of their bar.  Look the measure-relative
    # offsets up from the un-flattened hierarchy first; flatten() does not copy
    # elements, so id() is a stable key between the two passes.
    local_onsets = {}
    for measure in score.recurse().getElementsByClass('Measure'):
        for element in measure.recurse().notes:
            # getOffsetInHierarchy also accounts for notes nested in a Voice.
            local_onsets[id(element)] = element.getOffsetInHierarchy(measure)

    voice_data = []
    for element in score.flatten().notesAndRests:
        if not isinstance(element, (note.Note, chord.Chord)):
            continue  # rests and anything else without pitches

        measure_num = element.measureNumber
        # Fall back to the stream-relative offset when the element is not
        # inside any measure (e.g. ``score`` has no Measure containers).
        position_within_measure = local_onsets.get(id(element), element.offset)
        duration = element.duration.quarterLength

        # ``pitches`` is available on both Note (one entry) and Chord.
        voice_data.extend(
            (measure_num, position_within_measure, duration, str(p), p.midi)
            for p in element.pitches
        )
    return voice_data

# Parse the score once and extract the note table twice: once after
# stripTies() and once from the score exactly as parsed.
file_path = 'liszt.mxl'
score = converter.parse(file_path)

# NOTE: despite the names, ``*_with_ties`` holds the data extracted AFTER
# stripTies() was applied, and ``*_no_ties`` holds the data from the
# untouched score.  The names and printed labels are kept as they were.
voice_data_with_ties = extract_voice_data(score, apply_strip_ties=True)
voice_data_no_ties = extract_voice_data(score, apply_strip_ties=False)

column_names = ['Measure', 'Local Onset', 'Duration', 'Pitch', 'MIDI']
df_with_ties = pd.DataFrame(voice_data_with_ties, columns=column_names)
df_no_ties = pd.DataFrame(voice_data_no_ties, columns=column_names)

print("\ndf with ties:")
display(df_with_ties)
print("\ndf without ties:")
display(df_no_ties)

# The comparisons below treat each frame as a SET of rows.  De-duplicate each
# frame first: the same row can legitimately occur more than once in a frame
# (e.g. the same pitch at the same onset in two parts), and leaving those
# repeats in would distort both set operations.
unique_with_ties = df_with_ties.drop_duplicates()
unique_no_ties = df_no_ties.drop_duplicates()

# Direct comparison to see if any rows differ.  With per-frame duplicates
# removed, keep=False drops exactly the rows present in BOTH frames and leaves
# the rows that occur in only one of them.
differences = pd.concat([unique_with_ties, unique_no_ties]).drop_duplicates(keep=False)
print("Differences between DataFrames:")
print(differences)

# Descriptive statistics comparison (computed on the full, non-deduplicated
# frames so counts reflect every extracted row).
print("\nDescriptive statistics with ties:")
print(df_with_ties.describe())
print("\nDescriptive statistics without ties:")
print(df_no_ties.describe())

# Calculate Jaccard Similarity = |A ∩ B| / |A ∪ B| over distinct rows.
# An inner merge on raw frames would pair every repeated row with every
# matching repeat (k * k rows), which can push the ratio above 1.
intersection = pd.merge(unique_with_ties, unique_no_ties, how='inner')
union = pd.concat([unique_with_ties, unique_no_ties]).drop_duplicates()
jaccard_index = len(intersection) / len(union) if len(union) > 0 else 1.0  # Check if union is not empty
print(f"\nJaccard Similarity Index: {jaccard_index}")



df with ties:


Unnamed: 0,Measure,Local Onset,Duration,Pitch,MIDI
0,1,0.0,1.0,A-4,68
1,1,0.0,1.0,C5,72
2,1,0.0,1.0,E-5,75
3,1,0.0,1.0,A-5,80
4,1,0.0,1.0,C3,48
5,1,0.0,1.0,A-3,56
6,1,0.0,1.0,E-4,63
7,1,1.0,1.0,G4,67
8,1,1.0,1.0,B-4,70
9,1,1.0,1.0,E-5,75



df without ties:


Unnamed: 0,Measure,Local Onset,Duration,Pitch,MIDI
0,1,0.0,1.0,A-4,68
1,1,0.0,1.0,C5,72
2,1,0.0,1.0,E-5,75
3,1,0.0,1.0,A-5,80
4,1,0.0,1.0,C3,48
5,1,0.0,1.0,A-3,56
6,1,0.0,1.0,E-4,63
7,1,1.0,1.0,G4,67
8,1,1.0,1.0,B-4,70
9,1,1.0,1.0,E-5,75


Differences between DataFrames:
Empty DataFrame
Columns: [Measure, Local Onset, Duration, Pitch, MIDI]
Index: []

Descriptive statistics with ties:
       Measure  Local Onset  Duration       MIDI
count     14.0    14.000000      14.0  14.000000
mean       1.0     0.500000       1.0  65.857143
std        0.0     0.518875       0.0  10.272122
min        1.0     0.000000       1.0  48.000000
25%        1.0     0.000000       1.0  57.750000
50%        1.0     0.500000       1.0  67.500000
75%        1.0     1.000000       1.0  74.250000
max        1.0     1.000000       1.0  80.000000

Descriptive statistics without ties:
       Measure  Local Onset  Duration       MIDI
count     14.0    14.000000      14.0  14.000000
mean       1.0     0.500000       1.0  65.857143
std        0.0     0.518875       0.0  10.272122
min        1.0     0.000000       1.0  48.000000
25%        1.0     0.000000       1.0  57.750000
50%        1.0     0.500000       1.0  67.500000
75%        1.0     1.000000   