In [1]:
from music21 import converter, instrument, note, chord
from pathlib import Path

In [3]:
# Load ONE MIDI file to understand its structure
data_path = Path("../data/midi/")

# Search the data directory recursively for both MIDI file extensions.
midi_files = list(data_path.rglob("*.mid")) + list(data_path.rglob("*.midi"))
print(f"Found {len(midi_files)} MIDI files.")

# Fail with an actionable message instead of a bare IndexError when nothing
# was found (e.g. the relative path does not resolve from the current
# working directory, or the dataset has not been downloaded yet).
if not midi_files:
    raise FileNotFoundError(
        f"No .mid/.midi files found under {data_path.resolve()}"
    )

# Use the first discovered file as the sample for the cells below.
sample_file = midi_files[0]
print(f"\nAnalyzing: {sample_file.name}")

Found 1276 MIDI files.

Analyzing: MIDI-Unprocessed_XP_08_R1_2004_01-02_ORIG_MID--AUDIO_08_R1_2004_01_Track01_wav.midi


In [4]:
# Parse the sample MIDI file with music21's converter
midi_data = converter.parse(sample_file)

# Report the parsed object's Python type, then its string representation.
print(f"Type: {type(midi_data)}")
print("\nStructure of MIDI file:", midi_data, sep="\n")

Type: <class 'music21.stream.base.Score'>

Structure of MIDI file:
<music21.stream.Score 0x7f585854c2d0>


In [5]:
# Understanding Parts (instruments/tracks)
# Split the parsed score by instrument.
parts = instrument.partitionByInstrument(midi_data)

if not parts:
    # Partitioning produced nothing usable, so report a single track.
    print("Single track MIDI file.")
else:
    part_count = len(parts.parts)
    print(f"Number of parts (instruments): {part_count}")
    # List each part together with its name.
    for i, part in enumerate(parts.parts):
        print(f"  Part {i}: {part.partName}")

Number of parts (instruments): 1
  Part 0: Piano


In [8]:
# Extract notes and chords as string tokens
notes_list = []

# Use the first instrument part when partitioning succeeded; otherwise fall
# back to the notes of the flattened score. `.flatten()` replaces the
# deprecated `.flat` property.
elements_to_parse = parts.parts[0].recurse() if parts else midi_data.flatten().notes

for element in elements_to_parse:
    if isinstance(element, note.Note):
        # Single note: store its pitch (e.g., 'C4', 'F#5')
        notes_list.append(str(element.pitch))

    elif isinstance(element, chord.Chord):
        # Chord: multiple notes together, store as 'C4.E4.G4'
        chord_str = '.'.join(str(p) for p in element.pitches)
        notes_list.append(chord_str)
    # Any other element type is skipped.

print("--- Summary ---")
print(f"Total elements extracted: {len(notes_list)}")
print(f"First 10 elements: {notes_list[:10]}")

--- Summary ---
Total elements extracted: 4645
First 10 elements: ['D1', 'D2', 'D3', 'E3', 'F3', 'D1', 'D2', 'G3', 'A3.B3', 'C#4']
