In [3]:
# Path to the MuseScore score (.mscz here) that is passed to
# extract_pitches_from_mscx() further down. Raw string so the Windows
# backslashes are taken literally.
file = r"C:\Users\janet\Documents\MuseScore4\Scores\testv1.mscz"

In [6]:
import xml.etree.ElementTree as ET
import os
import zipfile
import tempfile
from pathlib import Path

def get_pitch_name(pitch_value):
    """Return the note name, octave included, for a MIDI pitch number.

    Black keys are spelled with sharps, and octaves are numbered so that
    MIDI 60 maps to "C4" (hence the ``- 1`` on the octave index).
    """
    octave_index, semitone = divmod(pitch_value, 12)
    letter = ("C", "C#", "D", "D#", "E", "F",
              "F#", "G", "G#", "A", "A#", "B")[semitone]
    return letter + str(octave_index - 1)

def _select_score_member(member_names):
    """Pick the archive member most likely to hold the main score XML.

    Preference order: a ``.mscx`` file at the archive root, then any
    ``.mscx`` file, then the first extension-less file, then the first entry.
    Root-level files are preferred because nested ``.mscx`` members are
    presumably part/excerpt scores rather than the full score.

    Raises:
        ValueError: if the archive has no members at all.
    """
    if not member_names:
        raise ValueError("archive contains no files")

    mscx_members = [n for n in member_names if n.lower().endswith('.mscx')]
    for name in mscx_members:
        if '/' not in name:
            return name
    if mscx_members:
        return mscx_members[0]

    for name in member_names:
        if '.' not in name and not name.endswith('/'):
            return name

    # Last resort, kept from the original behaviour: try the first entry.
    return member_names[0]


def _collect_pitch_names(notes, pitch_path, namespaces=None, debug_label=None):
    """Convert the pitch child of each note element into a note name.

    Notes without a pitch child (or with empty pitch text) are skipped.
    When ``debug_label`` is given, the first five hits are printed.
    """
    names = []
    for note in notes:
        pitch_elem = note.find(pitch_path, namespaces)
        if pitch_elem is None or not pitch_elem.text:
            continue
        midi_pitch = int(pitch_elem.text)
        pitch_name = get_pitch_name(midi_pitch)
        names.append(pitch_name)
        if debug_label and len(names) <= 5:
            print(f"Found note ({debug_label}): {pitch_name} (MIDI: {midi_pitch})")
    return names


def extract_pitches_from_mscx(mscx_file_path, output_file_path=None, debug=True):
    """Extract pitch names from a MuseScore .mscx or .mscz file.

    Args:
        mscx_file_path: Path to the score, as ``str`` or ``os.PathLike``.
            A ``.mscz`` extension (any letter case) is treated as a zip
            archive; anything else is parsed directly as XML.
        output_file_path: Where to write the pitch names, one per line.
            Defaults to ``<input without extension>_pitches.txt``. Nothing
            is written when no pitches are found.
        debug: When true, print the root element, a preview of the first
            20 elements, the first five notes found and a summary.

    Returns:
        A list of note names (e.g. ``["E4", "C5"]``) in document order,
        possibly empty, or ``None`` if any error occurred while reading or
        parsing (the error and traceback are printed).
    """
    try:
        # Accept pathlib.Path as well as str; str has .endswith, Path does not.
        source_path = os.fspath(mscx_file_path)

        if source_path.lower().endswith('.mscz'):
            print("Detected .mscz file, extracting...")
            with zipfile.ZipFile(source_path, 'r') as zip_ref:
                score_file = _select_score_member(zip_ref.namelist())
                print(f"Reading {score_file} from archive...")
                with zip_ref.open(score_file) as f:
                    root = ET.parse(f).getroot()
        else:
            # Regular (uncompressed) .mscx file
            root = ET.parse(source_path).getroot()

        if debug:
            print(f"\nRoot tag: {root.tag}")
            print(f"Root attributes: {root.attrib}")

            print("\nXML Structure (first 20 elements):")
            for count, elem in enumerate(root.iter()):
                if count >= 20:
                    # Stop walking the tree once the preview is complete.
                    break
                print(f"  {elem.tag}: {elem.text[:50] if elem.text and elem.text.strip() else ''}")

        # Approach 1: Note elements that are direct children of a Chord.
        pitches = _collect_pitch_names(
            (note for chord in root.iter('Chord') for note in chord.findall('Note')),
            'pitch',
            debug_label='Chord/Note/pitch' if debug else None,
        )

        # Approach 2: any Note element with a pitch child, wherever it sits.
        if not pitches:
            pitches = _collect_pitch_names(
                root.iter('Note'),
                'pitch',
                debug_label='Note/pitch' if debug else None,
            )

        # Approach 3: the same search with namespace-qualified tags.
        if not pitches:
            ns = {'m': 'http://www.musescore.org/mscx'}
            pitches = _collect_pitch_names(root.findall('.//m:Note', ns), 'm:pitch', ns)

        if debug:
            print(f"\n{'='*50}")
            if pitches:
                print(f"Successfully extracted {len(pitches)} pitches!")
            else:
                print("No pitches found. Checking for Note elements...")
                note_count = sum(1 for _ in root.iter('Note'))
                chord_count = sum(1 for _ in root.iter('Chord'))
                print(f"Found {note_count} Note elements")
                print(f"Found {chord_count} Chord elements")

        # Write to output file
        if pitches:
            if output_file_path is None:
                output_file_path = os.path.splitext(source_path)[0] + "_pitches.txt"

            with open(output_file_path, 'w', encoding='utf-8') as f:
                f.writelines(pitch + '\n' for pitch in pitches)

            print(f"Extracted {len(pitches)} pitches to: {output_file_path}")

        return pitches

    except Exception as e:
        # Deliberately broad: callers rely on a None return rather than an
        # exception, so report the failure here and signal it with None.
        print(f"Error processing file: {e}")
        import traceback
        traceback.print_exc()
        return None

# Run the extraction with debug output enabled.
# `file` must already hold the path to a .mscx or .mscz score.
pitches = extract_pitches_from_mscx(file, debug=True)

# Preview the result: at most the first 20 names, then a count of the rest.
if not pitches:
    print("\nNo pitches extracted. Please check the debug output above.")
else:
    print("\nFirst 20 pitches:")
    preview = pitches[:20]
    for position, name in enumerate(preview, start=1):
        print(f"  {position}. {name}")
    remaining = len(pitches) - len(preview)
    if remaining > 0:
        print(f"  ... and {remaining} more")

Detected .mscz file, extracting...
Reading testv1.mscx from archive...

Root tag: museScore
Root attributes: {'version': '4.50'}

XML Structure (first 20 elements):
  museScore: 
  programVersion: 4.5.1
  programRevision: 603eca8
  Score: 
  eid: 6zvBN9o2REN_owD1RHO2qfB
  Division: 480
  showInvisible: 1
  showUnprintable: 1
  showFrames: 1
  showMargins: 0
  open: 1
  metaTag: 
  metaTag: 
  metaTag: Composer / arranger
  metaTag: 
  metaTag: 2025-12-24
  metaTag: 
  metaTag: 
  metaTag: 
  metaTag: Microsoft Windows
Found note (Chord/Note/pitch): E4 (MIDI: 64)
Found note (Chord/Note/pitch): C5 (MIDI: 72)
Found note (Chord/Note/pitch): C5 (MIDI: 72)
Found note (Chord/Note/pitch): B4 (MIDI: 71)
Found note (Chord/Note/pitch): A4 (MIDI: 69)

Successfully extracted 6 pitches!
Extracted 6 pitches to: C:\Users\janet\Documents\MuseScore4\Scores\testv1_pitches.txt

First 20 pitches:
  1. E4
  2. C5
  3. C5
  4. B4
  5. A4
  6. D5
