**TODO**

<ol>
<li>Map the note names to a MIDI number like scale for analyzing intervals. Column Comma53 in figure1</li>
<li>Create a tonic dict for existing makamlar</li>
<li>Relation between measures are indicated how? (Extensions, glissando, my vocab is poor here)</li>
<li>Deal with Multi-part scores. Probably not possible in makam music</li>
</ol>

**Discoveries**
   - Alter<br>
"-1, -4, -5, -8, +1, +4, +5, +8" Comma alterations of the accidentals in the 53 Comma Scale.
   - "divisions" attribute<br>
https://github.com/burakuyar/MusicXMLConverter/blob/master/musicxmlconverter/symbtr2musicxml.py
                measure_len = (temp_num_beats * num_divs *
                           4 / float(temp_beat_type))
**Questions**                       
- Makam Music
    - Can makam music have time signature change?

```Measure
    0: attributes ( even if no note is present) 
        sometimes empty, o.w.
        'divisions'
        'key'
        'time'
    1,.. notes ```

![alt text](fig1.png "Title")

In [3]:
OCTAVE={'C':0,
        'D':9,
        'E':18,
        'F':22,
        'G':31,
        'A':40,
        'B':44}

# sharps ???????
ACCIDENTALS={'quarter-flat':-1,
            'slash-flat':-4,
            'flat':-5,
            'double-slash-flat':-8,
            'natural': 0,
            'quarter-sharp':+1,
            'sharp':+4,
            'slash-quarter-sharp':+5,  
            'slash-sharp':+8
            }

In [4]:
# Code Improved from https://github.com/burakuyar/Tools/blob/master/musicxml_player.py
# Data from https://github.com/sertansenturk/SymbTr

import os
import glob
from collections import defaultdict

import numpy as np

import xml.etree.ElementTree as ET

DATA_DIR=os.path.join(os.getcwd(), 'data')

In [9]:
def read_score(score_path):
    tree=ET.parse(score_path)
    root=tree.getroot()
    return root

# TODO: get from xml not path
def get_composer_info(score_path):
    file_name=os.path.splitext(os.path.basename(score_path))[0]
    composition_name=file_name.split('--')[-2]
    composer_name=file_name.split('--')[-1]
    return composition_name,composer_name 

def get_makam_form_usul(root):
    makam_form_usul=root.find('part/measure/direction/direction-type/words').text.split(', ')
    makam=makam_form_usul[0].split(': ')[-1]
    form=makam_form_usul[1].split(': ')[-1]
    usul=makam_form_usul[2].split(': ')[-1][:-1]
    return makam, form, usul

def note_to_number(p,o,a):
    """Convert NoteAEU to AEU53 Comma numbering. """
    if p!='Rest':
        return 53*(int(o)+1)+OCTAVE[p]+ACCIDENTALS[a]
    else:
        return -1

def parse_notes(root, record_embellishment=True):
    """
    Returns a 2D array of [[measure_idx,note_idx,note_duration,note_number, *note_name]]
    Note name can only be "PitchClass Octave", "PitchClass Octave Accidental" or "Rest"
    If a note_name is not Rest and it has zero duration, that note is an embellishment.
    """
    notes=[]
    for m_idx,measure in enumerate(root.findall('part/measure')):   
        if len(measure.findall('note'))>0: # Check if the measure contains at least one note
            grace_count=0 # Count grace notes in case you don't want to record them
            for n_idx,note in enumerate(measure.findall('note')):
                dur=note.find('duration')
                if dur is None:
                    if not record_embellishment:
                        grace_count+=1
                        continue # skip the grace note
                    else:
                        dur='0' # Embellishment/Grace Note
                else:
                    dur=dur.text
                step=note.find('pitch/step')
                if step is not None:
                    step=step.text
                    octave=note.find('pitch/octave').text
                    acc = note.find('accidental')
                    if acc is None:
                        n=[step,octave,'natural']
                    else:
                        n=[step,octave,acc.text]
                else:
                    rest = note.find('rest')
                    assert rest is not None, "The note doesn't have a pitch and is not a rest!"
                    n=['Rest','','']
                note_number=note_to_number(*n)
                note = [m_idx, n_idx-grace_count, dur, note_number, *n]
                notes.append(note)
    return np.array(notes)

def get_time_signatures(root):
    """Returns all time signatures in the score as a list of tupples.
    Assumes it is possible to have time change in makam pieces."""
    beats = [t.text for t in root.findall('part/measure/attributes/time/beats')]
    types = [t.text for t in root.findall('part/measure/attributes/time/beat-type')]
    all_time_signatures=[(int(b),int(t)) for b,t in zip(beats,types)]
    return all_time_signatures

def get_bpm(root):
    return float(root.find('part/measure/direction/sound').attrib['tempo'])

def get_divisions(root):
    return float(root.find('part/measure/attributes/divisions').text)

def find_key_signature_accidentals(root):
    notes, accidentals = [], []
    for k in root.iter('key'):
        for ks in k.findall('key-step'):
            notes.append(ks.text)
        for ka in k.findall('key-accidental'):
            accidentals.append(ka.text)
    return ['{} {}'.format(n,k) for n,k in zip(notes,accidentals)]

def find_all_accidentals(root):
    return set([a.text for a in root.iter('accidental')])

# 1) Find all xml files in the DATA_DIR

In [10]:
score_paths = glob.glob(DATA_DIR+'/*.xml')
print(f'There are {len(score_paths)} scores in the directory.')

There are 2200 scores in the directory.


# 2) Process

## A) Process 1 XML File

In [11]:
# Read one xml file to a tree structure
score_path=score_paths[0]
root=read_score(score_path)

In [12]:
# Get necessary information
makam,form,usul=get_makam_form_usul(root)
print(f'Makam: {makam}')
print(f'Form: {form}')
print(f'Usul: {usul}')
time_signatures=get_time_signatures(root)
print(f'Time Signature(s): {time_signatures}')
bpm=get_bpm(root)
print(f'BPM: {bpm}')
divs=get_divisions(root)
print(f'Divisions: {divs}')
key_signature_accidentals=find_key_signature_accidentals(root)
print(f'Accidental(s) in the key signature: {key_signature_accidentals}')
notes=parse_notes(root, record_embellishment=False)

Makam: Uşşak
Form: Şarkı
Usul: Aksak
Time Signature(s): [(9, 8)]
BPM: 60.0
Divisions: 96.0
Accidental(s) in the key signature: ['B quarter-flat']


In [19]:
print('Meas Pos Dur  Num   P   Oct  Acc')
print('='*len('Meas Pos Dur  Num   P   Oct  Acc'))
for n in notes:
    print(n)   

Meas Pos Dur  Num   P   Oct  Acc
['0' '0' '96' '-1' 'Rest' '' '']
['0' '1' '72' '296' 'G' '4' 'natural']
['0' '2' '24' '305' 'A' '4' 'natural']
['0' '3' '96' '305' 'A' '4' 'natural']
['0' '4' '96' '318' 'C' '5' 'natural']
['0' '5' '24' '318' 'C' '5' 'natural']
['0' '6' '24' '308' 'B' '4' 'quarter-flat']
['1' '0' '36' '308' 'B' '4' 'quarter-flat']
['1' '1' '12' '305' 'A' '4' 'natural']
['1' '2' '24' '296' 'G' '4' 'natural']
['1' '3' '24' '305' 'A' '4' 'natural']
['1' '4' '24' '308' 'B' '4' 'quarter-flat']
['1' '5' '24' '305' 'A' '4' 'natural']
['1' '6' '24' '327' 'D' '5' 'natural']
['1' '7' '24' '318' 'C' '5' 'natural']
['1' '8' '24' '318' 'C' '5' 'natural']
['1' '9' '24' '308' 'B' '4' 'quarter-flat']
['1' '10' '24' '308' 'B' '4' 'quarter-flat']
['1' '11' '24' '305' 'A' '4' 'natural']
['1' '12' '48' '305' 'A' '4' 'natural']
['1' '13' '48' '296' 'G' '4' 'natural']
['1' '14' '24' '308' 'B' '4' 'quarter-flat']
['1' '15' '24' '305' 'A' '4' 'natural']
['2' '0' '96' '305' 'A' '4' 'natural']
[

## B) Process All XML Files

In [None]:
# Create a dictionary containing all the necessary information.
dataset=defaultdict(list)
for score_path in score_paths:
    root=read_score(score_path)
    makam,form,usul=get_makam_form_usul(root)
    composition_name,composer_name =get_composer_info(score_path)
    score_dict={'composition': composition_name,
                'composer': composer_name,
                'form': form,
                'usul': usul,
                'time_signatures': get_time_signatures(root),
                'bpm': get_bpm(root),
                'divs': get_divisions(root),
                'key_sigsnature_accidentals': find_key_signature_accidentals(root),
                'notes': parse_notes(root, record_embellishment=False)
                }
    dataset[makam].append(score_dict)
#print(list(dataset.keys()))    

In [None]:
for makam,dct in dataset.items():
    print(f'{makam}: {len(dct)}')

In [None]:
# Remove makamlar without enough scores
THRESHOLD=50
subset={k:v for k,v in dataset.items() if len(v)>=THRESHOLD}
count=0
for makam,dct in subset.items():
    print(f'{makam}: {len(dct)}')
    count+=len(dct)
print(count)

In [None]:
single_makam=dataset['Muhayyer']
composition=single_makam[0]

In [None]:
composition