## Discovering significant patterns in Arab-Andalusian Music using SIA
October - 2020

In [97]:
# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [98]:
import sys
sys.path.append('../src/')
sys.path.append('..')

import extraction
import itertools
import model
import persistence
import reporting
import sia_helpers

In [19]:
import json
import os
from collections import Counter

import ijson
import matplotlib.pyplot as plt
import music21 as m21
import numpy as np
import pandas as pd

### Load

Load the tab-to-centones mapping and the recording metadata.

In [61]:
# Root folder of the input data (relative path); the cells below join file
# and sub-folder names onto it.
data_path = '../data'

In [87]:
# Tab: list of centones
# Built from data_path (like every other input in this notebook) and decoded
# explicitly as UTF-8 so non-ASCII tab names load the same on every platform.
with open(os.path.join(data_path, 'centones_tab.json'), encoding='utf-8') as f:
    centones_tab = json.load(f)

# Recording Descriptions
andalusian_description = pd.read_json(os.path.join(data_path, 'andalusian_description.json'))
# Lookup built by the project helper from the recording descriptions
# (presumably recording MBIDs grouped per tab -- confirm in sia_helpers).
mbid_tab_lookup = sia_helpers.mbids_per_tab(andalusian_description)

In [88]:
# Remove spaces inside every centon, then drop duplicates.
# dict.fromkeys keeps the first occurrence of each centon in its original
# position, so the resulting lists are identical on every run; list(set(...))
# returns them in an order that changes between kernels.
centones_tab = {
    tab: list(dict.fromkeys(centon.replace(' ', '') for centon in centones))
    for tab, centones in centones_tab.items()
}

In [10]:
# Leave out the score that contains chords: note extraction further down
# fails on chords.
# NOTE(review): `scores` must already be defined when this cell runs; its
# definition is not part of this section of the notebook.
scores = [mbid for mbid in scores if mbid != '0386e377-7212-43e5-89b6-7f4c42d0ae74']

# Pattern extraction


Load all scores into a stream of notes

In [62]:
def extract_pattern_grams(notes, min_n=2, max_n=2):
    """
    Convert a sequence of note names <notes> into a bag of patterns.

    For every start position, each contiguous run of notes (n-gram) of length
    2 up to <max_n> is taken, clipped so it never runs past the end of the
    sequence. N-grams shorter than <min_n> and n-grams containing the rest
    marker 'R' are discarded.

    Returns a single string: each kept n-gram concatenated into one token,
    tokens separated by a single space, ordered by start position and then by
    length. Returns '' when nothing is kept.
    """
    total = len(notes)
    tokens = []
    for start in range(total):
        # Near the end of the sequence fewer than max_n notes remain
        longest = min(max_n, total - start)
        for size in range(2, longest + 1):
            if size < min_n:
                continue
            gram = notes[start:start + size]
            if 'R' not in gram:
                tokens.append(''.join(gram))
    return ' '.join(tokens)

def pattern_stream_from_score(path, rest_quarter_length=0):
    """
    Load a score from <path> and return its notes and rests in score order.

    Only the first part of the score is read. The result is a list of
    (name, octave, duration) tuples:
      - a note gives (pitch name, octave as a string, duration in quarter lengths)
      - a rest gives ('R', '', duration in quarter lengths)

    R represents a rest greater than or equal to <rest_quarter_length>
    ...rests shorter than <rest_quarter_length> are ignored

    Fails if score contains chords
    """
    # The notebook imports music21 under the alias m21
    s = m21.converter.parse(path)
    p = s.parts[0]

    # These are all the notes of the whole piece, fails for chords
    notes_and_rests = p.flat.notesAndRests.stream()
    notes = []
    for n in notes_and_rests:
        duration = n.duration.quarterLength
        if n.isRest:
            if duration < rest_quarter_length:
                # Short rest: skip it entirely instead of re-emitting the
                # previous element's name
                continue
            note = 'R'
            octave = ''
        else:
            note = str(n.pitch.name)
            octave = str(n.pitch.octave)
        notes.append((note, octave, duration))
    return notes

In [63]:
# Bag of patterns (note n-grams of length 3 to 7) for every score
mbid_patterns = {}
for score in scores:
    score_path = os.path.join(data_path, 'scores_xml', score + '.xml')
    try:
        notes = pattern_stream_from_score(score_path)
    except Exception as err:
        # Still best-effort (a score that cannot be read is skipped), but the
        # reason is reported so a systematic failure cannot go unnoticed.
        print('Skipping score {}: {}: {}'.format(score, type(err).__name__, err))
        continue
    # Keep only the note name; octave and duration are not part of a pattern
    notes = [nt[0] for nt in notes]
    patterns = extract_pattern_grams(notes, min_n=3, max_n=7)
    # split() without an argument returns [] (not ['']) when no pattern was found
    patterns = patterns.split()
    mbid_patterns[score] = patterns

# Extract SIA output patterns

Extract the relevant patterns per score. Only output patterns made up of consecutive notes in the score are accepted as valid.

In [None]:
# convert all scores to offset .txt file
# One <score>.txt per score, holding the space-separated converted offset of
# every note in the first part.
score_offset = os.path.join(data_path, 'scores_offset')
# Create the output folder once, and only if it is missing
os.makedirs(score_offset, exist_ok=True)
for score in scores:
    # The MusicXML sources live in scores_xml (the folder the pattern
    # extraction loop reads from); scores_offset only receives the output.
    score_path = os.path.join(data_path, 'scores_xml', score + '.xml')
    s = m21.converter.parse(score_path)
    p = s.parts[0]
    notes = p.flat.notes.stream()
    with open(os.path.join(score_offset, score + '.txt'), 'w') as f:
        for n in notes:
            # NOTE(review): newer music21 releases appear to expose this helper
            # as offsetToMidiTicks -- confirm against the installed version.
            f.write(str(m21.midi.translate.offsetToMidi(n.offset)) + ' ')

The SIA code was taken from the following repository: https://github.com/andrebola/patterns-genres (see it for more information).
SIA was applied to every score of our dataset separately using the SiaMain class. The results have been stored under the 'results/SIA_output' directory.

In [57]:
# Read, for every score, the first 'SiaTonic1' item of its SIA result file
sia_patterns = {}
for score in scores:
    # NOTE(review): the path is a placeholder and does not depend on `score`;
    # replace it with the location of each score's SIA output before running.
    sia_result_path = os.path.join('<path/to/SIA/results>')
    with open(sia_result_path, 'r') as f:
        sia_items = list(ijson.items(f, 'SiaTonic1'))
    sia_patterns[score] = sia_items[0]


In [59]:
# Step 1: keep only the patterns made of consecutive notes in each score
consecutive_patterns = sia_helpers.get_consecutive_patterns(score_offset, sia_patterns, scores)
# Step 2: translate the SIA pattern format into note names
sia_patterns = sia_helpers.sia_to_notename(consecutive_patterns)
# Step 3: group the resulting patterns per tab
sia_patterns_tab = sia_helpers.compute_patterns_per_tab(sia_patterns, mbid_tab_lookup, centones_tab)

# Evaluation

Evaluation of the output patterns using a minimum frequency of 59 occurrences per tab per score, based on the analysis in the paper.

In [73]:
# Minimum number of occurrences per tab per score for a pattern to be kept
min_occurrences = 59
final_patterns = sia_helpers.filter_patterns_by_min_n(
    scores, mbid_patterns, mbid_tab_lookup, sia_patterns_tab, min_occurrences
)

In [94]:
# Overall metrics
# Each helper returns an (R, P) pair computed over all tabs at once
# (presumably recall and precision -- confirm in sia_helpers).
R_exact, P_exact = sia_helpers.compute_exact_R_P(final_patterns, centones_tab)
R_super, P_super = sia_helpers.compute_superstring_R_P(final_patterns, centones_tab)
overall_report = "Overall metrics: \n With exact matches: R={:.2f}, P={:.2f} \n With superstring matches: R={:.2f}, P={:.2f}"
print(overall_report.format(R_exact, P_exact, R_super, P_super))

Overall metrics: 
 With exact matches: R=0.38, P=0.27 
 With superstring matches: R=0.41, P=0.29


In [100]:
# Metrics by tab
banner = '###########################################'
tab_report = "\n With exact matches: R={:.2f}, P={:.2f} \n With superstring matches: R={:.2f}, P={:.2f}"
for tab in final_patterns:
    # The per-tab helpers receive a single-entry dict holding only this tab
    tab_dict = {tab: final_patterns[tab]}
    print('\n' + banner + tab + banner)
    R_exact, P_exact = sia_helpers.compute_tab_exact_R_P(tab_dict, centones_tab)
    R_super, P_super = sia_helpers.compute_tab_superstring_R_P(tab_dict, centones_tab)
    print(tab_report.format(R_exact, P_exact, R_super, P_super))


###########################################al-‘uššāq###########################################

 With exact matches: R=0.12, P=0.22 
 With superstring matches: R=0.12, P=0.22

###########################################raml al-māya###########################################

 With exact matches: R=0.08, P=0.30 
 With superstring matches: R=0.08, P=0.30

###########################################raṣd al-ḏāyl###########################################

 With exact matches: R=0.18, P=0.25 
 With superstring matches: R=0.18, P=0.25

###########################################‘irāq al-‘aŷam###########################################

 With exact matches: R=0.12, P=0.33 
 With superstring matches: R=0.12, P=0.33

###########################################al-māya###########################################

 With exact matches: R=0.03, P=0.12 
 With superstring matches: R=0.03, P=0.12

###########################################al-raṣd###########################################

 With exac