In [288]:
from __future__ import annotations
import json
import dataclasses as dc
from typing import Optional, ClassVar, Iterator
from collections import Counter
import re, io, math

In [86]:
with open("../data/massive_abcnotation_dataset.json", "r") as file:
    data = json.load(file)

In [87]:
@dc.dataclass
class Tune:
    unit_note_length: str = ""  # L
    meter: str = ""  # M
    key: str = ""  # K
    tempo: str = ""  # Q
    tune: str = ""
    control_code: str = ""

    MODES: ClassVar[tuple[str]] = ("min", "dor", "mix", "lyd", "phr",  "loc")
    INTEGER_REGEX: ClassVar[re.Pattern] = re.compile("[0-9]+")

    @classmethod
    def parse(cls, text: str) -> Tune:
        unit_note_length, meter, key, tempo, tune = "", "", "", "", ""
        for line in text.splitlines():
            if line[:2] == "X:":
                continue
            elif line[:2] == "L:":
                unit_note_length = line
            elif line[:2] == "M:":
                if len(cls.INTEGER_REGEX.findall(line)) == 2:
                    meter = line
                else:
                    meter = ""
            elif line[:2] == "K:":
                key = line
            elif line[:2] == "Q:":
                tempo = line
            else:
                tune += f"{line}\n"

        return Tune(unit_note_length, meter, key, tempo, tune)
    
    def write_field_name(self, buffer: io.StringIO, field: str):
        if field:
            buffer.write(field)
            buffer.write("\n")
    
    def __str__(self):
        buffer = io.StringIO()
        self.write_field_name(buffer, self.unit_note_length)
        self.write_field_name(buffer, self.meter)
        self.write_field_name(buffer, self.key)
        self.write_field_name(buffer, self.tempo)
        buffer.write(self.tune)
        return buffer.getvalue()
    
    def key_and_mode(self) -> tuple[str, str]:
        for mode in self.MODES:
            regex = re.compile(mode)
            splitted = regex.split(self.key)
            if len(splitted) > 1:
                return splitted[0], mode
        return self.key, ""
    
    def split_meter(self) -> Optional[tuple[int, int]]:
        if self.meter:
            found = self.INTEGER_REGEX.findall(self.meter)
            return int(found[0]), int(found[1])
        else:
            return None

    def split_tempo(self) -> Optional[tuple[int, int, int]]:
        if self.tempo:
            found = self.INTEGER_REGEX.findall(self.tempo)
            return int(found[0]), int(found[1]), int(found[2])
        else:
            return None
    
    def unit_note_length_base(self) -> int:
        return int(self.INTEGER_REGEX.findall(self.unit_note_length)[1])

    def get_tempo_unit_base(self) -> int:
        if self.meter:
            return self.split_meter()[1]
        return self.unit_note_length_base()


class MassiveAbcnotationDataset:

    def __init__(self, data: Optional[list[Tune]] = None):
        self.data: list[Tune] = data

    def load(self, path: str) -> MassiveAbcnotationDataset:
        with open(path, "r") as file:
            data = json.load(file)
            self.data = [self.make_tune_from_data(*d.values()) for d in data]
        return self

    def make_tune_from_data(self, control_code: str, abc_notation: str) -> Tune:
        tune = Tune.parse(abc_notation)
        tune.control_code = control_code
        return tune

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: int) -> Tune:
        return self.data[idx]

In [88]:
dataset = MassiveAbcnotationDataset().load("../data/massive_abcnotation_dataset.json")

### Key

In [89]:
#@title hello

keys = Counter((tune.key for tune in dataset))
just_keys = Counter()
modes = Counter()

for tune in dataset:
    key, mode = tune.key_and_mode()
    just_keys.update([key])
    modes.update([mode])

# key = list(keys.keys())[3]
# split_mode_from_key(key)

# for key, count in keys.most_common():
#     jkey, mode = split_mode_from_key(key)
#     print(f"{key:7} {count:10} {split_mode_from_key(key)}")

for key, count in just_keys.most_common():
    print(f"{key:7} {count:10}")
print()
for mode, count in modes.most_common():
    print(f"{mode:7} {count:10}")

print(f"  splitted count: {len(just_keys) + len(modes)}")
print(f"unsplitted count: {len(keys)}")

K:G          87486
K:D          78424
K:A          41464
K:C          26576
K:E          17758
K:F          15277
K:Bb          7040
K:B           5259
K:none        2898
K:Eb          2033
K:Ab           609
K:F#           339
K:Db           243
K:Gb            29
K:C#            12
K:D#             1
K:Cb             1

            222909
min          33707
dor          18395
mix           9959
phr            405
lyd             73
loc              1
  splitted count: 24
unsplitted count: 62


### Meter

In [90]:
def explore_meter(dataset: MassiveAbcnotationDataset) -> tuple[int, Counter, Counter, Counter]:
    meters = Counter()
    left_meters = Counter()
    right_meters = Counter()
    count_none = 0
    for tune in dataset:
        meters.update([tune.meter])
        meter = tune.split_meter()
        if meter:
            left_meters.update([meter[0]])
            right_meters.update([meter[1]])
        else:
            count_none += 1
    return count_none, meters, left_meters, right_meters

count_none, meters, left_meters, right_meters = explore_meter(dataset)

# for meter, count in left_meters.most_common():
#     print(f"{meter:4} {count}")
# print()
# for meter, count in right_meters.most_common():
#     print(f"{meter:4} {count}")
# print()
# print("none:", count_none)

print(f"  splitted_count: {len(left_meters) + len(right_meters) + 1}")
print(f"unsplitted count: {len(meters)}")


  splitted_count: 46
unsplitted count: 81


### meter outliers

In [91]:
l_few = [lm for lm, c in left_meters.items() if c < 10]
r_few = [rm for rm, c in right_meters.items() if c < 10]
print(l_few)
print(r_few)

def is_meter_outlier(tune: Tune) -> bool:
    meter = tune.split_meter()
    if meter is None:
        return True
    l, r = meter
    return (l in l_few) or (r in r_few)

filtered_tunes = [ tune for tune in dataset if not is_meter_outlier(tune) ]
filtered_dataset = MassiveAbcnotationDataset(filtered_tunes)
print(len(dataset) - len(filtered_tunes))

[18, 432, 14, 22, 63, 46, 17, 45, 21, 26, 28]
[81, 6, 444, 28, 3, 12, 84, 9, 5, 86]
406


### tempo outliers

In [93]:
def normalise_tempo(tempo: int, base: int=10, precision: int=1) -> int:
    x = math.log(tempo, base)
    x = round(x, precision)
    x = base ** x
    return int(round(x))

def is_tempo_outlier(tune: Tune) -> bool:
    splitted_tempo = tune.split_tempo()
    if splitted_tempo is None:
        return False
    tempo = splitted_tempo[2]
    return is_tempo_out_of_bounds(tempo)

def is_tempo_out_of_bounds(tempo: int) -> bool:
    tempo = normalise_tempo(tempo)
    return tempo <= 10 or tempo > 700

filtered_tunes = [ tune for tune in filtered_dataset if not is_tempo_outlier(tune)]
print(len(filtered_dataset) - len(filtered_tunes))
filtered_dataset = MassiveAbcnotationDataset(filtered_tunes)

0


In [10]:
len(filtered_dataset)

284835

In [11]:
count_none, meters, left_meters, right_meters = explore_meter(filtered_dataset)

print(f"  splitted_count: {len(left_meters) + len(right_meters) + 1}")
print(f"unsplitted count: {len(meters)}")

  splitted_count: 25
unsplitted count: 51


In [12]:
temps = Counter()
ratios = Counter()

for tune in filtered_dataset:
    if tune.tempo:
        l, r, t = tune.split_tempo()
        # t = round(int(t), -1)
        temps.update([normalise_tempo(t)])
        ratios.update([(l, r)])

for t, count in temps.most_common():
    print(t, count)

for r, count in ratios.most_common():
    print(r, count)

print(f"temps: {len(temps)}  ratios: {len(ratios)}")

126 21479
100 17233
200 6360
79 5948
158 5591
63 3153
251 2745
316 1240
50 951
398 489
40 301
32 139
501 68
25 16
20 4
631 3
13 2
16 1
(1, 4) 27241
(1, 8) 19727
(1, 2) 8836
(3, 8) 7876
(3, 4) 2022
(3, 16) 12
(1, 1) 6
(3, 2) 3
temps: 18  ratios: 8


In [13]:
sloppy_temps: dict[int, Counter] = {t: Counter() for t in temps.keys()}
for tune in filtered_dataset:
    if tune.tempo == "":
        continue
    l, r, t = tune.split_tempo()
    tempo = normalise_tempo(t)
    sloppy_temps[tempo].update([t])
    
sloppy_to_neat = {}
for sloppy_tempo, counter in sloppy_temps.items():
    most_common_tempo = counter.most_common()[0][0]
    sloppy_to_neat[sloppy_tempo] = most_common_tempo
sloppy_to_neat

def neatly_normalise_tempo(tempo: int) -> int:
    sloppy_to_neat = {126: 120, 79: 80, 200: 180, 158: 160, 100: 100, 63: 60, 40: 40, 32: 30, 251: 232, 398: 360, 50: 50, 316: 300, 501: 460, 25: 28, 20: 18, 13: 12, 631: 600, 16: 15}
    return sloppy_to_neat[normalise_tempo(tempo)]

### tempos after neat normalisation

In [14]:
temps = Counter()
ratios = Counter()

for tune in filtered_dataset:
    if tune.tempo:
        l, r, t = tune.split_tempo()
        # t = round(int(t), -1)
        temps.update([neatly_normalise_tempo(t)])
        ratios.update([(l, r)])

for t, count in temps.most_common():
    print(t, count)

for r, count in ratios.most_common():
    print(r, count)

print(f"temps: {len(temps)}  ratios: {len(ratios)}")

120 21479
100 17233
180 6360
80 5948
160 5591
60 3153
232 2745
300 1240
50 951
360 489
40 301
30 139
460 68
28 16
18 4
600 3
12 2
15 1
(1, 4) 27241
(1, 8) 19727
(1, 2) 8836
(3, 8) 7876
(3, 4) 2022
(3, 16) 12
(1, 1) 6
(3, 2) 3
temps: 18  ratios: 8


({12,
  15,
  18,
  28,
  30,
  40,
  50,
  60,
  80,
  100,
  120,
  160,
  180,
  232,
  300,
  360,
  460,
  600},
 {(1, 1), (1, 2), (1, 4), (1, 8), (3, 2), (3, 4), (3, 8), (3, 16)})

In [39]:
just_keys
modes

Counter({'': 222909,
         'min': 33707,
         'dor': 18395,
         'mix': 9959,
         'phr': 405,
         'lyd': 73,
         'loc': 1})

In [43]:
key_set = set(just_keys.keys())
mode_set = set(modes.keys())

{'', 'dor', 'loc', 'lyd', 'min', 'mix', 'phr'}

In [106]:
def split_mode_from_key(key: str) -> tuple[str, str]:
    key = key.replace("[", "").replace("]", "")
    for mode in Tune.MODES:
        regex = re.compile(mode)
        splitted = regex.split(key)
        if len(splitted) > 1:
            return splitted[0], mode
    return key, ""


def split_meter(meter: str) -> Optional[tuple[int, int]]:
    if meter:
        found = Tune.INTEGER_REGEX.findall(meter)
        return int(found[0]), int(found[1])
    else:
        return None

def split_tempo(tempo: str) -> Optional[tuple[int, int, int]]:
    if tempo:
        found = Tune.INTEGER_REGEX.findall(tempo)
        if len(found) < 3:
            return 1, 1, 1 # invalid tempo
        return int(found[0]), int(found[1]), int(found[2])
    else:
        return None

# split_mode_from_key("[K:Abmin]")
# split_meter("[M:5/6]")
split_tempo("[Q:1/4=250]")

(1, 4, 250)

In [61]:
count_none, meters, left_meters, right_meters = explore_meter(filtered_dataset)
left_meters = set(left_meters.keys())
right_meters = set(right_meters.keys())

In [79]:
tempo_set = set(temps.keys())
tempo_ratio_set = set(ratios.keys())

In [63]:
right_meters

{1, 2, 4, 8, 16, 44}

In [None]:
tune.key_and_mode()

In [170]:
prepared_tunes: list[Tune] = []

In [354]:
re.escape(".af392|")

words = ["hello", "hell", "mark", "marking"]
words = [f"({word})" for word in words]
letters = [chr(ord("a") + i) for i in range(ord("z") - ord("a") + 1)]
# regex = f'{"|".join(words)}|{"|".join(letters)}'
regex = f'{"|".join(words)}'
regex = re.compile(regex)
# regex.split("vaosnehelloaksvlaehellalnvvaemarkanvimarking")

tokens = sorted(["|1", "|2", "|1,3", "|2,4", "|3", "|4"], key= lambda x: len(x), reverse=True)
re_tokens = "|".join([f"({re.escape(t)})" for t in tokens])
regex = re.compile(re_tokens)
regex.split("abcd |1 efgh |2 ebcd |1,3 |2,4")

['|1,3', '|2,4', '|1', '|2', '|3', '|4']


['abcd ',
 None,
 None,
 '|1',
 None,
 None,
 None,
 ' efgh ',
 None,
 None,
 None,
 '|2',
 None,
 None,
 ' ebcd ',
 '|1,3',
 None,
 None,
 None,
 None,
 None,
 ' ',
 None,
 '|2,4',
 None,
 None,
 None,
 None,
 '']

In [360]:
class Tokenizer:

    def __init__(self, multichar_tokens: set[str]):
        self.multichar_tokens = set(multichar_tokens)

        temp = sorted(list(multichar_tokens), key=lambda x: len(x), reverse=True)
        temp = [f"({re.escape(t)})" for t in temp]
        self.regex = re.compile("|".join(temp))
    
    def tokenize(self, text: str) -> Iterator[str]:
        for sth in self.regex.split(text):
            if sth is None:
                continue
            elif sth in self.multichar_tokens:
                yield sth
            else:
                yield from sth

tokenizer = Tokenizer(["|1", "|2", "|1,3", "|2,4", "|3", "|4"])
text = "abcd |1 efgh |2 ebcd |1,3 |2,4"
print(list(tokenizer.tokenize(text)))

['a', 'b', 'c', 'd', ' ', '|1', ' ', 'e', 'f', 'g', 'h', ' ', '|2', ' ', 'e', 'b', 'c', 'd', ' ', '|1,3', ' ', '|2,4']


In [None]:
"""
ab
  c (abc)
"""

In [330]:
class SuffixTree:

    def __init__(self, word: Optional[str]=None):
        self.word: Optional[str] = word
        self.children: dict[str, SuffixTree] = {}
        if word is not None:
            self.add_word(word)

    def is_leaf(self) -> bool:
        return bool(self.children)
    
    def __getitem__(self, char: str) -> Optional[SuffixTree]:
        if char in self.children:
            return self.children[char]
        return None
    
    def _repr(self, buffer: io.StringIO, indent_size=0) -> io.StringIO:
        if self.word:
            buffer.write(f' ({self.word}) ')
        if len(self.children) < 2:
            for char, child in self.children.items():
                buffer.write(char)
                child._repr(buffer, indent_size + 1)
        else:
            indent = " " * indent_size
            for char, child in self.children.items():
                buffer.write(f"\n{indent}{char}")
                child._repr(buffer, indent_size + 1)
        return buffer
    
    def has_word(self) -> bool:
        return bool(self.word)
    
    def has_children(self) -> bool:
        return bool(self.children)
    
    def __repr__(self):
        return self._repr(io.StringIO()).getvalue()

    def add_word(self, word: str, prefix: Optional[str]=None):
        if prefix is None:
            prefix = word
        
        if prefix == "":
            self.word = word
            return
        
        char = prefix[0]
        if char in self.children:
            self.children[char].add_word(word, prefix[1:])
        else:
            node = SuffixTree()
            node.add_word(word, prefix[1:])
            self.children[char] = node

    def find(self, word: str) -> tuple[bool, bool]:
        if word == "":
            return self.has_word(), not self.has_children()
        
        char = word[0]
        if char not in self.children:
            return False, False
        
        return self[char].find(word[1:])


tree = SuffixTree()
tree.add_word("hello")
tree.add_word("hell")
tree.add_word("hee")
tree.add_word("helli")
tree.add_word("he")
tree.add_word("hellim")
tree.add_word("bon")
tree.add_word("bono")
tree.add_word("bo")
tree.add_word("boo")

print(tree)

class Tokenizer:

    MULTI_TOKENS = ("|1", "|2", "|1,3", "|2,4", "|3", "|4")

    def tokenize(self, text: str) -> Iterator[str]:
        working_token = io.StringIO()
        for char in text:
            working_token.write(char)
            wt = working_token.getvalue()
            if wt not in self.MULTI_TOKENS:
                yield from wt


he (he) 
  ll (hell) 
    o (hello) 
    i (helli) m (hellim) 
  e (hee) 
bo (bo) 
  n (bon) o (bono) 
  o (boo) 


In [303]:
tree.children["h"].children["e"].children["l"].children["l"].children

{}

In [287]:

re_instrument = "(\[I:.*?\])"
re_info = r'("\^.*?[^\\]")'
re_down_comment = '("_.*?")'
re_special_symbols = "(!.*?!)"
re_chars = "([xvuO])"

preprocessing = re.compile(f"{re_instrument}|{re_info}|{re_down_comment}|{re_special_symbols}|{re_chars}")

re_context = r'(\[[KMQ]:.*?\])'
re_chord = r'(".*?[^\\]")'

temporarily = re.compile(f"{re_context}|{re_chord}")

dumb_parentheses = re.compile(r'"\^".*?".*?"')

to_cut = [dumb_parentheses, preprocessing, temporarily]
# to_cut = []


re_one = r'(\[[LTPONC]:.*?\])'
re_two = r'(SOSOO)|(TDCO)'
re_three = r'[Hmtsorpnliky]'
re_filter = re.compile(f"{re_one}|{re_two}")
def regex_filter(tune: Tune) -> bool:
    return bool(re_filter.findall(tune.tune))

re_key = re.compile("\[K:.*?\]")
def key_filter(tune: Tune) -> bool:
    for key_change in re_key.findall(tune.tune):
        key, mode = split_mode_from_key(key_change)
        if key not in key_set or mode not in mode_set:
            return True
    return False

re_meter = re.compile("\[M:.*?\]")
def meter_filter(tune: Tune) -> bool:
    for meter_change in re_meter.findall(tune.tune):
        sm = split_meter(meter_change)
        if sm:
            l, r = sm
            if l not in left_meters or r not in right_meters:
                return True
    return False

re_tempo = re.compile("\[Q:.*?\]")
def tempo_filter(tune: Tune) -> bool:
    for tempo_change in re_tempo.findall(tune.tune):
        st = split_tempo(tempo_change)
        if st:
            l, r, t = st
            if is_tempo_out_of_bounds(t):
                return True
            t = neatly_normalise_tempo(t)
            if (l, r) not in tempo_ratio_set or t not in tempo_set:
                return True
    return False

filters = [regex_filter, key_filter, meter_filter, tempo_filter]

re_bar_lines = re.compile(r'(\|\])|(\|\|)|(\[\|)')
def normalise_bar_lines(notes: str) -> str:
    return re_bar_lines.sub("|", notes)

def filter_tune(tune: Tune, filters) -> bool:
    for f in filters:
        if f(tune):
            return True
    return False

re_after = re.compile(r'[Hmtsorpnliky]|( [0-9]+\/?[0-9]*)')
re_repetitions = re.compile(r'\|[0-9]+,?[0-9]*')
def after_filter(notes: str) -> bool:
    if bool(re_after.findall(notes)):
        return True
    if found := re_repetitions.findall(notes):
        for rep in found:
            if rep not in ("|1", "|2", "|1,3", "|2,4", "|3", "|4"):
                return True
    return False


USE_PREPARED = True
if prepared_tunes and USE_PREPARED:
    print("using prepared")
    use_preprocessing = False
    tune_working_set = (t for t in prepared_tunes)
else:
    print("using raw dataset")
    use_preprocessing = True
    prepared_tunes: list[Tune] = []
    tune_working_set = (t for t in filtered_dataset)

# regex = re.compile("\(\D.*?\)")
# regex = re.compile("\[[A-Z]:.*?\]")
# regex = re.compile("\(.*?\)")
# regex = re.compile("\(\..*?\)")



# regex = re.compile(r'([0-9]+(?:\/[0-9])*)')
# regex_cut = re.compile(r'\/[0-9]+')
# regex = re.compile(r'[0-9]+')
# regex = re.compile(r'\|[0-9]+')
# regex = re.compile(r' [0-9]+\/?[0-9]*')

counter = Counter()





# regex = re.compile('"\^"Jig')
# regex = re.compile('"\^"')
parathesis = set()
count = 0
filtered_count = 0
ammended_count = 0
for i, tune in enumerate(tune_working_set):
    notes = tune.tune

    if use_preprocessing:
        if filter_tune(tune, filters):
            filtered_count += 1
            continue

        for r in to_cut:
            notes = r.sub("", notes)

        if notes != tune.tune:
            ammended_count += 1
        
        notes = normalise_bar_lines(notes)

        if after_filter(notes):
            filtered_count += 1
            continue

        new_tune = dc.replace(tune)
        new_tune.tune = notes
        prepared_tunes.append(new_tune)

    parathesis |= set(notes)

    counter.update(set(regex.findall(notes)))
    # if regex.findall(notes):
    #     count += 1
    #     print(tune)

        # if found := regex.findall(notes):
    # if found := re.findall("\|1999,06", notes):
    #     count += 1
    #     if count < 100:
    #         # print(i)
    #         # print(found)
    #         print(tune)
    #         # print(notes)

    # found = regex.findall(notes)
    # parathesis |= set(found)
    # count += 1 if len(found) > 0 else 0

# T (trill)


for l, count in counter.most_common():
    print(l, count)

print(len(parathesis), count / len(filtered_dataset), count)
print("filtered:", filtered_count)
print("ammended:", ammended_count)

# 2 4 8 3 64 32 16 | 7 6 63 6449 60 56 48

using prepared
2 272596
3 171759
4 76007
1 64547
6 21527
3/2 18071
8 6471
5 535
3/4 410
12 398
7 330
7/2 277
7/4 143
16 117
9/4 107
9 82
5/2 64
9/2 32
7/8 28
2/3 28
27/8 21
3/8 16
15 16
14 15
19 14
23 12
10 12
11 9
9/8 7
22 7
32 6
47/6 6
24 6
21 6
33 5
93 5
30 5
13 5
4/3 5
45/6 4
1950 4
21/2 4
25/3 4
17 4
21/8 4
20 4
11/2 4
1774 3
1930 3
15/2 3
11/1 2
1978 2
93/2 2
71 2
1850 2
8/3 2
1979 2
1861 2
5/4 2
11/4 1
1727 1
196 1
1960 1
13/2 1
1653 1
79/6 1
21/3 1
0 1
27 1
1859 1
1770 1
1809 1
23/3 1
1005/5 1
1789 1
99 1
1747 1
65 1
1981 1
63/1 1
3900 1
23/2 1
1882 1
1872 1
44 1
1867 1
1907 1
1805 1
50 1
60 1
1780 1
200 1
75 1
251/6 1
1977 1
25/1 1
33/2 1
13/4 1
1794 1
18 1
1740 1
103 1
45/3 1
101/6 1
1969 1
95 1
1842 1
19/4 1
25/6 1
3388 1
1743 1
83/4 1
3/1 1
243 1
10685 1
37/8 1
50 3.51080450085137e-06 1
filtered: 0
ammended: 0


In [249]:
len(prepared_tunes), len(filtered_dataset)

(220996, 284835)

In [255]:
print(sorted(parathesis))

['\n', ' ', '"', "'", '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', '<', '=', '>', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'M', 'P', 'S', 'T', '[', ']', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'z', '{', '|', '}']


In [None]:
# iklmnoprstuvxy HMOPST

In [None]:
# do wywalenia: B C F G H I R S T Z
# do zostawienia: K M Q
# ? L

In [48]:
for key_change in parathesis:
    key, mode = split_mode_from_key(key_change)
    if key not in key_set or mode not in mode_set:
        print(key_change, key, mode)

In [69]:
for meter_change in parathesis:
    sm = split_meter(meter_change)
    if sm:
        l, r = sm
        if l not in left_meters or r not in right_meters:
            print(meter_change, l, r)

In [109]:
for tempo_change in parathesis:
    st = split_tempo(tempo_change)
    if st:
        l, r, t = st
        try:
            if is_tempo_out_of_bounds(t):
                print(tempo_change, (l, r), t)
                continue
            t = neatly_normalise_tempo(t)
            if (l, r) not in tempo_ratio_set or t not in tempo_set:
                print(tempo_change, (l, r), t)
        except Exception as ex:
            print(tempo_change, repr(ex))

In [161]:
print(sorted(parathesis))

['\n', ' ', '"', "'", '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', '<', '=', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'M', 'O', 'P', 'S', 'T', '[', '\\', ']', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '{', '|', '}']


In [157]:
for x in sorted(list(parathesis)):
    print(x)

((([fdB]4 [fdB]2)
((=B | (c)
((A A4)
((A B)
((A G2-)
((A c2)
((A | B)
((A |: A>)
((A,C A,2)
((A/B/)
((A/G/)
((A2{BA)
((A3 A)
((A4 A)
((A>c)
((AB)
((AG)
((A_B-)
((B A)
((B G)
((B c d)
((B c)
((B,<B)
((B,A,)
((B/>c/)
((B/d/-)
((B2 d2)
((B2 | A)
((B2B/)
((B4 A2)
((BA)
((BG)
((B[ee])
((Bc)
((Bd)
((B{cB)
((C<c)
((CE)
((D B,2-)
((D d3)
((D | D)
((D6 |1 D4)
((D8 | D8)
((DFF)
((E [AA]2)
((E/ | [FA]2)
((E/G/)
((ED)
((EF)
((EG-)
((E{FE)
((F F2)
((F/A/)
((F/G/)
((F/G/A/B/c/d/e/f/g/a/g/b/a/)
((F2 G)
((F2A2)
((F3/2{GF)
((FE)
((F{GF)
((G A)
((G F)
((G [AA]2)
((G/ A3/2)
((G/>A/)
((G2 A)
((G2 d)
((G4 E2)
((G4 | F)
((GA)
((GA/G/)
((GAB)
((GB)
((GE)
((GF)
(([A,D-][B,D]-)
(([A,E]2 :|2 A2)
(([A,E]2 [Gc])
(([A-D] [AE]2)
(([A-F] [AA]2)
(([A-e][Af])
(([Aa] [Bb])
(([Ac]3 [GB])
(([Ac]4 [Bd]2)
(([Ae] [Bg]2)
(([Ae] [Dd])
(([Ae] [ca]2)
(([Ae] [df]3)
(([Ae] | [Bf]2)
(([Ae]([B-f])
(([Ae]2 e)
(([Ae]2 | [^ca])
(([Ae][Bg]-)
(([Ae][cg])
(([Ae][cg]-)
(([Ae]c)
(([B,D]/[CE]/)
(([B,G]2 | [CA]6)
(([B-g]([B-a])
(([BF]4 | [cE