In [3]:
import re
from typing import Dict

import yaml

import re

In [19]:
def _yml_tuple_constructor(loader, node):
    # this little parse is really just for what I needed, feel free to change it!
    def parse_tup_el(el):
        # try to convert into int or float else keep the string
        if el.isdigit():
            return int(el)
        try:
            return float(el)
        except ValueError:
            return el

    value = loader.construct_scalar(node)
    # remove the ( ) from the string
    tup_elements = [ele.strip() for ele in value[1:-1].split(",")]
    # remove the last element if the tuple was written as (x,b,)
    if tup_elements[-1] == "":
        tup_elements.pop(-1)
    tup = tuple(map(parse_tup_el, tup_elements))
    return tup


In [4]:
# Sample YAML specification exercising tuple-like "(a, b)" scalars in several
# positions: as mapping keys, as block-sequence items, inside flow sequences
# ("[...]") and as plain mapping values.
# NOTE(review): `source_intervals` and `target_intervals` end in a "]" that has
# no matching "[" -- looks like a leftover from a flow list; confirm whether
# that is intentional before relying on the parsed result.
test_string = """
type: kinetic-spectrum

k_matrix:
  km1:
    matrix:
      (s1, s1): '1'
      (s2, s1): '2'

spectral_constraints:
  - type: zero
    compartment: s1
    interval:
      - (1, 100)
      - (2, 200)
  - [zero, s1, [(1, 100), (2, 200)]]

equal_area_penalties:
  - type: equal_area
    source: s3
    source_intervals: (670, 810)]
    target: s2
    target_intervals: (670, 810)]
    parameter: 55
    weight: 0.0016

spectral_relations:
  - compartment: s1
    target: s2
    parameter: 8
    interval: [(1,100), (2,200)]

weights:
  - datasets: [d1, d2]
    global_interval: (100, 102)
    model_interval: (301, 502)
    value: 42
"""

In [24]:
# "!tuple" is a custom tag name; any otherwise-unused tag name would do.
yaml.FullLoader.add_constructor("!tuple", _yml_tuple_constructor)
# Implicitly tag plain scalars that look like "(a, b)" as "!tuple".
# The third argument lists the characters a scalar may START with for this
# resolver to be consulted. It must therefore be "(": the previous '[]' filed
# the resolver under "[" and "]" only, so on a fresh kernel it was never tried
# for a scalar such as "(1, 100)".
# The pattern is applied with `match`, i.e. anchored at the start only, so a
# value with trailing text after ")" (e.g. "(670, 810)]") is tagged as well.
yaml.FullLoader.add_implicit_resolver(
    "!tuple", re.compile(r"\(.+(.*?,.*?).+\)"), ["("]
)

spec = yaml.load(test_string, Loader=yaml.FullLoader)
print(spec)

{'type': 'kinetic-spectrum', 'k_matrix': {'km1': {'matrix': {('s1', 's1'): '1', ('s2', 's1'): '2'}}}, 'spectral_constraints': [{'type': 'zero', 'compartment': 's1', 'interval': [(1, 100), (2, 200)]}, ['zero', 's1', ['(1', '100)', '(2', '200)']]], 'equal_area_penalties': [{'type': 'equal_area', 'source': 's3', 'source_intervals': (670, '810)'), 'target': 's2', 'target_intervals': (670, '810)'), 'parameter': 55, 'weight': 0.0016}], 'spectral_relations': [{'compartment': 's1', 'target': 's2', 'parameter': 8, 'interval': ['(1', '100)', '(2', '200)']}], 'weights': [{'datasets': ['d1', 'd2'], 'global_interval': (100, 102), 'model_interval': (301, 502), 'value': 42}]}


In [6]:
# Reference run with the stock safe loader: the "!tuple" constructor/resolver
# were registered on FullLoader only, so parenthesised values stay strings here.
spec = yaml.safe_load(test_string)
print(spec)

{'type': 'kinetic-spectrum', 'k_matrix': {'km1': {'matrix': {'(s1, s1)': '1', '(s2, s1)': '2'}}}, 'spectral_constraints': [{'type': 'zero', 'compartment': 's1', 'interval': ['(1, 100)', '(2, 200)']}, ['zero', 's1', ['(1', '100)', '(2', '200)']]], 'equal_area_penalties': [{'type': 'equal_area', 'source': 's3', 'source_intervals': '(670, 810)]', 'target': 's2', 'target_intervals': '(670, 810)]', 'parameter': 55, 'weight': 0.0016}], 'spectral_relations': [{'compartment': 's1', 'target': 's2', 'parameter': 8, 'interval': ['(1', '100)', '(2', '200)']}], 'weights': [{'datasets': ['d1', 'd2'], 'global_interval': '(100, 102)', 'model_interval': '(301, 502)', 'value': 42}]}


In [18]:
# Non-greedy pattern: captures the text between "(" and ")" whenever that text
# contains a comma.
reg_exp = r"\((.*?,.*?)\)"
# Compile once and list every captured group found in the sample YAML.
re.compile(reg_exp).findall(test_string)

['s1, s1',
 's2, s1',
 '1, 100',
 '2, 200',
 '1, 100',
 '2, 200',
 '670, 810',
 '670, 810',
 '1,100',
 '2,200',
 '100, 102',
 '301, 502']

In [36]:
# Display the compiled form of the current `reg_exp`.
# NOTE(review): the recorded output shows a "$name"-style pattern, not the tuple
# pattern assigned to `reg_exp` in the visible cells -- presumably `reg_exp`
# was rebound in a cell that is not shown; confirm before re-running.
re.compile(reg_exp)

re.compile(r'(\$[\w\d\.]+)', re.UNICODE)

In [37]:
# Spot check: all `reg_exp` matches in the first entry of `test_strings`.
# NOTE(review): `test_strings` is not defined in any cell visible here --
# presumably it comes from an earlier kernel session; confirm before re-running.
re.findall(reg_exp, test_strings[0])

['$1']

In [38]:
# Print the list of `reg_exp` matches for every entry of `test_strings`,
# one list per line.
# NOTE(review): `test_strings` is not defined in any cell visible here --
# presumably it comes from an earlier kernel session; confirm before re-running.
for s in test_strings:
    print(re.findall(reg_exp, s))

['$1']
['$kinetic.1', '$kinetic.2', '$kinetic.3']
[]
['$kinetic.1', '$kinetic.2']
['$kinetic.4']
['$kinetic.5']
['$group.sub_group.param1', '$kinetic6']
['$foo.7.bar', '$kinetic6']
['$1']
['$1', '$2']
['$1', '$5']


In [39]:
# Same loop as the previous cell: print the `reg_exp` matches per entry.
# NOTE(review): the recorded output is a single list, so `test_strings` was
# presumably rebound between the two runs; this duplicate cell is a candidate
# for removal -- confirm.
for s in test_strings:
    print(re.findall(reg_exp, s))

['$1']

In [8]:
# Names expected to be accepted: made up only of ASCII letters, digits and
# underscores.
valid_names = [
    "1",
    "valid1",
    "_valid2",
    "extra_valid3",
]

# Names expected to be rejected: each contains at least one character outside
# the ASCII word-character set [a-zA-Z0-9_].
invalid_names = [
    "testé",
    "kinetic.1",
    "kinetic_red.3",
    "foo.7.bar",
    "_ilikeunderscoresatbegeninngin.justbecause",
    "42istheanswer.42",
    "kinetic::red",
    "kinetic_blue+kinetic_red",
    "makesthissense=trueandfalse",
    # Backslash written as "\\": a bare "\s" is an invalid escape sequence and
    # triggers a warning, while the resulting string is exactly the same.
    "what/about\\slashes",
    "$invalid",
]

In [136]:
# Pattern that matches, from the start of the string, up to and including the
# last run of characters that are not ASCII letters, digits or underscores.
# A match therefore flags a name as invalid; `None` means it is acceptable.
reg_exp_valid = r".*\W+"
validator = re.compile(reg_exp_valid, re.ASCII)

In [137]:
# Run the validator over the names expected to pass. `validator` matches only
# when a non-word (ASCII) character is present, so `None` is the wanted result.
for s in valid_names:
    print(validator.match(s),s)

None 1
None valid1
None _valid2
None extra_valid3


In [138]:
# Run the validator over the names expected to fail. Each should yield a match
# object whose span ends at the last offending (non-word, ASCII) character.
for s in invalid_names:
    print(validator.match(s),s)

<re.Match object; span=(0, 5), match='testé'> testé
<re.Match object; span=(0, 8), match='kinetic.'> kinetic.1
<re.Match object; span=(0, 12), match='kinetic_red.'> kinetic_red.3
<re.Match object; span=(0, 6), match='foo.7.'> foo.7.bar
<re.Match object; span=(0, 31), match='_ilikeunderscoresatbegeninngin.'> _ilikeunderscoresatbegeninngin.justbecause
<re.Match object; span=(0, 14), match='42istheanswer.'> 42istheanswer.42
<re.Match object; span=(0, 9), match='kinetic::'> kinetic::red
<re.Match object; span=(0, 13), match='kinetic_blue+'> kinetic_blue+kinetic_red
<re.Match object; span=(0, 15), match='makesthissense='> makesthissense=trueandfalse
<re.Match object; span=(0, 11), match='what/about\\'> what/about\slashes
<re.Match object; span=(0, 1), match='$'> $invalid


In [66]:
# Alternative check: list every run of non-word (ASCII) characters per name.
# An empty list means the name contains only letters, digits and underscores.
for s in valid_names:
    print(re.compile(r"\W+", re.ASCII).findall(s), s)
print("----")
for s in invalid_names:
    print(re.compile(r"\W+", re.ASCII).findall(s), s)

[] 1
[] valid1
[] _valid2
[] extra_valid3
----
['é'] testé
['.'] kinetic.1
['.'] kinetic_red.3
['.', '.'] foo.7.bar
['.'] _ilikeunderscoresatbegeninngin.justbecause
['.'] 42istheanswer.42
['::'] kinetic::red
['+'] kinetic_blue+kinetic_red
['='] makesthissense=trueandfalse
['/', '\\'] what/about\slashes
['$'] $invalid
