In [1]:
import re
import json
from pprint import pprint
from yaml import safe_load
import toolz


In [2]:
def clean_title(st):
    """Split a raw heading into its title and its trailing context.

    Quotes, spaces, newlines and ``#`` characters are stripped from both ends
    of ``st`` first. The text after the last ``(`` is the context; what comes
    before it is the title.

    Args:
        st: Raw heading text, e.g. ``'# Some title (who, where)'``.

    Returns:
        A ``(title, context)`` tuple of strings. ``context`` is ``''`` when
        the heading contains no ``(``.
    """
    cleaned = st.strip('"\' \n#')
    head, sep, ctx = cleaned.rpartition('(')
    if not sep:
        # Without a '(' rpartition returns ('', '', cleaned); taking that at
        # face value would yield an empty title and the whole heading as
        # context, so treat the heading as title-only instead.
        return cleaned.strip(), ''
    return head.strip(), ctx.strip(' )')

def youtube_embed(st):
    """Build a YouTube embed URL from a watch URL.

    The video id is the text following the first ``v=`` in ``st``, cut at the
    first ``&`` or ``#`` so extra query parameters or fragments (``&t=42s``,
    ``&list=...``) do not end up in the embed path.

    Args:
        st: A URL string such as ``'https://www.youtube.com/watch?v=<id>'``.

    Returns:
        ``'https://www.youtube.com/embed/<id>'``. When ``st`` has no ``v=``
        the id is empty and only the embed prefix is returned.
    """
    _, _, ref = st.partition('v=')
    # Drop anything that follows the id itself.
    for delimiter in ('&', '#'):
        ref = ref.partition(delimiter)[0]
    return 'https://www.youtube.com/embed/' + ref


def remove_empty_lines(st):
    """Return ``st`` with blank lines removed.

    A line is blank when it is empty or contains only whitespace. The
    remaining lines are re-joined with ``'\\n'``.

    Args:
        st: Multi-line text.

    Returns:
        The text without blank lines (and without a trailing newline).
    """
    # ``''.isspace()`` is False, so testing ``not line.isspace()`` would keep
    # zero-length lines; ``line.strip()`` is falsy for both kinds of blank.
    return '\n'.join(line for line in st.splitlines() if line.strip())


def parse_re(regex, doc):
    """Consume a prefix of ``doc`` that matches ``regex``.

    Args:
        regex: A compiled pattern with at least one capturing group.
        doc: The text to match from its beginning.

    Returns:
        A ``(captured, rest)`` tuple: the first capturing group and the part
        of ``doc`` that follows the whole match.

    Raises:
        TypeError: If ``regex`` does not match at the start of ``doc``.
    """
    match = regex.match(doc)
    if match is not None:
        remainder = doc[match.end():]
        return match.group(1), remainder
    raise TypeError('do not match document')



def parse_rant(rant):
    """Split a rant line into its text and its ``<...>`` source.

    The line is cut at the last ``<``: what precedes it (whitespace-trimmed)
    is the text, and what follows it, minus trailing ``>`` characters, is
    handed to ``parse_source`` (defined outside this block).

    Args:
        rant: A string of the form ``'some text <source>'``.

    Returns:
        A dict with keys ``'text'`` and ``'source'``; the latter holds
        whatever ``parse_source`` returns.
    """
    body, _, origin = rant.rpartition('<')
    return {
        'text': body.strip(),
        'source': parse_source(origin.rstrip('>')),
    }




def parse_story(st):
    """Parse one story chunk of the source document into a dict.

    The chunk is split on ``'\\n##'``: the first piece is the story head and
    every following piece is passed to ``parse_event``. The head is handed to
    ``parse_ternary``, which is expected to return ``(title, meta, content)``
    where ``meta`` is a mapping (presumably the parsed YAML block; verify
    against ``parse_ternary``).

    Args:
        st: Raw text of a single story.

    Returns:
        A dict with the keys ``'utter'``, ``'context'``, ``'bible'``,
        ``'ref'``, ``'image'``, ``'youtube'``, ``'rants'`` and ``'events'``.

    Raises:
        Exception: Any error raised while parsing is re-raised unchanged
            after the first line of the story has been printed, so the
            offending story can be located in the source document.
    """
    try:
        st = st.strip()
        story = {}

        # Split the story head from its events.
        head, *events = st.split('\n##')
        title, meta, content = parse_ternary(head.strip())
        story['utter'], story['context'] = clean_title(title)

        # The last content line is the reference; the lines before it are
        # the quote itself.
        *quote, who = content.splitlines()
        story['bible'] = '\n'.join(quote)
        story['ref'] = who.strip(' -')

        # Metadata: 'capa' is optional (missing or empty falls back to the
        # generic image); 'video' is required.
        story['image'] = meta.get('capa') or '/static/generic.jpg'
        story['youtube'] = youtube_embed(meta['video'])

        # 'outras' is optional; each entry may still carry its list marker.
        story['rants'] = [
            parse_rant(entry.lstrip('- '))
            for entry in (meta.get('outras') or ())
        ]

        story['events'] = [parse_event(event) for event in events]
    except Exception:
        # Identify the failing story by its first line, then propagate.
        print('story:', st.partition('\n')[0])
        raise
    return story

In [3]:
# Read the whole source document inside a context manager so the file handle
# is closed before parsing starts. Stories are separated by '\n---'.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used — confirm data.md's encoding if this must run on other systems.
with open('data.md') as src:
    raw_document = src.read()

stories = [parse_story(chunk) for chunk in raw_document.split('\n---')]

# Export every parsed story for the static site.
with open('static/data.json', 'w') as fd:
    json.dump(stories, fd)

# Show the first story as a quick sanity check of the parse.
stories[0]

{'utter': 'Vamos fuzilar a petralhada',
 'context': 'Jair Bolsonaro, em comício no Acre',
 'bible': 'Aí Jesus disse: "Guarde a sua espada, pois quem usa a espada será morto por uma espada".',
 'ref': 'Mateus 26:52',
 'image': '/static/pomba-laranja.jpg',
 'youtube': 'https://www.youtube.com/embed/FYErb6oriiU',
 'rants': [{'text': 'Quanto mais se matar, melhor',
   'source': {'name': 'Youtube',
    'url': 'https://www.youtube.com/watch?v=Ii-Bo9HPAeE'}},
  {'text': 'Violência se combate com violência',
   'source': {'name': 'Youtube',
    'url': 'https://www.youtube.com/watch?v=o8ECr0eDEGo'}},
  {'text': 'Bolsonaro ensina criança a fazer gesto de arma e causa revolta',
   'source': {'name': 'Youtube',
    'url': 'https://www.youtube.com/watch?v=hpep_70CWlw'}}],
 'events': [{'title': 'Mestre de capoeira é morto por eleitor de Bolsonaro após declarar voto no PT',
   'image': 'https://extra.globo.com/incoming/23139253-c7d-8af/w640h360-PROP/xmoa.jpg.pagespeed.ic.jMLoqYFCWZ.jpg',
   'text': '