# Profiling

In [1]:
from chmp.tools.profiler import collect_profile, plot_profile

from bokeh.plotting import output_notebook
import numpy as np

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Configure Bokeh to render its output inline in the notebook. This has to run
# before any Bokeh-based display (presumably profile.show() below -- confirm).
output_notebook()

In [3]:
# Run a small NumPy workload inside the profiling context so that there is
# something to inspect afterwards: 20,000 iterations, each drawing 1,000
# normal samples and taking their mean. The collected data is bound to
# `profile` by the context manager.
with collect_profile() as profile:
    for _ in range(20_000):
        np.random.normal(size=1_000).mean()

In [4]:
# Display the profile collected in the previous cell.
profile.show()

# Looping

In [5]:
import time

from chmp.ds import Loop

In [6]:
# Simple usage: Loop.over yields (loop, item) pairs. The loop object is used
# to print a status line on every iteration.
for loop, _ in Loop.over(range(10)):
    time.sleep(0.5)  # stand-in for real work
    loop.print(f'{loop}')
print()    
# After the loop has finished, printing the loop object shows its final state.
print(loop)
print()

# Use loop nesting while showing both the outer and the nested progress.
# NOTE(review): the format specs (`br`, `rfB`) and `loop[-1]` are defined by
# chmp.ds.Loop; `loop[-1]` presumably refers to the innermost nested loop --
# confirm against the Loop documentation.
for loop, _ in Loop.over(range(10)):
    for _ in loop.nest(range(10)):
        time.sleep(0.1)  # stand-in for real work
        loop.print(f'[{loop:br} ({loop[-1]:rfB})]')
print()
print(loop)

[⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ 5.04s / 5.04s]                                                                                              
[done. took 5.04s]

[⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿ 0.00s (0.00s 100.0% ⣿)]                                                                                     
[done. took 10.45s]


# Building custom parsers 

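Example: a WebVTT parser. The cell below builds it from `chmp.parser` combinators; it consumes the file as a list of lines and returns a dict with the header fields and the subtitles.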

In [7]:
import re

import chmp.parser as p

    
def webvtt_parser():
    """Assemble the parser for a complete WebVTT document.

    The grammar is, in order: the ``WEBVTT`` marker line (wrapped in
    ``p.ignore``), the header block, one empty line, and a repeated run of
    subtitle cues.  The pieces produced by the sequence are dicts that get
    merged into a single result dict; the cues are collected under the
    ``'subtitles'`` key.
    """
    def merge_sections(sections):
        # Later sections overwrite earlier ones on key collisions.
        merged = {}
        for section in sections:
            merged.update(section)

        return merged

    def wrap_cues(cues):
        # Wrapped in a one-element list so it can be merged like the
        # other sections.
        return [{'subtitles': [*cues]}]

    # Built in the same order in which the grammar consumes the input.
    magic_line = p.ignore(p.regex('^WEBVTT$'))
    header_block = header()
    separator = empty_line()
    cue_block = p.apply(wrap_cues, p.repeat(subtitle()))

    document = p.sequential(magic_line, header_block, separator, cue_block)
    return p.apply(merge_sections, document)


def empty_line():
    """Return a parser for a blank line (regex ``^$``) wrapped in ``p.ignore``.

    NOTE(review): ``p.ignore`` presumably drops the match from the parse
    result -- confirm against chmp.parser.
    """
    blank = p.regex('^$')
    return p.ignore(blank)


def header():
    """Return the parser for the WebVTT header block.

    Header lines have the form ``Key: value``.  All consecutive matches are
    gathered into a single dict (values stripped of surrounding whitespace)
    which is returned as ``[{'header': {...}}]``.
    """
    def collect_fields(matches):
        # A repeated key keeps the value of its last occurrence.
        fields = {
            match['key']: match['value'].strip()
            for match in matches
        }
        return [{'header': fields}]

    field_line = p.regex(r'^(?P<key>\w+):(?P<value>.*)$')
    return p.apply(collect_fields, p.repeat(field_line))


def timestamp():
    """Build the parser for a WebVTT cue timing line.

    The line has the form ``[hh:]mm:ss.ttt --> [hh:]mm:ss.ttt``, optionally
    followed by further text that is captured in the ``params`` group.  The
    regex match is passed through ``p.map`` and converted into
    ``{'from': <seconds>, 'to': <seconds>}``, both values being floats.  The
    ``params`` group is matched but not included in the result.
    """
    timestamp_pattern = r'''
        ^
            (?:(?P<from_hours>\d{2}):)?
            (?P<from_minutes>\d{2}):
            (?P<from_seconds>\d{2})\.
            (?P<from_milliseconds>\d{3}) 

            \s*-->\s*

            (?:(?P<to_hours>\d{2}):)?
            (?P<to_minutes>\d{2}):
            (?P<to_seconds>\d{2})\.
            (?P<to_milliseconds>\d{3})
            
            (?P<params>.*)
        $
    '''

    seconds_per_minute = 60
    seconds_per_hour = 60 * seconds_per_minute

    def _def(d, k, default):
        """Return ``d[k]``, or ``default`` if the key is missing or ``None``."""
        res = d.get(k)
        if res is not None:
            return res

        return default

    def _to_seconds(d, prefix):
        """Convert the ``<prefix>_*`` groups of a match into seconds."""
        return (
            # The hours group is optional in the pattern, so fall back to 0.
            # An hour is 3600 seconds (the previous factor of 24 * 60 was
            # wrong for any timestamp with an hours component).
            seconds_per_hour * int(_def(d, f'{prefix}_hours', 0)) +
            seconds_per_minute * int(d[f'{prefix}_minutes']) +
            int(d[f'{prefix}_seconds']) +
            1e-3 * int(d[f'{prefix}_milliseconds'])
        )

    return p.map(
        lambda d: {'from': _to_seconds(d, 'from'), 'to': _to_seconds(d, 'to')},
        p.regex(timestamp_pattern, re.VERBOSE),
    )


def subtitle():
    """Return the parser for a single subtitle cue.

    A cue consists of a timing line, the text lines that follow it, and a
    terminator that is either an empty line or the end of the input.  The
    result is a one-element list holding a dict with the timing fields and
    the cue text joined by single spaces under ``'lines'``.
    """
    def gather_text(matches):
        # Pull the raw text out of every matched line.
        return [{'lines': [match['line'] for match in matches]}]

    def assemble_cue(parts):
        cue = {}
        for part in parts:
            cue.update(part)

        # Collapse the list of text lines into one string; a cue without
        # text ends up with an empty string.
        text_lines = cue.get('lines', [])
        cue['lines'] = ' '.join(text_lines)

        return [cue]

    # Built in the same order in which the grammar consumes the input.
    timing = timestamp()
    text = p.apply(gather_text, p.repeat(p.regex(r'^(?P<line>.+)$')))
    terminator = p.ignore(p.first(empty_line(), p.end_of_sequence()))

    return p.apply(assemble_cue, p.sequential(timing, text, terminator))

In [8]:
# A small WebVTT document, already split into lines: the WEBVTT marker, two
# header fields, an empty separator line, and four cues separated by empty
# lines. The last cue is terminated by the end of the input, not a blank line.
lines = [
    'WEBVTT',
    'Kind: captions',
    'Language: en',
    '',
    '00:09.000 --> 00:11.000',
    'Bar',
    '',
    '00:11.000 --> 00:13.000',
    'Foo',
    '',
    '00:13.000 --> 00:16.000',
    'Hello',
    '',
    '00:16.000 --> 00:18.000',
    'World',
]

# Build the parser once, then run it over the list of lines.
parser = webvtt_parser()
parsed = p.parse(parser, lines)
print(parsed)

{'header': {'Kind': 'captions', 'Language': 'en'}, 'subtitles': [{'from': 9.0, 'to': 11.0, 'lines': 'Bar'}, {'from': 11.0, 'to': 13.0, 'lines': 'Foo'}, {'from': 13.0, 'to': 16.0, 'lines': 'Hello'}, {'from': 16.0, 'to': 18.0, 'lines': 'World'}]}
