Add Tatsu to have a real PEG parser from the EBNF grammar (#194)
Add 竜 TatSu as a dependency.
This enables us to have a real PEG parser and not a combination of
regexes and string splitting.

Fix parsing of quoted values as well as escaped semicolons.
This fixes #185 and fixes #193

Note: Adding TatSu might have made the parser significantly slower in some cases.
C4ptainCrunch committed Aug 17, 2019
1 parent 61453e4 commit 6b71a49
Showing 7 changed files with 85 additions and 40 deletions.
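
Before the per-file diffs, an illustrative sketch (not part of this commit) of why the old split-based parser fails on the inputs reported in #185 and #193: an escaped ';' inside a quoted parameter value is treated as a parameter separator.

# Illustrative only: mimics the split-based logic that this commit removes from ics/parse.py.
line = r'ATTENDEE;X-A="I&rsquo\;ll be in NYC":mailto:a@a.com'
key, value = line.split(':', 1)   # the key/value split happens to work for this line
params = key.split(';')           # but the escaped ';' cuts the quoted value in half
print(params)                     # ['ATTENDEE', 'X-A="I&rsquo\\', 'll be in NYC"']

The PEG grammar added below handles quoted values explicitly, so the full parameter value survives.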
15 changes: 13 additions & 2 deletions CHANGELOG.rst
@@ -7,13 +7,24 @@ Ics.py changelog
0.6
**************

Major changes:
- Drop support for Python 3.5. Python 3.7 is now distributed in both Ubuntu LTS
  and Debian stable, and the PSF provides only security fixes for 3.5. It's time
  to move on!
- Add `竜 TatSu <https://pypi.org/project/TatSu/>`_ as a dependency.
This enables us to have a real PEG parser and not a combination of
regexes and string splitting.

Minor features:
- Add mypy
- Drop support for Python 3.5. Python 3.7 is now distributed in both Ubuntu LTS and Debian stable,
  and the PSF provides only security fixes for 3.5. It's time to move on!
- Add GEO (thanks @johnnoone!)

Bug fixes:
- Events no longer have the TRANSP property by default (Fixes #190)
- Fix parsing of quoted values as well as escaped semicolons (#185 and #193)

Regressions:
- Adding TatSu might have made the parser significantly slower in some cases.


**************
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -3,6 +3,7 @@ include CHANGELOG.rst
include LICENSE
include AUTHORS.rst
include CONTRIBUTING.rst
include ics/contentline.ebnf
include meta.py
include tests/fixtures/README

5 changes: 1 addition & 4 deletions README.rst
@@ -6,9 +6,6 @@ Ics.py : iCalendar for Humans
.. image:: https://travis-ci.org/C4ptainCrunch/ics.py.png?branch=master
:target: https://travis-ci.org/C4ptainCrunch/ics.py

.. image:: https://coveralls.io/repos/C4ptainCrunch/ics.py/badge.png
:target: https://coveralls.io/r/C4ptainCrunch/ics.py
:alt: Coverage

.. image:: https://img.shields.io/github/license/c4ptaincrunch/ics.py.svg
:target: https://pypi.python.org/pypi/ics/
@@ -21,7 +18,7 @@ iCalendar is a widely-used and useful format but not user friendly. Ics.py is th

It should be able to parse every calendar that respects the `rfc5545 <http://tools.ietf.org/html/rfc5545>`_ and maybe some more… It also outputs rfc compliant calendars.

iCalendar (file extension `.ics`) is used by Sunbird, Google Calendar, Apple Calendar, Android
iCalendar (file extension `.ics`) is used by Google Calendar, Apple Calendar, Android and many more.


Ics.py is available for Python>=3.6 and is Apache2 Licensed.
31 changes: 31 additions & 0 deletions ics/contentline.ebnf
@@ -0,0 +1,31 @@
@@grammar::contentline
@@whitespace :: //
start = contentline $ ;
ALPHA = ?"[a-zA-Z]" ;
DIGIT = ? "[0-9]" ;
CRLF = "\r\n" ;
WSP = " ";
DQUOTE = '"' ;
QSAFE_CHAR = WSP | ?"\x21" | ?"[\x23-\x7E]" | ?"[\u0080-\uffff]";
SAFE_CHAR = WSP | ?"\x21" | ?"[\x23-\x2B]" | ?"[\x2D-\x39]" | ?"[\x3C-\x7E]" | ?"[\u0080-\uffff]" ;
VALUE_CHAR = WSP | ?"[\x21-\x7E]" | ?"[\u0080-\uffff]";
name = iana_token | x_name ;
iana_token = {(ALPHA | DIGIT | "-")}+ ;
x_name = "X-" [vendorid "-"] {(ALPHA | DIGIT | "-")}+ ;
vendorid = (ALPHA | DIGIT) (ALPHA | DIGIT) {(ALPHA | DIGIT)}+ ;
contentline = name:name {(";" params+:param )}* ":" value:value CRLF ;
param = name:param_name "=" values+:param_value {("," values+:param_value)}* ;
param_name = iana_token | x_name ;
param_value = quoted_string | paramtext ;
paramtext = {SAFE_CHAR}* ;
value = {VALUE_CHAR}* ;
quoted_string = DQUOTE @:{QSAFE_CHAR}* DQUOTE ;
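
A minimal usage sketch (not part of the diff; the file path and sample line are illustrative) showing how this grammar is compiled and queried with TatSu, mirroring what ics/parse.py now does:

import tatsu

with open('ics/contentline.ebnf') as fd:
    grammar = tatsu.compile(fd.read())

# Content lines must be CRLF-terminated, as the grammar requires.
ast = grammar.parse('DTEND;TZID="UTC":20190107T000000\r\n')
print(''.join(ast['name']))    # DTEND
print(''.join(ast['value']))   # 20190107T000000

param = ast['params'][0]
print(''.join(param['name']))                  # TZID
print([''.join(v) for v in param['values_']])  # ['UTC']  (the surrounding quotes are stripped)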
59 changes: 26 additions & 33 deletions ics/parse.py
@@ -2,11 +2,18 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, absolute_import
from pathlib import Path

import collections
import tatsu

CRLF = '\r\n'

grammar_path = Path(__file__).parent.joinpath('contentline.ebnf')

with open(grammar_path) as fd:
    GRAMMAR = tatsu.compile(fd.read())


class ParseError(Exception):
    pass
@@ -58,24 +65,18 @@ def __setitem__(self, item, *values):

    @classmethod
    def parse(cls, line):
        if ':' not in line:
            raise ParseError("No ':' in line '{}'".format(line))

        # Separate key and value
        splitted = line.split(':', 1)
        key, value = splitted[0], splitted[1].strip()

        # Separate name and params
        splitted = key.split(';')
        name, params_strings = splitted[0], splitted[1:]
        try:
            ast = GRAMMAR.parse(line + CRLF)
        except tatsu.exceptions.FailedToken:
            raise ParseError()

        # Separate key and values for params
        name = ''.join(ast['name'])
        value = ''.join(ast['value'])
        params = {}
        for paramstr in params_strings:
            if '=' not in paramstr:
                raise ParseError("No '=' in line '{}'".format(paramstr))
            pname, pvals = paramstr.split('=', 1)
            params[pname] = pvals.split(',')
        for param_ast in ast.get('params', []):
            param_name = ''.join(param_ast["name"])
            param_values = [''.join(x) for x in param_ast["values_"]]
            params[param_name] = param_values
        return cls(name, params, value)

def clone(self):
@@ -170,20 +171,12 @@ def string_to_container(txt):
    return lines_to_container(txt.splitlines())


if __name__ == "__main__":
    from tests.fixture import cal1

    def print_tree(elem, lvl=0):
        if isinstance(elem, list) or isinstance(elem, Container):
            if isinstance(elem, Container):
                print("{}{}".format(' ' * lvl, elem.name))
            for sub_elem in elem:
                print_tree(sub_elem, lvl + 1)
        elif isinstance(elem, ContentLine):
            print("{}{}{}".format(' ' * lvl,
                                  elem.name, elem.params, elem.value))
        else:
            print('Wuuut?')

    cal = string_to_container(cal1)
    print_tree(cal)
def interpret_ast(ast):
    name = ''.join(ast['name'])
    value = ''.join(ast['value'])
    params = {}
    for param_ast in ast.get('params', []):
        param_name = ''.join(param_ast["name"])
        param_values = [''.join(x) for x in param_ast["values_"]]
        params[param_name] = param_values
    return ContentLine(name, params, value)
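
For completeness, a hedged usage sketch of the reworked ContentLine.parse (assuming the module is importable as ics.parse, per the file above), using one of the fixtures added to tests/contentline.py below:

from ics.parse import ContentLine

cl = ContentLine.parse(r'ATTENDEE;X-A="I&rsquo\;ll be in NYC":mailto:a@a.com')
print(cl.name)            # ATTENDEE
print(cl.params['X-A'])   # ['I&rsquo\\;ll be in NYC']
print(cl.value)           # mailto:a@a.com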
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
python-dateutil
arrow>=0.11,<0.12
six>1.5
tatsu>4.2
13 changes: 12 additions & 1 deletion tests/contentline.py
@@ -7,7 +7,6 @@ class TestContentLine(unittest.TestCase):

    dataset = {
        'HAHA:': ContentLine('haha'),
        ':hoho': ContentLine('', {}, 'hoho'),
        'HAHA:hoho': ContentLine('haha', {}, 'hoho'),
        'HAHA:hoho:hihi': ContentLine('haha', {}, 'hoho:hihi'),
        'HAHA;hoho=1:hoho': ContentLine('haha', {'hoho': ['1']}, 'hoho'),
@@ -44,6 +43,18 @@ class TestContentLine(unittest.TestCase):
            {'hoho': ['p1', 'p2'], 'hihi': ['p3', 'p4', 'p5']},
            'blabla:blublu'
        ),
        r'ATTENDEE;X-A="I&rsquo\;ll be in NYC":mailto:a@a.com':
            ContentLine(
                'ATTENDEE',
                {'X-A': [r"I&rsquo\;ll be in NYC"]},
                'mailto:a@a.com',
            ),
        'DTEND;TZID="UTC":20190107T000000':
            ContentLine(
                "DTEND",
                {'TZID': ['UTC']},
                "20190107T000000"
            )
    }

    def test_errors(self):
