Skip to content

ZeroGachis/magicparse

Repository files navigation

magicparse 🛸

Declarative parser

Usage

Parse content

import magicparse


# Declarative description of a header-less, semicolon-delimited CSV file.
schema = {
    "file_type": "csv",
    "has_header": False,
    "delimiter": ";",
    # Fields read directly from a column of the input.
    "fields": [
        {
            "key": "ean",
            "column-number": 2,
            "type": "str",
            # Checked against a 13-digit pattern.
            "validators": [
                {"name": "regex-matches", "parameters": {"pattern": "^\\d{13}$"}},
            ],
        },
        {"key": "label", "column-number": 3, "type": "str"},
        {"key": "family-code", "column-number": 8, "type": "str"},
        {
            "key": "vat",
            "column-number": 10,
            "type": "decimal",
            "optional": False,
        },
        {
            "key": "initial-price",
            "column-number": 11,
            "type": "decimal",
            # NOTE(review): presumably applied in the listed order
            # (divide, then round) -- confirm against the library.
            "post-processors": [
                {"name": "divide", "parameters": {"denominator": 100}},
                {"name": "round", "parameters": {"precision": 3}},
            ],
        },
        {
            "key": "unit-of-measurement",
            "column-number": 12,
            "type": "int",
            "pre-processors": [
                {
                    "name": "map",
                    "parameters": {"values": {"K": 0, "A": 1, "L": 2}},
                },
            ],
        },
    ],
    # Fields produced by a builder instead of being read from a column.
    # NOTE(review): the builders below reference keys ("code_1", "code_2",
    # "price", "unit") that are not declared in "fields" above -- presumably
    # illustrative only; confirm before copying this schema.
    "computed-fields": [
        {
            "key": "code",
            "type": "str",
            "builder": {
                "name": "concat",
                "parameters": {"fields": ["code_1", "code_2"]},
            },
        },
        {
            "key": "volume",
            "type": "decimal",
            "builder": {
                "name": "divide",
                "parameters": {
                    "numerator": "price",
                    "denominator": "price_by_unit",
                },
            },
        },
        {
            "key": "price_by_unit",
            "type": "decimal",
            "builder": {
                "name": "multiply",
                "parameters": {
                    "x_factor": "price",
                    "y_factor": "unit",
                },
            },
        },
    ],
}


# "..." stands in for the actual file content to parse.
rows, errors = magicparse.parse(data="...", schema=schema)

Register a custom transform and parse content

from uuid import UUID
import magicparse

class GuidConverter(magicparse.TypeConverter):
    """Custom type converter that turns a value into a ``uuid.UUID``."""

    @staticmethod
    def key() -> str:
        """Return the schema ``type`` name handled by this converter."""
        return "guid"

    def apply(self, value):
        """Build and return a ``UUID`` from ``value``."""
        guid = UUID(value)
        return guid


# Make the "guid" type available to schemas.
magicparse.register(GuidConverter)

schema = {
    "file_type": "csv",
    "fields": [
        {"key": "shop-guid", "type": "guid", "column-number": 1},
    ],
}

rows, errors = magicparse.parse("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2", schema)
# GuidConverter.apply returns a UUID instance, and a UUID never compares equal
# to a plain string, so the expected row must hold a UUID as well.
assert rows == [{"shop-guid": UUID("13ec10cc-cc7e-4ee9-b091-9caa6d11aeb2")}]
assert not errors

Register a custom schema and parse content

import magicparse

class PipedSchema(magicparse.Schema):
    """Custom schema for content whose items are separated by ``|``."""

    @staticmethod
    def key() -> str:
        """Return the ``file_type`` name that selects this schema."""
        return "piped"

    def get_reader(self, stream):
        """Yield a one-element list per ``|``-separated item of ``stream``."""
        content = stream.read()
        yield from ([piece] for piece in content.split("|"))

# Make the "piped" file type available to schemas.
magicparse.register(PipedSchema)

schema = {
    "file_type": "piped",
    "fields": [
        {"key": "name", "type": "str", "column-number": 1},
    ],
}

rows, errors = magicparse.parse("Joe|William|Jack|Averell", schema)
assert not errors
# One row is expected per pipe-separated item.
assert rows == [
    {"name": "Joe"},
    {"name": "William"},
    {"name": "Jack"},
    {"name": "Averell"},
]

API

File types

  • CSV (with or without header)
  • Columnar

Fields

Types

  • str
  • int
  • decimal
  • datetime (timezone aware)
  • time (timezone aware)

Pre-processors

  • left-pad-zeroes
  • map
  • regex-extract
  • replace
  • strip-whitespaces
  • left-strip

Validators

  • regex-matches
  • greater-than

Post-processors

  • divide
  • round

Computed Fields

Types, pre-processors, post-processors and validators are the same as for Fields.

Builder

  • concat
  • divide
  • multiply