Skip to content

Commit

Permalink
yaml file support
Browse files Browse the repository at this point in the history
jsonpath support for record_level param
  • Loading branch information
daigotanaka committed May 3, 2021
1 parent 11bb8c3 commit 6f00442
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 7 deletions.
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
## History

### 0.2.0 (2021-05-02)

- feature: jsonpath support for the record_level parameter in getschema.infer_schema function.
- feature: YAML file support

### 0.1.2 (2020-12-22)

- fix: allow empty dict (Issue #1)
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[![Build Status](https://travis-ci.com/daigotanaka/getschema.svg?branch=master)](https://travis-ci.com/daigotanaka/getschema)

💥 New: jsonpath support for the record_level parameter in getschema.infer_schema function.

# getschema

Get jsonschema from sample records
Expand All @@ -17,7 +19,7 @@ optional arguments:
-h, --help show this help message and exit
--indent INDENT, -i INDENT
Number of spaces for indentation
--type TYPE, -t TYPE Record format (json, csv)
--type TYPE, -t TYPE Record format (json, yaml, csv)
--skip SKIP, -s SKIP Skip first n records. Don't skip the header row.
--lower, -l Convert the keys to lower case'
--replace_special REPLACE_SPECIAL, -r REPLACE_SPECIAL
Expand All @@ -31,6 +33,7 @@ Module functions:
(See impl.py)
- infer_schema
- infer_from_json_file
- infer_from_yaml_file
- infer_from_csv_file
- fix_type

Expand Down
2 changes: 1 addition & 1 deletion bin/release_build
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

./test/install_test.sh &&
./tests/install_test.sh &&

python3 setup.py clean --all
rm -fr dist
Expand Down
2 changes: 1 addition & 1 deletion getschema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def main():
parser.add_argument("--indent", "-i", default=2, type=int,
help="Number of spaces for indentation")
parser.add_argument("--type", "-t", default="json", type=str,
help="Record format (json, csv)")
help="Record format (json, yaml, csv)")
parser.add_argument("--skip", "-s", default=0, type=int,
help="Skip first n records. Don't skip the header row.")
parser.add_argument("--lower", "-l", default=False, action="store_true",
Expand Down
27 changes: 25 additions & 2 deletions getschema/impl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
import argparse, csv, datetime, dateutil, os, re, sys
from dateutil.tz import tzoffset
import jsonpath_ng as jsonpath
import simplejson as json
import yaml

# JSON schema follows:
# https://json-schema.org/
Expand All @@ -19,14 +21,19 @@ def _convert_key(old_key, lower=False, replace_special=False, snake_case=False):
return new_key


def _get_jsonpath(raw, path):
    """Return the values in ``raw`` matched by the JSONPath expression ``path``.

    The result is a list holding the ``value`` of every match, in the order
    the parsed expression's ``find`` yields them.
    """
    matches = jsonpath.parse(path).find(raw)
    return [found.value for found in matches]


def _do_infer_schema(obj, record_level=None, lower=False,
replace_special=False, snake_case=False):
schema = dict()

# Go down to the record level if specified
if record_level:
for x in record_level.split(","):
obj = obj[x]
obj = _get_jsonpath(obj, record_level)[0]

if obj is None:
return None
Expand Down Expand Up @@ -203,6 +210,19 @@ def infer_from_json_file(filename, skip=0, lower=False, replace_special=False,
return schema


def infer_from_yaml_file(filename, skip=0, lower=False, replace_special=False,
                         snake_case=False):
    """Infer a schema from the data stored in a YAML file.

    Args:
        filename: Path of the YAML file to read.
        skip: Number of leading records to drop. Only applied when the
            top-level node of the document is a list.
        lower: Passed through to ``infer_schema``.
        replace_special: Passed through to ``infer_schema``.
        snake_case: Passed through to ``infer_schema``.

    Returns:
        The value returned by ``infer_schema`` for the loaded data.
    """
    with open(filename, "r") as f:
        content = f.read()

    # NOTE(security): FullLoader resolves more YAML tags than SafeLoader.
    # If this function may be fed files from untrusted sources, consider
    # switching to yaml.safe_load -- confirm no caller relies on the extra
    # tags before changing it.
    data = yaml.load(content, Loader=yaml.FullLoader)

    # Skipping only makes sense for a sequence of records; a mapping or a
    # scalar document is handed to infer_schema untouched.
    if isinstance(data, list):
        data = data[skip:]

    return infer_schema(data, lower=lower, replace_special=replace_special,
                        snake_case=snake_case)


def infer_from_csv_file(filename, skip=0, lower=False, replace_special=False,
snake_case=False):
with open(filename) as f:
Expand All @@ -223,6 +243,9 @@ def infer_from_file(filename, fmt="json", skip=0, lower=False,
if fmt == "json":
schema = infer_from_json_file(
filename, skip, lower, replace_special, snake_case)
if fmt == "yaml":
schema = infer_from_yaml_file(
filename, skip, lower, replace_special, snake_case)
elif fmt == "csv":
schema = infer_from_csv_file(filename, skip, lower, replace_special, snake_case)
else:
Expand Down
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
from setuptools import setup

VERSION = "0.1.2"
VERSION = "0.2.0"

with open("README.md", "r") as fh:
long_description = fh.read()
Expand Down Expand Up @@ -29,8 +29,10 @@

install_requires=[
"setuptools>=40.3.0",
"jsonpath-ng>=1.5.2",
"python-dateutil>=2.8.1",
"simplejson==3.11.1"
"simplejson==3.11.1",
"pyyaml>=5.1",
],
entry_points="""
[console_scripts]
Expand Down

0 comments on commit 6f00442

Please sign in to comment.