Skip to content

Commit

Permalink
Add 2 more online JSON datasets for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
bogdandm committed Nov 29, 2018
1 parent b77d983 commit 099e16d
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 9 deletions.
57 changes: 57 additions & 0 deletions testing_tools/real_apis/large_data_set_github_online.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
City Lots San Francisco dataset (https://github.com/zemirco/sf-city-lots-json)
"""
import json
from datetime import datetime

try:
from tqdm import tqdm
except ImportError:
tqdm = lambda x, **kwargs: x
from json_to_models.generator import MetadataGenerator
from json_to_models.models import compose_models
from json_to_models.models.attr import AttrsModelCodeGenerator
from json_to_models.models.base import generate_code
from json_to_models.registry import ModelRegistry
import requests

# Raw download URL of citylots.json in the zemirco/sf-city-lots-json GitHub repository
URL = "https://raw.githubusercontent.com/zemirco/sf-city-lots-json/master/citylots.json"


def load_data() -> dict:
    """
    Download the City Lots JSON document from ``URL`` and parse it.

    The response body is streamed in 1 MiB chunks so that download progress
    can be displayed (through ``tqdm`` when it is installed, silently otherwise).

    :return: the parsed JSON document
    :raises requests.HTTPError: if the server answers with an error status code
    """
    block_size = 1024 * 1024
    # The context manager releases the connection even if the download fails midway
    with requests.get(URL, stream=True) as r:
        # Fail early instead of handing an error page to json.loads
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        print(f"Start downloading data (total size {total_size / block_size:.2f}Mb)")
        # Round up so the trailing partial chunk is counted;
        # None tells tqdm that the length is unknown (no content-length header)
        total_blocks = -(-total_size // block_size) or None
        chunks = list(tqdm(r.iter_content(block_size), total=total_blocks, unit='Mb', unit_scale=True))
    # Join once: repeated ``bytes +=`` re-copies the whole buffer on every chunk
    return json.loads(b"".join(chunks).decode("utf-8"))


def main():
    """
    Download the dataset, run the model generation pipeline on it and print the result.

    A progress message is printed before every stage. The last line printed is
    the time spent since the download finished, in seconds.
    """
    dataset = load_data()

    # The timer starts after the download, so only the generation is measured
    started_at = datetime.now()
    generator = MetadataGenerator()
    registry = ModelRegistry()

    print("Start generating metadata...")
    meta = generator.generate(dataset)

    print("Start generating models tree...")
    registry.process_meta_data(meta, model_name="Cities")

    print("Merging models...")
    registry.merge_models(generator=generator)

    print("Generating names...")
    registry.generate_names()

    print("Generating structure...")
    models_structure = compose_models(registry.models_map)

    print("Generating final code...")
    print(generate_code(models_structure, AttrsModelCodeGenerator))

    elapsed = datetime.now() - started_at
    print(f"{elapsed.total_seconds():.4f} seconds")


if __name__ == '__main__':
main()
27 changes: 18 additions & 9 deletions testing_tools/real_apis/randomapis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
Example uses the following APIs:
- CHRONICLING API (https://chroniclingamerica.loc.gov/about/api/)
- Launch Library Reading API (https://launchlibrary.net/docs/1.3/api.html)
- University Domains and Names Data List (https://github.com/Hipo/university-domains-list)
"""
import requests

from json_to_models.generator import MetadataGenerator
from json_to_models.models import compose_models
from json_to_models.models.attr import AttrsModelCodeGenerator
from json_to_models.models.base import generate_code
from json_to_models.registry import ModelRegistry
from testing_tools.pprint_meta_data import pretty_format_meta
from testing_tools.real_apis import dump_response


Expand All @@ -19,24 +22,30 @@ def launchlibrary(mission_name="GPS"):
return requests.get(f"https://launchlibrary.net/1.3/mission/{mission_name}").json()


def university_domains():
    """
    Fetch world_universities_and_domains.json from the Hipo/university-domains-list repository.

    :return: the decoded JSON body of the response
    """
    url = (
        "https://raw.githubusercontent.com/Hipo/university-domains-list/master/"
        "world_universities_and_domains.json"
    )
    response = requests.get(url)
    return response.json()


def main():
    """
    Fetch the sample API responses, dump them and print attrs models generated from them.

    Every response is stored with ``dump_response`` and registered in one shared
    ``ModelRegistry``; the models are then merged, named, composed and printed
    as code produced by ``AttrsModelCodeGenerator``.
    """
    chroniclingamerica_data = chroniclingamerica()
    dump_response("other", "chroniclingamerica", chroniclingamerica_data)

    launchlibrary_data = launchlibrary()
    dump_response("other", "launchlibrary", launchlibrary_data)

    university_domains_data = university_domains()
    dump_response("other", "university_domains", university_domains_data)

    gen = MetadataGenerator()
    reg = ModelRegistry()
    # Each entry is unpacked into gen.generate(): the first two responses are
    # wrapped in one-element lists, while university_domains_data is passed
    # as is, so its items become separate arguments
    # (presumably it is a JSON list -- verify against the API response)
    for data in ([chroniclingamerica_data], [launchlibrary_data], university_domains_data):
        fields = gen.generate(*data)
        reg.process_meta_data(fields)
    reg.merge_models(generator=gen)
    reg.generate_names()
    structure = compose_models(reg.models_map)
    print(generate_code(structure, AttrsModelCodeGenerator))


if __name__ == '__main__':
Expand Down

0 comments on commit 099e16d

Please sign in to comment.