In [1]:
import sys

sys.path.append("../jsonviate")
from pathlib import Path
import json
from jsonviate import JsonToWeaviate

In [2]:
# Root directory for the notebook's data files, relative to the working directory.
data_dir = Path("..", "___data")

In [3]:
# Load the awsdocs repositories JSON export into `data`.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so that parsing
# does not depend on the platform's locale default, which open() would
# otherwise fall back to.
with open(
    data_dir / "awsdocs" / "input" / "repositories.json", encoding="utf-8"
) as f:
    data = json.load(f)

In [4]:
# Find the top-level keys whose values are containers (list or dict); these
# are the candidates for becoming models of their own.
#
# `keys` holds one single-entry {key: type-name} dict per distinct
# (key, type) pair found across all items of `data`. The pairs are
# de-duplicated through an insertion-ordered dict instead of a set of JSON
# strings, so the order is "first seen in the data" and stays the same across
# kernel restarts (set order over strings changes with per-process hash
# randomization).
keys = [
    {key: type_name}
    for key, type_name in dict.fromkeys(
        (k, str(type(v))) for item in data for k, v in item.items()
    )
]
# Keep only the keys whose value type is exactly list or dict. If a key shows
# up with both types, the pair seen last in the data wins.
models = {
    k: v
    for d in keys
    for k, v in d.items()
    if v in ("<class 'list'>", "<class 'dict'>")
}
print(models)

{'topics': "<class 'list'>", 'owner': "<class 'dict'>", 'permissions': "<class 'dict'>", 'license': "<class 'dict'>"}


In [5]:
# Declarative mappings handed to JsonToWeaviate. Each entry names a target
# class, a "path" (None for all three) and a "substitutions" dict of
# {target property: source key}.
# NOTE(review): dotted source keys such as "owner.login" presumably address
# fields of the nested "owner" / "license" objects - confirm against jsonviate.
class_mapping = [
    {
        "class": "Repository",
        "path": None,
        # Repository properties keep the name of their source key.
        "substitutions": {
            field: field
            for field in (
                "id",
                "full_name",
                "html_url",
                "description",
                "clone_url",
                "stargazers_count",
                "open_issues_count",
                "forks_count",
                "default_branch",
            )
        },
    },
    {
        "class": "Owner",
        "path": None,
        # Owner properties are read from keys prefixed with "owner.".
        "substitutions": {
            field: f"owner.{field}" for field in ("id", "login", "url", "type")
        },
    },
    {
        "class": "License",
        "path": None,
        # License properties are read from keys prefixed with "license.".
        "substitutions": {
            field: f"license.{field}"
            for field in ("key", "name", "spdx_id", "url", "node_id")
        },
    },
]

## Reshape data

In [6]:
# Build a mapper configured with the class mappings defined above, then
# reshape only the first record of `data`.
# NOTE(review): from_json is called through the class with `mapper` passed
# explicitly as the first argument; if it is a regular instance method this is
# the same as mapper.from_json(data[0]) - confirm against jsonviate.
mapper = JsonToWeaviate(mappings=class_mapping)
reshaped = JsonToWeaviate.from_json(mapper, data[0])

In [10]:
# Display the data objects collected under the Repository class.
reshaped.classes.Repository.data_objects


[{'id': '53b5a296-f1e6-47cb-ac5a-4381eadddd60',
  'class': 'Repository',
  'data': {'id': 39527199,
   'full_name': 'awsdocs/elastic-beanstalk-samples',
   'html_url': 'https://github.com/awsdocs/elastic-beanstalk-samples',
   'description': 'This repository contains code and configuration samples (e.g. .ebextensions) for AWS Elastic Beanstalk.',
   'clone_url': 'https://github.com/awsdocs/elastic-beanstalk-samples.git',
   'stargazers_count': 1118,
   'open_issues_count': 16,
   'forks_count': 886,
   'default_branch': 'main'}}]