# JSONPath proof-of-concept

exploring a potential JSONPath-like interface for retrieving rubicon logs

### filesystem directory structure

```
|- project_a
|  |- artifacts
|  |  |- id_a
|  |  |  |- data
|  |  |  |_ metadata.json
|  |  |_ ...
|  |- dataframes
|  |  |- id_a
|  |  |  |- data
|  |  |  |_ metadata.json
|  |  |_ ...
|  |- experiments
|     |- id_a
|     |  |- artifacts
|     |  |  |- id_a
|     |  |  |  |- data
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- dataframes
|     |  |  |- id_a
|     |  |  |  |- data
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- features
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- metrics
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- parameters
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |_ metadata.json
|     |- id_b
|     |  |_ ...
|     |_ ...
|- project_b
|  |_ ...
|_ ...
```

In [3]:
import random

# Seed the RNG so the random tags and metric values picked when logging the
# demo experiments are the same on every run.
random.seed(24)

In [4]:
import pandas as pd
from rubicon_ml import Rubicon

# Number of demo experiments logged to the project.
NUM_EXPERIMENTS = 4

# Use the "memory" persistence option for this demo.
rb = Rubicon(persistence="memory")
pr = rb.get_or_create_project(name="jsonpath")

# Each experiment gets one random tag, four features, three parameters,
# two metrics (random value and random tag), one artifact and one dataframe.
# The order of the `random.choice` calls determines the generated data.
for _ in range(NUM_EXPERIMENTS):
    tags = [random.choice(["a", "b", "c"])]
    ex = pr.log_experiment(tags=tags)
        
    for feature in ["f", "g", "h", "i"]:
        ex.log_feature(name=feature)
            
    for parameter in [("d", 100), ("e", 1000), ("f", 1000)]:
        name, value = parameter
        ex.log_parameter(name=name, value=value)
        
    for metric in ["j", "k"]:
        value = random.choice([0.0, 1.0])
        # NOTE: rebinds `tags`; the experiment itself was already logged above.
        tags = [random.choice(["l", "m", "n"])]
        ex.log_metric(name=metric, value=value, tags=tags)
        
    ex.log_artifact(name="o", data_bytes=b"o")
    ex.log_dataframe(pd.DataFrame([[0, 1], [1, 0]]))
    
# Also log one artifact and one dataframe directly on the project.
pr.log_artifact(name="p", data_bytes=b"p")
pr.log_dataframe(pd.DataFrame([[0, 1], [1, 0]]))

# Bare expression: shows the project as the cell's output.
pr

<rubicon_ml.client.project.Project at 0x158bde4d0>

In [5]:
import warnings
from rubicon_ml import Project, Experiment

def convert_to_json(rubicon_objects=None, projects=None, experiments=None):
    """Bundle any mix of rubicon_ml objects into a single nested dictionary.

    Parameters
    ----------
    rubicon_objects : Rubicon or list of Rubicon, optional
        Converted with `rubicon_to_json` and stored under "top_level_rubicon".
    projects : Project or list of Project, optional
        Converted with `project_to_json` and stored under "projects".
    experiments : Experiment or list of Experiment, optional
        Converted with `experiment_to_json` and stored under "experiments".

    Returns
    -------
    dict
        One key per argument that was not None, each mapping to a list with
        one converted entry per object given. Empty if every argument is None.

    Raises
    ------
    ValueError
        If a given argument is neither an instance of its expected type nor
        a list made up solely of such instances.
    """

    def _matches(candidate, expected_type):
        # True for a single instance, or a list containing only instances.
        if isinstance(candidate, expected_type):
            return True
        return isinstance(candidate, list) and all(
            isinstance(item, expected_type) for item in candidate
        )

    def _listify(candidate):
        # A lone object is handled as a one-element list.
        return candidate if isinstance(candidate, list) else [candidate]

    # Check every argument before converting anything, so an invalid
    # argument raises before any conversion has run.
    if rubicon_objects is not None and not _matches(rubicon_objects, Rubicon):
        raise ValueError("`rubicon_objects` must be of type `Rubicon` or `list` of type `Rubicon`")
    if projects is not None and not _matches(projects, Project):
        raise ValueError("`projects` must be of type `Project` or `list` of type `Project`")
    if experiments is not None and not _matches(experiments, Experiment):
        raise ValueError("`experiments` must be of type `Experiment` or `list` of type `Experiment`")

    converted = {}

    if rubicon_objects is not None:
        converted["top_level_rubicon"] = [
            rubicon_to_json(item) for item in _listify(rubicon_objects)
        ]

    if projects is not None:
        converted["projects"] = [project_to_json(item) for item in _listify(projects)]

    if experiments is not None:
        converted["experiments"] = [
            experiment_to_json(item) for item in _listify(experiments)
        ]

    return converted

def experiment_to_json(experiment):
    """Convert one or more experiments into a nested dictionary.

    Parameters
    ----------
    experiment : Experiment or list of Experiment
        The experiment(s) to convert.

    Returns
    -------
    dict
        ``{"experiment": [...]}`` with one entry per experiment. Each entry
        holds the attributes of the experiment's domain object plus the keys
        "feature", "parameter", "metric", "artifact" and "dataframe", each a
        list of the attribute dicts of the corresponding child objects.

    Raises
    ------
    ValueError
        If `experiment` is neither an `Experiment` nor a list of them.
    """
    if isinstance(experiment, Experiment):
        experiment = [experiment]
    elif not isinstance(experiment, list) or not all(
        isinstance(e, Experiment) for e in experiment
    ):
        raise ValueError("`experiment` must be of type `Experiment` or `list` of type `Experiment`")

    result = {"experiment": []}
    for e in experiment:
        # `__dict__` is the domain object's live attribute dict. Work on a
        # copy so the child keys added below do not become attributes of
        # the experiment's domain object. The copies are shallow: nested
        # values are still shared with the domain objects.
        experiment_json = dict(e._domain.__dict__)
        experiment_json["feature"] = [dict(f._domain.__dict__) for f in e.features()]
        experiment_json["parameter"] = [dict(p._domain.__dict__) for p in e.parameters()]
        experiment_json["metric"] = [dict(m._domain.__dict__) for m in e.metrics()]
        experiment_json["artifact"] = [dict(a._domain.__dict__) for a in e.artifacts()]
        experiment_json["dataframe"] = [dict(d._domain.__dict__) for d in e.dataframes()]

        result["experiment"].append(experiment_json)

    return result

def project_to_json(project):
    """Convert one or more projects into a nested dictionary.

    Parameters
    ----------
    project : Project or list of Project
        The project(s) to convert.

    Returns
    -------
    dict
        ``{"project": [...]}`` with one entry per project. Each entry holds
        the attributes of the project's domain object plus the keys
        "artifact" and "dataframe" (lists of child attribute dicts) and
        "experiment" (a list with one `experiment_to_json` result per
        experiment in the project).

    Raises
    ------
    ValueError
        If `project` is neither a `Project` nor a list of them.
    """
    if isinstance(project, Project):
        project = [project]
    elif not isinstance(project, list) or not all(isinstance(pr, Project) for pr in project):
        raise ValueError("`project` must be of type `Project` or `list` of type `Project`")

    result = {"project": []}
    for pr in project:
        # `__dict__` is the domain object's live attribute dict. Work on a
        # copy so the child keys added below do not become attributes of
        # the project's domain object. The copies are shallow.
        project_json = dict(pr._domain.__dict__)
        project_json["artifact"] = [dict(a._domain.__dict__) for a in pr.artifacts()]
        project_json["dataframe"] = [dict(d._domain.__dict__) for d in pr.dataframes()]

        # Each element is the full `{"experiment": [...]}` wrapper returned
        # by `experiment_to_json`, so experiments sit one level deeper than
        # artifacts and dataframes. Kept as-is to preserve the output shape.
        project_json["experiment"] = [experiment_to_json(e) for e in pr.experiments()]

        result["project"].append(project_json)

    return result

def rubicon_to_json(rubicon):
    """Convert one or more `Rubicon` entry points into a nested dictionary.

    Parameters
    ----------
    rubicon : Rubicon or list of Rubicon
        The `Rubicon` object(s) whose projects should be converted.

    Returns
    -------
    dict
        ``{"rubicon": [...]}`` with one entry per `Rubicon` object. Each
        entry is ``{"projects": [...]}``, holding one `project_to_json`
        result per project returned by that object's `projects()`.

    Raises
    ------
    ValueError
        If `rubicon` is neither a `Rubicon` nor a list of them.
    """
    if isinstance(rubicon, Rubicon):
        rubicon = [rubicon]
    elif not isinstance(rubicon, list) or not all(isinstance(rb, Rubicon) for rb in rubicon):
        # The message names this function's own parameter (`rubicon`), in
        # line with the other *_to_json helpers.
        raise ValueError("`rubicon` must be of type `Rubicon` or `list` of type `Rubicon`")

    result = {"rubicon": []}
    for rb in rubicon:
        rubicon_json = {"projects": [project_to_json(pr) for pr in rb.projects()]}
        result["rubicon"].append(rubicon_json)

    return result

In [4]:
# Serialize the demo project (with its artifacts, dataframes and experiments)
# and display the resulting nested dict.
project_json = project_to_json(pr)
project_json

{'project': [{'name': 'jsonpath',
   'id': 'f63e854e-63a5-4bcd-a78c-fd3977a154d5',
   'description': None,
   'github_url': None,
   'training_metadata': None,
   'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 918043),
   'artifact': [{'name': 'p',
     'id': '35b1b02c-88f8-47dc-a537-e9af4a96420a',
     'description': None,
     'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 926331),
     'tags': [],
     'parent_id': 'f63e854e-63a5-4bcd-a78c-fd3977a154d5'}],
   'dataframe': [{'id': '113fccc4-e479-49b2-a894-266350c11222',
     'name': None,
     'description': None,
     'tags': [],
     'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 926700),
     'parent_id': 'f63e854e-63a5-4bcd-a78c-fd3977a154d5'}],
   'experiment': [{'project_name': 'jsonpath',
     'id': '18efd592-18e5-4556-9533-a3d46b9b8435',
     'name': None,
     'description': None,
     'model_name': None,
     'branch_name': None,
     'commit_hash': None,
     'training_metadata': None,
     'tags

In [6]:
# Serialize every project reachable from the `Rubicon` entry point and
# display the resulting nested dict.
rubicon_json = rubicon_to_json(rb)
rubicon_json

{'rubicon': [{'projects': [{'project': [{'name': 'jsonpath',
       'id': 'cb48b33e-fbeb-4395-b774-0b450ec1cc77',
       'description': None,
       'github_url': None,
       'training_metadata': None,
       'created_at': datetime.datetime(2022, 12, 7, 15, 56, 35, 823154),
       'artifact': [{'name': 'p',
         'id': 'af4c089a-3815-43b0-a024-966daced55a1',
         'description': None,
         'created_at': datetime.datetime(2022, 12, 7, 15, 56, 35, 833896),
         'tags': [],
         'parent_id': 'cb48b33e-fbeb-4395-b774-0b450ec1cc77'}],
       'dataframe': [{'id': 'cf5365d6-d5ed-4d93-99d4-a3239fb60d16',
         'name': None,
         'description': None,
         'tags': [],
         'created_at': datetime.datetime(2022, 12, 7, 15, 56, 35, 834132),
         'parent_id': 'cb48b33e-fbeb-4395-b774-0b450ec1cc77'}],
       'experiment': [{'experiment': [{'project_name': 'jsonpath',
           'id': '56749ff6-ee68-4820-a0c8-1db3ff0caed6',
           'name': None,
           'des

In [7]:
# Log one more experiment (name only, nothing logged to it) and serialize
# just that experiment.
ex = pr.log_experiment(name="test_experiment_to_json")
experiment_json = experiment_to_json(ex)
experiment_json

{'experiment': [{'project_name': 'jsonpath',
   'id': '7126319a-f245-4e03-a561-fae7a384b38d',
   'name': 'test_experiment_to_json',
   'description': None,
   'model_name': None,
   'branch_name': None,
   'commit_hash': None,
   'training_metadata': None,
   'tags': [],
   'created_at': datetime.datetime(2022, 12, 7, 15, 56, 45, 895682),
   'feature': [],
   'parameter': [],
   'metric': [],
   'artifact': [],
   'dataframe': []}]}

In [8]:
# Combine all three kinds of object in one call; each lands under its own
# top-level key ("top_level_rubicon", "projects", "experiments").
converted_to_json = convert_to_json(rubicon_objects=rb, projects=pr, experiments=ex)
converted_to_json

{'top_level_rubicon': [{'rubicon': [{'projects': [{'project': [{'name': 'jsonpath',
         'id': 'cb48b33e-fbeb-4395-b774-0b450ec1cc77',
         'description': None,
         'github_url': None,
         'training_metadata': None,
         'created_at': datetime.datetime(2022, 12, 7, 15, 56, 35, 823154),
         'artifact': [{'name': 'p',
           'id': 'af4c089a-3815-43b0-a024-966daced55a1',
           'description': None,
           'created_at': datetime.datetime(2022, 12, 7, 15, 56, 35, 833896),
           'tags': [],
           'parent_id': 'cb48b33e-fbeb-4395-b774-0b450ec1cc77'}],
         'dataframe': [{'id': 'cf5365d6-d5ed-4d93-99d4-a3239fb60d16',
           'name': None,
           'description': None,
           'tags': [],
           'created_at': datetime.datetime(2022, 12, 7, 15, 56, 35, 834132),
           'parent_id': 'cb48b33e-fbeb-4395-b774-0b450ec1cc77'}],
         'experiment': [{'experiment': [{'project_name': 'jsonpath',
             'id': '56749ff6-ee68-4820

### `jsonpath_ng`

This seems to be the most commonly recommended Python implementation, though the project does not appear to be very active.
> https://github.com/h2non/jsonpath-ng

In [5]:
from jsonpath_ng.ext import parse

class RubiconJSON:
    """Pairs a nested rubicon_ml dictionary with JSONPath querying."""

    def __init__(self, rubicon_json):
        """Keep a reference to `rubicon_json`, the structure to be searched."""
        self._rubicon_json = rubicon_json

    def search(self, query):
        """Parse `query` with `jsonpath_ng.ext.parse` and return what it finds.

        The return value is whatever the parsed expression's `find` returns
        for the stored structure.
        """
        expression = parse(query)
        matches = expression.find(self._rubicon_json)
        return matches
    
# Wrap the serialized `rubicon_json` dict so it can be queried with JSONPath.
rb_json = RubiconJSON(rubicon_json)

#### get all metrics from each experiment

In [6]:
# `$..experiment` finds every "experiment" key at any depth, `[*]` takes each
# element of that list, and `.metric` selects that element's metric list.
res = rb_json.search("$..experiment[*].metric")

# One match per experiment; each match's value is that experiment's metrics.
print(f"{len(res)} experiments")
for match in res:
    print(f"{len(match.value)} metrics")
    print(match.value)

4 experiments
2 metrics
[{'name': 'j', 'value': 1.0, 'id': '5d6418ca-4c84-44e6-b99f-c38307f4602b', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 919580), 'tags': ['n']}, {'name': 'k', 'value': 0.0, 'id': '4b69e8f9-d64c-4798-8235-06f693b76599', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 919719), 'tags': ['l']}]
2 metrics
[{'name': 'j', 'value': 0.0, 'id': '31b3a0d6-0d12-42a5-afe3-ede741179725', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 921708), 'tags': ['l']}, {'name': 'k', 'value': 0.0, 'id': 'd1102f9e-4e77-45af-b4f5-c551c11b8a24', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 921847), 'tags': ['n']}]
2 metrics
[{'name': 'j', 'value': 1.0, 'id': '0e713360-b1d2-4820-b3aa-722e966cdeea', 'description': None, 'directionality': 'score', 'created_at': datet

#### get all experiments with tag 'b'

In [7]:
# `[?(...)]` is a filter: keep the elements of any "experiment" list whose
# `tags` contain the value 'b' (`@` is the element being tested).
res = rb_json.search("$..experiment[?(@.tags[*]=='b')]")

print(f"{len(res)} experiments")
for match in res:
    print(match.value)

1 experiments
{'project_name': 'jsonpath', 'id': 'b5a8cd36-1004-496b-a589-6ef7db459926', 'name': None, 'description': None, 'model_name': None, 'branch_name': None, 'commit_hash': None, 'training_metadata': None, 'tags': ['b'], 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 924445), 'feature': [{'name': 'f', 'id': 'f2f3914b-cc7a-4041-b077-ae0580c4c98c', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 924557)}, {'name': 'g', 'id': 'eae44410-e917-47e0-8042-3a3209f73b21', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 924706)}, {'name': 'h', 'id': '2297332f-f239-44e9-aa4b-bb8eb0115c31', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 924819)}, {'name': 'i', 'id': '3f15215c-14f5-48bb-8339-65a99d505ede', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 924928)}], 'parameter': [{'name': 'd'

#### get all metrics named 'j' with a value greater than or equal to 0.5 from each experiment

In [8]:
# Filter each experiment's metric list down to metrics named 'j' whose value
# is at least 0.5; `&` combines the two conditions.
res = rb_json.search("$..experiment[*].metric[?(@.name=='j' & @.value>=0.5)]")

print(f"{len(res)} metrics")
for match in res:
    print(match.value)

2 metrics
{'name': 'j', 'value': 1.0, 'id': '5d6418ca-4c84-44e6-b99f-c38307f4602b', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 919580), 'tags': ['n']}
{'name': 'j', 'value': 1.0, 'id': '0e713360-b1d2-4820-b3aa-722e966cdeea', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 923547), 'tags': ['n']}


#### get all experiments that contain a metric named 'j' with a value less than or equal to 0.5

In [9]:
# Nested filter: the inner `[?(@.name=='j')]` picks the metric named 'j' within
# an experiment, and the outer filter keeps the experiment when that metric's
# value is at most 0.5. The matches are whole experiments, not metrics.
res = rb_json.search("$..experiment[?(@.metric[?(@.name=='j')].value<=0.5)]")

print(f"{len(res)} experiments")
for match in res:
    print(match.value)

2 experiments
{'project_name': 'jsonpath', 'id': '80f4c653-eea8-4f72-b7a0-36b5f76a7ee3', 'name': None, 'description': None, 'model_name': None, 'branch_name': None, 'commit_hash': None, 'training_metadata': None, 'tags': ['a'], 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 920751), 'feature': [{'name': 'f', 'id': '5657f0a3-8c4a-49fd-b8ae-672d7fb742da', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 920867)}, {'name': 'g', 'id': '614aa071-534c-425a-921d-9d6a2c020a96', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 921003)}, {'name': 'h', 'id': 'a0d17bf2-955e-47e4-b73e-0e579754f4a0', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 921124)}, {'name': 'i', 'id': '4f53bedf-44c8-47b3-a058-50498b9cd65d', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 21, 3, 25, 921238)}], 'parameter': [{'name': 'd'

#### returning `rubicon_ml` objects

In [10]:
from rubicon_ml.domain import Metric as DomainMetric
from rubicon_ml.client import Metric

class NoOpParent:
    """Placeholder parent whose `_config` is always None.

    Exists so that a client object can be constructed from a query result
    without a real parent object.
    """

    # Read-only attribute: no setter is defined, and the getter returns None.
    _config = property(lambda self: None)

In [11]:
# Same query as before: metrics named 'j' with a value of at least 0.5.
res = rb_json.search("$..experiment[*].metric[?(@.name=='j' & @.value>=0.5)]")

# Turn each matched dict back into a rubicon_ml client object: the dict is
# unpacked into a domain `Metric`, which is wrapped in a client `Metric` with
# a `NoOpParent` as its second argument.
metrics = []
for match in res:
    metrics.append(Metric(DomainMetric(**match.value), NoOpParent()))
        
metrics

[<rubicon_ml.client.metric.Metric at 0x163481510>,
 <rubicon_ml.client.metric.Metric at 0x163409a50>]

In [12]:
# Check that the rebuilt client objects expose the name and value of the
# matched metrics.
for m in metrics:
    print(m.name, m.value)

j 1.0
j 1.0
