# JSONPath proof-of-concept

Exploring a potential JSONPath-like interface for retrieving `rubicon_ml` logs.

### filesystem directory structure

```
|- project_a
|  |- artifacts
|  |  |- id_a
|  |  |  |- data
|  |  |  |_ metadata.json
|  |  |_ ...
|  |- dataframes
|  |  |- id_a
|  |  |  |- data
|  |  |  |_ metadata.json
|  |  |_ ...
|  |- experiments
|     |- id_a
|     |  |- artifacts
|     |  |  |- id_a
|     |  |  |  |- data
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- dataframes
|     |  |  |- id_a
|     |  |  |  |- data
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- features
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- metrics
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- parameters
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |_ metadata.json
|     |- id_b
|     |  |_ ...
|     |_ ...
|- project_b
|  |_ ...
|_ ...
```

In [1]:
import random

# Seed the RNG so the `random.choice` calls used below to pick tags and metric
# values produce the same data on every run.
random.seed(24)

In [2]:
from rubicon_ml import Rubicon

# Use the "memory" persistence option and fetch (or create) the demo project.
rb = Rubicon(persistence="memory")
pr = rb.get_or_create_project(name="jsonpath")

# Snapshot the project's attributes with a shallow copy instead of aliasing the
# live `__dict__`: keys added to `project_json` later (e.g. "experiment") would
# otherwise be written straight onto the domain object as new attributes.
project_json = {"project": [dict(pr._domain.__dict__)]}
project_json

{'project': [{'name': 'jsonpath',
   'id': '9f91a065-48e3-4f32-950a-f56c09e8b738',
   'description': None,
   'github_url': None,
   'training_metadata': None,
   'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 243788)}]}

In [3]:
import pandas as pd

NUM_EXPERIMENTS = 4

# Log NUM_EXPERIMENTS experiments to the project. Each one gets a single random
# tag, four features, three parameters, two metrics with a random value and a
# random tag, one artifact and one dataframe.
for _ in range(NUM_EXPERIMENTS):
    experiment_tag = random.choice(["a", "b", "c"])
    experiment = pr.log_experiment(tags=[experiment_tag])

    for feature_name in ("f", "g", "h", "i"):
        experiment.log_feature(name=feature_name)

    for parameter_name, parameter_value in (("d", 100), ("e", 1000), ("f", 1000)):
        experiment.log_parameter(name=parameter_name, value=parameter_value)

    for metric_name in ("j", "k"):
        # Draw the value first and the tag second so the seeded random
        # sequence is consumed in a fixed order.
        metric_value = random.choice([0.0, 1.0])
        metric_tag = random.choice(["l", "m", "n"])
        experiment.log_metric(name=metric_name, value=metric_value, tags=[metric_tag])

    experiment.log_artifact(name="o", data_bytes=b"o")
    experiment.log_dataframe(pd.DataFrame([[0, 1], [1, 0]]))

In [4]:
# Nest every experiment, together with the entities logged to it, under the
# project entry so the whole hierarchy can be queried as one document.
project_json["project"][0]["experiment"] = []
for experiment in pr.experiments():
    # Copy the attribute dict rather than aliasing `_domain.__dict__`: the child
    # lists added below would otherwise become attributes of the domain object.
    experiment_json = dict(experiment._domain.__dict__)

    # (JSON key, accessor) pairs; the order here fixes the key order in the output.
    child_getters = (
        ("feature", experiment.features),
        ("parameter", experiment.parameters),
        ("metric", experiment.metrics),
        ("artifact", experiment.artifacts),
        ("dataframe", experiment.dataframes),
    )
    for key, get_children in child_getters:
        experiment_json[key] = [
            dict(child._domain.__dict__) for child in get_children()
        ]

    project_json["project"][0]["experiment"].append(experiment_json)

In [5]:
# Display the nested project -> experiment -> logged-entity dictionary.
project_json

{'project': [{'name': 'jsonpath',
   'id': '9f91a065-48e3-4f32-950a-f56c09e8b738',
   'description': None,
   'github_url': None,
   'training_metadata': None,
   'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 243788),
   'experiment': [{'project_name': 'jsonpath',
     'id': 'fb5dec03-4e87-4b1f-b5b8-f39894ca9c27',
     'name': None,
     'description': None,
     'model_name': None,
     'branch_name': None,
     'commit_hash': None,
     'training_metadata': None,
     'tags': ['c'],
     'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 258129),
     'feature': [{'name': 'f',
       'id': '60e045a1-7ba4-4347-a703-e639d7c99225',
       'description': None,
       'importance': None,
       'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 258497)},
      {'name': 'g',
       'id': 'bc0c69fc-112d-4de4-a791-adc3021d6c83',
       'description': None,
       'importance': None,
       'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 258760)},
      {'name'

### `jsonpath_ng`

This seems to be the most commonly recommended Python implementation, although the project does not appear to be actively maintained.
> https://github.com/h2non/jsonpath-ng

#### get all metrics from each experiment

In [6]:
from jsonpath_ng.ext import parse

# One match per experiment; each match's value is that experiment's list of metrics.
metric_lists = parse("$..experiment[*].metric").find(project_json)

print(f"{len(metric_lists)} experiments")
for found in metric_lists:
    experiment_metrics = found.value
    print(f"{len(experiment_metrics)} metrics")
    print(experiment_metrics)

4 experiments
2 metrics
[{'name': 'j', 'value': 1.0, 'id': '1e6dced0-a13a-428d-b12c-0948fc562b6a', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 259742), 'tags': ['n']}, {'name': 'k', 'value': 0.0, 'id': '0c9f4208-921b-4dab-bf8a-9ac616935c86', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 259898), 'tags': ['l']}]
2 metrics
[{'name': 'j', 'value': 0.0, 'id': '2f3045ca-51aa-473b-882d-0e3487cbc17a', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 262673), 'tags': ['l']}, {'name': 'k', 'value': 0.0, 'id': '06e55f5f-8c08-4678-85d2-1296daadce42', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 262850), 'tags': ['n']}]
2 metrics
[{'name': 'j', 'value': 1.0, 'id': 'a04019a9-796e-4074-8c09-48e9d36f7323', 'description': None, 'directionality': 'score', 'created_at': d

#### get all experiments with tag 'b'

In [7]:
# The filter compares the entries of each experiment's `tags` list against 'b'.
tagged_experiments = parse("$..experiment[?(@.tags[*]=='b')]").find(project_json)

print(f"{len(tagged_experiments)} experiments")
for found in tagged_experiments:
    print(found.value)

1 experiments
{'project_name': 'jsonpath', 'id': 'c7c7110a-ba04-4653-806a-917ae0b846ca', 'name': None, 'description': None, 'model_name': None, 'branch_name': None, 'commit_hash': None, 'training_metadata': None, 'tags': ['b'], 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 267339), 'feature': [{'name': 'f', 'id': '66215cad-7417-41e8-99b3-f2587e744333', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 267486)}, {'name': 'g', 'id': '4e3ec5d9-d65e-4085-9882-dbc6dbcaf2c9', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 267736)}, {'name': 'h', 'id': '2b54e92d-2265-4b3c-875c-a47b491d7bde', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 267976)}, {'name': 'i', 'id': '3f370aef-a38e-49ed-a7fe-fc89b273ef62', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 268164)}], 'parameter': [{'name'

#### get all metrics named 'j' with a value greater than or equal to 0.5 from each experiment

In [8]:
# Both conditions are applied to each metric: name equal to 'j' and value >= 0.5.
matching_metrics = parse(
    "$..experiment[*].metric[?(@.name=='j' & @.value>=0.5)]"
).find(project_json)

print(f"{len(matching_metrics)} metrics")
for found in matching_metrics:
    print(found.value)

2 metrics
{'name': 'j', 'value': 1.0, 'id': '1e6dced0-a13a-428d-b12c-0948fc562b6a', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 259742), 'tags': ['n']}
{'name': 'j', 'value': 1.0, 'id': 'a04019a9-796e-4074-8c09-48e9d36f7323', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 265701), 'tags': ['n']}


#### get all experiments that contain a metric named 'j' with a value less than or equal to 0.5

In [9]:
# Nested filter: the inner one picks the metric named 'j', the outer one keeps
# the experiments for which that metric's value is <= 0.5.
matching_experiments = parse(
    "$..experiment[?(@.metric[?(@.name=='j')].value<=0.5)]"
).find(project_json)

print(f"{len(matching_experiments)} experiments")
for found in matching_experiments:
    print(found.value)

2 experiments
{'project_name': 'jsonpath', 'id': '2edfec28-00a0-4aff-8620-5d59c2f0a721', 'name': None, 'description': None, 'model_name': None, 'branch_name': None, 'commit_hash': None, 'training_metadata': None, 'tags': ['a'], 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 261454), 'feature': [{'name': 'f', 'id': 'a1215778-fc70-44df-bbe9-09adc5dfdb16', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 261630)}, {'name': 'g', 'id': '026751c1-ac3a-4140-8c1e-2a8ec90fa4c8', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 261802)}, {'name': 'h', 'id': '717e4f04-a763-4d83-9095-27868d9d8fdf', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 261943)}, {'name': 'i', 'id': 'c90d13d3-a11d-4f81-b11f-44c67b762cb9', 'description': None, 'importance': None, 'created_at': datetime.datetime(2022, 9, 26, 18, 28, 37, 262072)}], 'parameter': [{'name'

#### returning `rubicon_ml` objects

In [10]:
from rubicon_ml.domain import Metric as DomainMetric
from rubicon_ml.client import Metric

class NoOpParent:
    """Placeholder parent for client objects rebuilt from query results.

    It carries no state; its only member is a read-only `_config`
    property that is always `None`.
    """

    @property
    def _config(self) -> None:
        """Return `None`; this stand-in parent has no configuration."""
        return None

# Query the metric dictionaries, then wrap each one in a domain `Metric` and a
# client `Metric` so the results are `rubicon_ml` objects rather than plain dicts.
matches = parse(
    "$..experiment[*].metric[?(@.name=='j' & @.value>=0.5)]"
).find(project_json)

metrics = [Metric(DomainMetric(**found.value), NoOpParent()) for found in matches]

metrics

[<rubicon_ml.client.metric.Metric at 0x15c46cd00>,
 <rubicon_ml.client.metric.Metric at 0x15c5b7c10>]

In [11]:
# Show the name and value of every reconstructed client metric.
for metric in metrics:
    print(metric.name, metric.value)

j 1.0
j 1.0
