# JSONPath proof-of-concept

Exploring a potential JSONPath-like interface for retrieving `rubicon_ml` logs.

### filesystem directory structure

```
|- project_a
|  |- artifacts
|  |  |- id_a
|  |  |  |- data
|  |  |  |_ metadata.json
|  |  |_ ...
|  |- dataframes
|  |  |- id_a
|  |  |  |- data
|  |  |  |_ metadata.json
|  |  |_ ...
|  |- experiments
|     |- id_a
|     |  |- artifacts
|     |  |  |- id_a
|     |  |  |  |- data
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- dataframes
|     |  |  |- id_a
|     |  |  |  |- data
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- features
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- metrics
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |- parameters
|     |  |  |- name_a
|     |  |  |  |_ metadata.json
|     |  |  |_ ...
|     |  |_ metadata.json
|     |- id_b
|     |  |_ ...
|     |_ ...
|- project_b
|  |_ ...
|_ ...
```

In [1]:
import random

# fix the seed so the `random.choice` calls used to build the demo data below
# pick the same tags and metric values on every run
random.seed(24)

In [2]:
import pandas as pd
from rubicon_ml import Rubicon

# number of experiments logged to the demo project
NUM_EXPERIMENTS = 4

# keep everything in memory (`persistence="memory"`) for this proof-of-concept
rb = Rubicon(persistence="memory")
pr = rb.get_or_create_project(name="jsonpath")

# NOTE: with a fixed seed, the generated tags and metric values depend on the
# order of the `random.choice` calls below - reordering them changes the data
for _ in range(NUM_EXPERIMENTS):
    # each experiment gets a single tag chosen from "a", "b", "c"
    tags = [random.choice(["a", "b", "c"])]
    ex = pr.log_experiment(tags=tags)
        
    # four features, logged by name only
    for feature in ["f", "g", "h", "i"]:
        ex.log_feature(name=feature)
            
    # three (name, value) parameters
    for parameter in [("d", 100), ("e", 1000), ("f", 1000)]:
        name, value = parameter
        ex.log_parameter(name=name, value=value)
        
    # two metrics, each with a random value of 0.0 or 1.0 and a single tag
    # chosen from "l", "m", "n" (`tags` is reused here for the metric's tags)
    for metric in ["j", "k"]:
        value = random.choice([0.0, 1.0])
        tags = [random.choice(["l", "m", "n"])]
        ex.log_metric(name=metric, value=value, tags=tags)
        
    # one artifact and one dataframe per experiment
    ex.log_artifact(name="o", data_bytes=b"o")
    ex.log_dataframe(pd.DataFrame([[0, 1], [1, 0]]))
    
# one artifact and one dataframe logged directly to the project
pr.log_artifact(name="p", data_bytes=b"p")
pr.log_dataframe(pd.DataFrame([[0, 1], [1, 0]]))

pr

<rubicon_ml.client.project.Project at 0x11328ba90>

In [3]:
import warnings
from rubicon_ml import Project, Experiment

def convert_to_json(rubicon_objects=None, projects=None, experiments=None):
    """Combine the JSON forms of the given rubicon objects into one dictionary.

    The inputs are converted in the order `rubicon_objects`, `projects`,
    `experiments`. The first conversion that yields a dictionary becomes the
    result; later conversions are merged into it under the top-level
    ``"project"`` and ``"experiment"`` keys.

    Parameters
    ----------
    rubicon_objects : optional
        Passed to `rubicon_to_json` when not ``None``.
    projects : optional
        Passed to `project_to_json` when not ``None``.
    experiments : optional
        Passed to `experiment_to_json` when not ``None``.

    Returns
    -------
    dict or None
        The merged dictionary, or ``None`` when nothing was converted.
    """
    merged = None
    if rubicon_objects is not None:
        # may still be `None` if the rubicon objects hold no projects
        merged = rubicon_to_json(rubicon_objects)

    if projects is not None:
        projects_json = project_to_json(projects)
        if merged is None:
            merged = projects_json
        else:
            merged["project"].extend(projects_json["project"])

    if experiments is not None:
        experiments_json = experiment_to_json(experiments)
        if merged is None:
            merged = experiments_json
        else:
            # a result built from projects only has no top-level "experiment" list yet
            if merged.get("experiment") is None:
                merged["experiment"] = []
            merged["experiment"].extend(experiments_json["experiment"])

    return merged

def experiment_to_json(experiment):
    """Convert one or more experiments into a dictionary that JSONPath can search.

    Each experiment's domain attributes are copied into a new dictionary, which
    is then extended with ``"feature"``, ``"parameter"``, ``"metric"``,
    ``"artifact"`` and ``"dataframe"`` keys. Each of those holds a list with one
    attribute dictionary per child object of that kind.

    Parameters
    ----------
    experiment : Experiment or list of Experiment
        The experiment(s) to convert.

    Returns
    -------
    dict
        ``{"experiment": [...]}`` with one dictionary per input experiment, in
        input order.

    Raises
    ------
    ValueError
        If `experiment` is neither an `Experiment` nor a `list` containing
        only `Experiment` objects.
    """
    if isinstance(experiment, Experiment):
        experiments = [experiment]
    elif isinstance(experiment, list) and all(isinstance(e, Experiment) for e in experiment):
        experiments = experiment
    else:
        raise ValueError("`experiment` must be of type `Experiment` or `list` of type `Experiment`")

    result = {"experiment": []}
    for e in experiments:
        # Copy the attribute dictionary instead of aliasing `__dict__`: writing the
        # child lists into the live `__dict__` would add `feature`, `parameter`, ...
        # attributes to the experiment's domain object itself.
        experiment_json = dict(e._domain.__dict__)

        # Children are copied too, so editing the returned structure cannot
        # change the domain objects it was built from.
        experiment_json["feature"] = [dict(f._domain.__dict__) for f in e.features()]
        experiment_json["parameter"] = [dict(p._domain.__dict__) for p in e.parameters()]
        experiment_json["metric"] = [dict(m._domain.__dict__) for m in e.metrics()]
        experiment_json["artifact"] = [dict(a._domain.__dict__) for a in e.artifacts()]
        experiment_json["dataframe"] = [dict(d._domain.__dict__) for d in e.dataframes()]

        result["experiment"].append(experiment_json)

    return result

def project_to_json(project):
    """Convert one or more projects into a dictionary that JSONPath can search.

    Each project's domain attributes are copied into a new dictionary, which is
    then extended with ``"artifact"``, ``"dataframe"`` and ``"experiment"``
    keys.

    Parameters
    ----------
    project : Project or list of Project
        The project(s) to convert.

    Returns
    -------
    dict
        ``{"project": [...]}`` with one dictionary per input project, in input
        order.

    Raises
    ------
    ValueError
        If `project` is neither a `Project` nor a `list` containing only
        `Project` objects.
    """
    if isinstance(project, Project):
        projects = [project]
    elif isinstance(project, list) and all(isinstance(pr, Project) for pr in project):
        projects = project
    else:
        raise ValueError("`project` must be of type `Project` or `list` of type `Project`")

    result = {"project": []}
    for pr in projects:
        # Copy the attribute dictionary instead of aliasing `__dict__`: writing the
        # child lists into the live `__dict__` would add `artifact`, `dataframe`
        # and `experiment` attributes to the project's domain object itself.
        project_json = dict(pr._domain.__dict__)

        project_json["artifact"] = [dict(a._domain.__dict__) for a in pr.artifacts()]
        project_json["dataframe"] = [dict(d._domain.__dict__) for d in pr.dataframes()]

        # Each entry is the full return value of `experiment_to_json` for one
        # experiment, i.e. a `{"experiment": [...]}` wrapper rather than a bare
        # experiment dictionary. Kept as is so the returned shape is unchanged.
        project_json["experiment"] = [experiment_to_json(e) for e in pr.experiments()]

        result["project"].append(project_json)

    return result

def rubicon_to_json(rubicon):
    """Convert every project of one or more `Rubicon` objects into one dictionary.

    Parameters
    ----------
    rubicon : Rubicon or list of Rubicon
        The rubicon object(s) whose projects are converted with
        `project_to_json`.

    Returns
    -------
    dict or None
        ``{"project": [...]}`` holding the converted projects of all inputs in
        iteration order, or ``None`` when no project was found.

    Raises
    ------
    ValueError
        If `rubicon` is neither a `Rubicon` nor a `list` containing only
        `Rubicon` objects.
    """
    if not isinstance(rubicon, Rubicon):
        is_rubicon_list = isinstance(rubicon, list) and all(
            isinstance(item, Rubicon) for item in rubicon
        )
        if not is_rubicon_list:
            raise ValueError("`rubicon_objects` must be of type `Rubicon` or `list` of type `Rubicon`")

    clients = rubicon if isinstance(rubicon, list) else [rubicon]

    merged = None
    for client in clients:
        for project in client.projects():
            converted = project_to_json(project)
            if merged is None:
                # the first converted project seeds the result
                merged = converted
            else:
                merged["project"].extend(converted["project"])

    return merged

In [4]:
# convert the demo project, including its artifacts, dataframes and experiments
project_json = project_to_json(pr)
project_json

{'project': [{'name': 'jsonpath',
   'id': '2942e89c-997f-405e-b4c5-5e7e621fc730',
   'description': None,
   'github_url': None,
   'training_metadata': None,
   'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 268611),
   'artifact': [{'name': 'p',
     'id': '029d7b1f-3b0e-47f7-8429-fa95d13bd54a',
     'description': None,
     'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 272389),
     'tags': [],
     'parent_id': '2942e89c-997f-405e-b4c5-5e7e621fc730'}],
   'dataframe': [{'id': '7eaaceff-1ed9-4ab6-9021-b57714f3c820',
     'name': None,
     'description': None,
     'tags': [],
     'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 272554),
     'parent_id': '2942e89c-997f-405e-b4c5-5e7e621fc730'}],
   'experiment': [{'experiment': [{'project_name': 'jsonpath',
       'id': '2ec5a508-28cd-49a4-9415-dddb58403e50',
       'name': None,
       'description': None,
       'model_name': None,
       'branch_name': None,
       'commit_hash': None,
       'traini

In [5]:
# NOTE(review): the output captured below is wrapped in 'rubicon' and 'projects'
# keys, which `rubicon_to_json` as defined above does not add (it returns
# {"project": [...]}) - presumably captured from an earlier version; re-run to confirm
rubicon_json = rubicon_to_json(rb)
rubicon_json

{'rubicon': [{'projects': [{'project': [{'name': 'jsonpath',
       'id': '2942e89c-997f-405e-b4c5-5e7e621fc730',
       'description': None,
       'github_url': None,
       'training_metadata': None,
       'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 268611),
       'artifact': [{'name': 'p',
         'id': '029d7b1f-3b0e-47f7-8429-fa95d13bd54a',
         'description': None,
         'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 272389),
         'tags': [],
         'parent_id': '2942e89c-997f-405e-b4c5-5e7e621fc730'}],
       'dataframe': [{'id': '7eaaceff-1ed9-4ab6-9021-b57714f3c820',
         'name': None,
         'description': None,
         'tags': [],
         'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 272554),
         'parent_id': '2942e89c-997f-405e-b4c5-5e7e621fc730'}],
       'experiment': [{'experiment': [{'project_name': 'jsonpath',
           'id': '2ec5a508-28cd-49a4-9415-dddb58403e50',
           'name': None,
           'descri

In [6]:
# a freshly logged experiment has nothing logged to it yet, so each of its
# child lists in the converted dictionary is empty
ex = pr.log_experiment(name="test_experiment_to_json")
experiment_json = experiment_to_json(ex)
experiment_json

{'experiment': [{'project_name': 'jsonpath',
   'id': 'd2fbc463-84da-4110-9e35-93e0a659b033',
   'name': 'test_experiment_to_json',
   'description': None,
   'model_name': None,
   'branch_name': None,
   'commit_hash': None,
   'training_metadata': None,
   'tags': [],
   'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 305376),
   'feature': [],
   'parameter': [],
   'metric': [],
   'artifact': [],
   'dataframe': []}]}

In [7]:
# NOTE(review): the output captured below starts with a 'top_level_rubicon' key,
# which `convert_to_json` as defined above does not add - presumably captured
# from an earlier version; re-run to confirm
converted_to_json = convert_to_json(rubicon_objects=rb, projects=pr, experiments=ex)
converted_to_json

{'top_level_rubicon': [{'rubicon': [{'projects': [{'project': [{'name': 'jsonpath',
         'id': '2942e89c-997f-405e-b4c5-5e7e621fc730',
         'description': None,
         'github_url': None,
         'training_metadata': None,
         'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 268611),
         'artifact': [{'name': 'p',
           'id': '029d7b1f-3b0e-47f7-8429-fa95d13bd54a',
           'description': None,
           'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 272389),
           'tags': [],
           'parent_id': '2942e89c-997f-405e-b4c5-5e7e621fc730'}],
         'dataframe': [{'id': '7eaaceff-1ed9-4ab6-9021-b57714f3c820',
           'name': None,
           'description': None,
           'tags': [],
           'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 272554),
           'parent_id': '2942e89c-997f-405e-b4c5-5e7e621fc730'}],
         'experiment': [{'experiment': [{'project_name': 'jsonpath',
             'id': '2ec5a508-28cd-49a4-94

### `jsonpath_ng`

This seems to be the most commonly recommended Python implementation, though it does not appear to be actively maintained.
> https://github.com/h2non/jsonpath-ng

In [8]:
from jsonpath_ng.ext import parse

class RubiconJSON:
    """Thin wrapper that runs JSONPath queries against a converted rubicon dictionary."""

    def __init__(self, rubicon_json):
        """Store the dictionary that later `search` calls will query."""
        self._rubicon_json = rubicon_json

    def search(self, query):
        """Parse `query` as a JSONPath expression and return what it finds.

        Parameters
        ----------
        query : str
            The JSONPath expression, handed to `parse`.

        Returns
        -------
        The result of calling `find` on the parsed expression with the stored
        dictionary.
        """
        compiled_query = parse(query)
        return compiled_query.find(self._rubicon_json)
    
# every query below runs against the `rubicon_json` dictionary built earlier
rb_json = RubiconJSON(rubicon_json)

#### get all metrics from each experiment

In [9]:
# `$..experiment` finds every "experiment" key at any depth; `[*].metric` then
# selects the "metric" list of each element that has one, so each match is one
# experiment's full list of metric dictionaries
res = rb_json.search("$..experiment[*].metric")

print(f"{len(res)} experiments")
for match in res:
    print(f"{len(match.value)} metrics")
    print(match.value)

4 experiments
2 metrics
[{'name': 'j', 'value': 1.0, 'id': 'e6fec1d0-c4df-461f-af94-6c5dfd4bc734', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 269319), 'tags': ['n']}, {'name': 'k', 'value': 0.0, 'id': '0df5ecde-6d1c-4edd-9f2d-5bd638fa2e93', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 269381), 'tags': ['l']}]
2 metrics
[{'name': 'j', 'value': 0.0, 'id': 'ec95746b-4fa6-4b98-bedf-dea63cce0a64', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 270316), 'tags': ['l']}, {'name': 'k', 'value': 0.0, 'id': '37df4ebc-3df0-4c00-8140-d90f257a77dc', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 270376), 'tags': ['n']}]
2 metrics
[{'name': 'j', 'value': 1.0, 'id': '69ad1fce-c3bc-4014-b843-9daa8a5622b9', 'description': None, 'directionality': 'score', 'created_at': datet

#### get all experiments with tag 'b'

In [10]:
# the `[?(...)]` filter keeps only the experiments for which any entry of
# their "tags" list equals 'b'; each match is a whole experiment dictionary
res = rb_json.search("$..experiment[?(@.tags[*]=='b')]")

print(f"{len(res)} experiments")
for match in res:
    print(match.value)

1 experiments
{'project_name': 'jsonpath', 'id': 'cdf5175c-dfd8-4057-af1b-c03030b2feb3', 'name': None, 'description': None, 'model_name': None, 'branch_name': None, 'commit_hash': None, 'training_metadata': None, 'tags': ['b'], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 271548), 'feature': [{'name': 'f', 'id': 'fdee90c0-afdf-4744-aa76-ff2ff9ece245', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 271594)}, {'name': 'g', 'id': '9713611e-8662-43b0-8a29-49ab93528c69', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 271662)}, {'name': 'h', 'id': '0484befc-73d2-4ddc-8fbc-cd1bf4c09edd', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 271710)}, {'name': 'i', 'id': '3df00398-62e2-4498-a202-98265b0df8f2', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 

#### get all metrics named 'j' with a value greater than or equal to 0.5 from each experiment

In [11]:
# `&` requires both conditions to hold for a metric: it is named 'j' and its
# value is at least 0.5 (`>=` is inclusive); each match is one metric dictionary
res = rb_json.search("$..experiment[*].metric[?(@.name=='j' & @.value>=0.5)]")

print(f"{len(res)} metrics")
for match in res:
    print(match.value)

2 metrics
{'name': 'j', 'value': 1.0, 'id': 'e6fec1d0-c4df-461f-af94-6c5dfd4bc734', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 269319), 'tags': ['n']}
{'name': 'j', 'value': 1.0, 'id': '69ad1fce-c3bc-4014-b843-9daa8a5622b9', 'description': None, 'directionality': 'score', 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 271091), 'tags': ['n']}


#### get all experiments that contain a metric named 'j' with a value less than or equal to 0.5

In [12]:
# nested filters: the inner `[?(@.name=='j')]` picks an experiment's metrics
# named 'j', and the outer filter keeps the experiment when such a metric's
# value is at most 0.5 (`<=` is inclusive); each match is a whole experiment
res = rb_json.search("$..experiment[?(@.metric[?(@.name=='j')].value<=0.5)]")

print(f"{len(res)} experiments")
for match in res:
    print(match.value)

2 experiments
{'project_name': 'jsonpath', 'id': '93f29d26-8f64-4d8d-a842-b641940fea0b', 'name': None, 'description': None, 'model_name': None, 'branch_name': None, 'commit_hash': None, 'training_metadata': None, 'tags': ['a'], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 269925), 'feature': [{'name': 'f', 'id': 'ed8d2317-8836-4195-9057-7552a30d30cc', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 269972)}, {'name': 'g', 'id': 'aa1e79bf-d0a6-452c-a66e-8eaa262e2cbb', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 270033)}, {'name': 'h', 'id': 'dd0e0591-106f-4e8e-96e8-a9896f2375a3', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 19, 1, 16, 270080)}, {'name': 'i', 'id': '80094e6e-e955-444f-9913-900dc8330f30', 'description': None, 'importance': None, 'tags': [], 'created_at': datetime.datetime(2022, 12, 7, 

#### returning `rubicon_ml` objects

In [13]:
from rubicon_ml.domain import Metric as DomainMetric
from rubicon_ml.client import Metric

class NoOpParent:
    """Placeholder parent for read-only client objects rebuilt from search results."""

    @property
    def _config(self):
        """Always ``None``: this parent carries no configuration."""
        return None

In [14]:
# same query as above: metrics named 'j' with a value of at least 0.5
res = rb_json.search("$..experiment[*].metric[?(@.name=='j' & @.value>=0.5)]")

# each match's value is a metric's attribute dictionary: unpack it into a
# domain-level metric, then wrap that in a client `Metric` whose parent is a
# `NoOpParent`
metrics = []
for match in res:
    metrics.append(Metric(DomainMetric(**match.value), NoOpParent()))
        
metrics

[<rubicon_ml.client.metric.Metric at 0x14b607040>,
 <rubicon_ml.client.metric.Metric at 0x14b607910>]

In [15]:
# the rebuilt client metrics expose `name` and `value` as attributes
for m in metrics:
    print(m.name, m.value)

j 1.0
j 1.0
