## Introduction

### Imports

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
from pprint import pprint

## Data exploration - Tree

In [3]:
from argonodes.nodes import Tree

### First, we load the raw JSON data

In [4]:
# Input file for this walkthrough, given relative to the notebook's directory.
filename = "../inputs/2022_MARCH.json"

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, and let `json.load` parse straight from the file handle.
with open(filename, "r", encoding="utf-8") as jsonfile:
    json_data = json.load(jsonfile)

# Preview only the first 512 characters of the repr to keep the output short.
print(str(json_data)[:512] + "...")

{'timelineObjects': [{'placeVisit': {'location': {'latitudeE7': 465196535, 'longitudeE7': 66322734, 'placeId': 'ChIJ5aeJzT4pjEcRXu7iysk_F-s', 'address': 'Lausanne\nSuisse', 'name': 'Lausanne', 'locationConfidence': 100.0, 'calibratedProbability': 100.0}, 'duration': {'startTimestamp': '2022-03-07T13:08:43.398Z', 'endTimestamp': '2022-03-07T17:13:38.828Z'}, 'placeConfidence': 'USER_CONFIRMED', 'visitConfidence': 100, 'otherCandidateLocations': [{'latitudeE7': 465177565, 'longitudeE7': 66284570, 'placeId': 'C...


### Then, we create a tree with the data

In [5]:
# Wrap the parsed JSON in an argonodes Tree so it can be explored by path.
tree = Tree(json_data)

### We can explore whatever is inside

In [6]:
# Bare expression: the notebook displays the root node's repr.
tree

Tree '$' with 1 children
- fieldName: $
- foundType: Root
- descriptiveType: None
- unique: None
- default: None
- description: None
- choices: None
- regex: None
- parent: None
- path: $
- data: None

In [7]:
# The root has a single child here: the `timelineObjects` node.
tree.children[0]

NodeDict 'timelineObjects' with 77 childrens
- fieldName: timelineObjects
- foundType: list
- descriptiveType: None
- unique: None
- default: None
- description: None
- choices: None
- regex: None
- parent: $
- path: $.timelineObjects
- data: None

In [8]:
# Collect the paths once and reuse the result for both the count and the listing.
paths = tree.get_paths()
print(len(paths))
paths

149


{'$',
 '$.timelineObjects',
 '$.timelineObjects[*]',
 '$.timelineObjects[*].activitySegment',
 '$.timelineObjects[*].activitySegment.activities',
 '$.timelineObjects[*].activitySegment.activities[*]',
 '$.timelineObjects[*].activitySegment.activities[*].activityType',
 '$.timelineObjects[*].activitySegment.activities[*].probability',
 '$.timelineObjects[*].activitySegment.activityType',
 '$.timelineObjects[*].activitySegment.confidence',
 '$.timelineObjects[*].activitySegment.distance',
 '$.timelineObjects[*].activitySegment.duration',
 '$.timelineObjects[*].activitySegment.duration.endTimestamp',
 '$.timelineObjects[*].activitySegment.duration.startTimestamp',
 '$.timelineObjects[*].activitySegment.editActionMetadata',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityConfidence',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityType',
 '$.timeli

In [9]:
# A path with concrete indices addresses a single node; the result is still a list.
tree.get_children_from_path("$.timelineObjects[4].activitySegment.activities[0].activityType")

[NodeDict 'activityType'
 - fieldName: activityType
 - foundType: str
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - choices: None
 - regex: None
 - parent: [0]
 - path: $.timelineObjects[4].activitySegment.activities[0].activityType
 - data: "IN_SUBWAY" (length of 9)]

#### `get_children_from_path` supports wildcards and returns every match

In [10]:
# `[*]` matches every index of `activities` and the trailing `.*` every field,
# so each activity's `activityType` and `probability` nodes are returned.
tree.get_children_from_path("$.timelineObjects[4].activitySegment.activities[*].*")

[NodeDict 'activityType'
 - fieldName: activityType
 - foundType: str
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - choices: None
 - regex: None
 - parent: [0]
 - path: $.timelineObjects[4].activitySegment.activities[0].activityType
 - data: "IN_SUBWAY" (length of 9),
 NodeDict 'probability'
 - fieldName: probability
 - foundType: float
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - choices: None
 - regex: None
 - parent: [0]
 - path: $.timelineObjects[4].activitySegment.activities[0].probability
 - data: "58.31265449523926" (length of 17),
 NodeDict 'activityType'
 - fieldName: activityType
 - foundType: str
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - choices: None
 - regex: None
 - parent: [1]
 - path: $.timelineObjects[4].activitySegment.activities[1].activityType
 - data: "IN_TRAIN" (length of 8),
 NodeDict 'probability'
 - fieldName: probability
 - foundType: float
 - descr

## Data semantics - Model

In [11]:
from argonodes.models import Model

### We create a new model from an existing Tree

In [12]:
# Derive a Model from the explored tree.
# NOTE(review): `name` looks like a free-form label for the family of files
# this model describes — confirm against the argonodes documentation.
model = Model(tree, name="Google Geolocation – YEAR_MONTH.json")

### Now, we can see what paths are available in our model

In [13]:
# Model paths are expressed with the `[*]` wildcard rather than concrete list indices.
model.get_paths()

{'$',
 '$.timelineObjects',
 '$.timelineObjects[*]',
 '$.timelineObjects[*].activitySegment',
 '$.timelineObjects[*].activitySegment.activities',
 '$.timelineObjects[*].activitySegment.activities[*]',
 '$.timelineObjects[*].activitySegment.activities[*].activityType',
 '$.timelineObjects[*].activitySegment.activities[*].probability',
 '$.timelineObjects[*].activitySegment.activityType',
 '$.timelineObjects[*].activitySegment.confidence',
 '$.timelineObjects[*].activitySegment.distance',
 '$.timelineObjects[*].activitySegment.duration',
 '$.timelineObjects[*].activitySegment.duration.endTimestamp',
 '$.timelineObjects[*].activitySegment.duration.startTimestamp',
 '$.timelineObjects[*].activitySegment.editActionMetadata',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityConfidence',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityType',
 '$.timeli

### But more importantly, we can see what attributes are linked to these paths

In [14]:
# Nested dict keyed by path: each entry holds that path's attributes plus a
# `traversal` dict containing its children.
model.traversal

{'$': {'foundType': argonodes.nodes.Root,
  'descriptiveType': None,
  'unique': None,
  'default': None,
  'description': None,
  'choices': None,
  'regex': None,
  'traversal': {'$.timelineObjects': {'foundType': list,
    'descriptiveType': None,
    'unique': None,
    'default': None,
    'description': None,
    'choices': None,
    'regex': None,
    'traversal': {'$.timelineObjects[*]': {'foundType': dict,
      'descriptiveType': None,
      'unique': None,
      'default': None,
      'description': None,
      'choices': None,
      'regex': None,
      'traversal': {'$.timelineObjects[*].placeVisit': {'foundType': dict,
        'descriptiveType': None,
        'unique': None,
        'default': None,
        'description': None,
        'choices': None,
        'regex': None,
        'traversal': {'$.timelineObjects[*].placeVisit.location': {'foundType': dict,
          'descriptiveType': None,
          'unique': None,
          'default': None,
          'description': N

In [15]:
import pandas as pd

In [16]:
# The first row returned by `to_list()` supplies the column names; every
# following row becomes one line of the table.
liste = model.to_list()
header, rows = liste[0], liste[1:]
df = pd.DataFrame(rows, columns=header)
df.head()

Unnamed: 0,path,foundType,descriptiveType,unique,default,description,choices,regex
0,$,<class 'argonodes.nodes.Root'>,,,,,,
1,$.timelineObjects,<class 'list'>,,,,,,
2,$.timelineObjects[*],<class 'dict'>,,,,,,
3,$.timelineObjects[*].placeVisit,<class 'dict'>,,,,,,
4,$.timelineObjects[*].placeVisit.location,<class 'dict'>,,,,,,


### It's a bit empty for now... Let's add some info.

In [17]:
# Give the root path a human-readable description; the call returns True (shown below).
model.set_attribute(
    "$",
    description="Model for the list of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`."
)

True

In [18]:
# Annotate the `timelineObjects` list itself: flag it as unique and describe it.
model.set_attribute(
    "$.timelineObjects",
    unique=True,
    description="List of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`."
)

True

In [19]:
# Annotate the wildcard path, i.e. the elements of the `timelineObjects` list.
model.set_attribute(
    "$.timelineObjects[*]",
    unique=False,
    description="One of the semantic locations. Can be either `placeVisit` or `activitySegment`."
)

True

In [20]:
# Attach a schema.org URL as the descriptive type of the `location` node,
# together with a uniqueness flag and a description.
model.set_attribute(
    "$.timelineObjects[*].placeVisit.location",
    descriptiveType="https://schema.org/location",
    unique=False,
    description="Location that Google thinks you visited (latitude, longitude, id, address, name and confidence)."
)

True

In [21]:
# Rebuild the table from the model so the attributes set so far show up:
# the first row is the header, the remaining rows are the data.
liste = model.to_list()
header, rows = liste[0], liste[1:]
df = pd.DataFrame(rows, columns=header)
df.head()

Unnamed: 0,path,foundType,descriptiveType,unique,default,description,choices,regex
0,$,<class 'argonodes.nodes.Root'>,,,,Model for the list of semantic locations deter...,,
1,$.timelineObjects,<class 'list'>,,True,,List of semantic locations determined by Googl...,,
2,$.timelineObjects[*],<class 'dict'>,,False,,One of the semantic locations. Can be either `...,,
3,$.timelineObjects[*].placeVisit,<class 'dict'>,,,,,,
4,$.timelineObjects[*].placeVisit.location,<class 'dict'>,https://schema.org/location,False,,Location that Google thinks you visited (latit...,,


## Data semantics - Apply the model

### Now that we have a working model, let's apply it back to our existing tree

In [22]:
# Before applying the model: the tree node's descriptive attributes are all still None.
tree.get_children_from_path("$.timelineObjects[0].placeVisit.location")[0]

NodeDict 'location' with 7 childrens
- fieldName: location
- foundType: dict
- descriptiveType: None
- unique: None
- default: None
- description: None
- choices: None
- regex: None
- parent: placeVisit
- path: $.timelineObjects[0].placeVisit.location
- data: None

In [23]:
# Apply the model's attributes to the tree; the value returned by `apply` is
# displayed below and now shows the root description.
tree.apply(model)

Tree '$' with 1 children and with 149 paths
- fieldName: $
- foundType: Root
- descriptiveType: None
- unique: None
- default: None
- description: Model for the list of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`.
- choices: None
- regex: None
- parent: None
- path: $
- data: None

In [24]:
# Same node as before, now carrying the descriptiveType, unique flag and
# description that were set on the model.
tree.get_children_from_path("$.timelineObjects[0].placeVisit.location")[0]

NodeDict 'location' with 7 childrens
- fieldName: location
- foundType: dict
- descriptiveType: https://schema.org/location
- unique: False
- default: None
- description: Location that Google thinks you visited (latitude, longitude, id, address, name and confidence).
- choices: None
- regex: None
- parent: placeVisit
- path: $.timelineObjects[0].placeVisit.location
- data: None

## Data filtering - Add filters to the model

### We can filter the model in order to only keep what we need.

In [25]:
from argonodes.filters import Filter

In [26]:
# Two filters: one selecting on the path prefix, one on an exact descriptiveType value.
# NOTE(review): the `__startswith` / `__exact` suffixes look like lookup
# operators on the named attribute — confirm against the argonodes documentation.
placeVisit = Filter(path__startswith="$.timelineObjects[*].placeVisit")
onlyLocation = Filter(descriptiveType__exact="https://schema.org/location")

In [27]:
# Calling a filter on the model returns the filtered traversal; entries above
# `placeVisit` keep only their `traversal` key in the output below.
placeVisit(model)

{'$': {'foundType': <class 'argonodes.nodes.Root'>, 'descriptiveType': None, 'unique': None, 'default': None, 'description': 'Model for the list of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`.', 'choices': None, 'regex': None, 'traversal': {'$.timelineObjects': {'traversal': {'$.timelineObjects[*]': {'traversal': {'$.timelineObjects[*].placeVisit': {'foundType': <class 'dict'>, 'descriptiveType': None, 'unique': None, 'default': None, 'description': None, 'choices': None, 'regex': None, 'traversal': {'$.timelineObjects[*].placeVisit.location': {'foundType': <class 'dict'>, 'descriptiveType': 'https://schema.org/location', 'unique': False, 'default': None, 'description': 'Location that Google thinks you visited (latitude, longitude, id, address, name and confidence).', 'choices': None, 'regex': None, 'traversal': {'$.timelineObjects[*].placeVisit.location.latitudeE7': {'foundType': <class 'int'>, 'descriptiveType': None, 'unique': None, 'defa

In [28]:
# Keep only entries whose descriptiveType is the schema.org location URL.
# NOTE(review): the `model.traversal` displayed in the following cell has the
# same pruned structure, which suggests filters modify the model in place —
# confirm; if so, re-running filter cells out of order changes the results.
onlyLocation(model)

{'$': {'foundType': <class 'argonodes.nodes.Root'>, 'descriptiveType': None, 'unique': None, 'default': None, 'description': 'Model for the list of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`.', 'choices': None, 'regex': None, 'traversal': {'$.timelineObjects': {'traversal': {'$.timelineObjects[*]': {'traversal': {'$.timelineObjects[*].placeVisit': {'traversal': {'$.timelineObjects[*].placeVisit.location': {'foundType': <class 'dict'>, 'descriptiveType': 'https://schema.org/location', 'unique': False, 'default': None, 'description': 'Location that Google thinks you visited (latitude, longitude, id, address, name and confidence).', 'choices': None, 'regex': None, 'traversal': {}}}}}}}}}}}

In [29]:
# The model's own traversal now contains only the location branch.
model.traversal

{'$': {'foundType': argonodes.nodes.Root,
  'descriptiveType': None,
  'unique': None,
  'default': None,
  'description': 'Model for the list of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`.',
  'choices': None,
  'regex': None,
  'traversal': {'$.timelineObjects': {'traversal': {'$.timelineObjects[*]': {'traversal': {'$.timelineObjects[*].placeVisit': {'traversal': {'$.timelineObjects[*].placeVisit.location': {'foundType': dict,
          'descriptiveType': 'https://schema.org/location',
          'unique': False,
          'default': None,
          'description': 'Location that Google thinks you visited (latitude, longitude, id, address, name and confidence).',
          'choices': None,
          'regex': None,
          'traversal': {}}}}}}}}}}}

## Data concierge - Export and import

### If you have an existing model, you can export it.

In [30]:
# Write the model's traversal to a file; the relative filename resolves against
# the current working directory.
# NOTE(review): the `.pickle` extension suggests Python pickle serialization — confirm.
model.export_traversal(filename="my_model.pickle")

### Afterwards, it can be imported as well.

In [31]:
# Create a model without passing a tree; its repr below is `{}`.
# This rebinds `model`, so the annotated model from the earlier cells is no
# longer reachable under that name.
model = Model(name="Google Geolocation – YEAR_MONTH.json")
model

{}

In [32]:
# Load a traversal from the given file into the model.
# NOTE(review): if the file format is pickle, only load files from trusted
# sources, since unpickling can execute arbitrary code.
model.load_traversal(filename="my_model.pickle")

In [33]:
# The loaded traversal shows the same location branch as the model that was exported.
model.traversal

{'$': {'foundType': argonodes.nodes.Root,
  'descriptiveType': None,
  'unique': None,
  'default': None,
  'description': 'Model for the list of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`.',
  'choices': None,
  'regex': None,
  'traversal': {'$.timelineObjects': {'traversal': {'$.timelineObjects[*]': {'traversal': {'$.timelineObjects[*].placeVisit': {'traversal': {'$.timelineObjects[*].placeVisit.location': {'foundType': dict,
          'descriptiveType': 'https://schema.org/location',
          'unique': False,
          'default': None,
          'description': 'Location that Google thinks you visited (latitude, longitude, id, address, name and confidence).',
          'choices': None,
          'regex': None,
          'traversal': {}}}}}}}}}}}