## Introduction

### Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
from pprint import pprint

## Data exploration - Tree

### First, we load the raw JSON data

In [3]:
filename = "../inputs/2022_MARCH.json"

# JSON text is UTF-8 by specification, so decode it explicitly instead of relying on
# the platform's default encoding. json.load parses straight from the file object.
with open(filename, "r", encoding="utf-8") as jsonfile:
    json_data = json.load(jsonfile)

# Preview only the first 512 characters of the parsed structure.
print(str(json_data)[:512] + "...")

{'timelineObjects': [{'placeVisit': {'location': {'latitudeE7': 465196535, 'longitudeE7': 66322734, 'placeId': 'ChIJ5aeJzT4pjEcRXu7iysk_F-s', 'address': 'Lausanne\nSuisse', 'name': 'Lausanne', 'locationConfidence': 100.0, 'calibratedProbability': 100.0}, 'duration': {'startTimestamp': '2022-03-07T13:08:43.398Z', 'endTimestamp': '2022-03-07T17:13:38.828Z'}, 'placeConfidence': 'USER_CONFIRMED', 'visitConfidence': 100, 'otherCandidateLocations': [{'latitudeE7': 465177565, 'longitudeE7': 66284570, 'placeId': 'C...


### Then, we create a tree with the data

In [4]:
from json_ld_semantics.semantics import Tree
# Build the tree from the parsed JSON loaded above (`json_data`).
tree = Tree(json_data)

### We can explore whatever is inside

In [5]:
# Evaluating the tree as the cell's last expression displays its summary repr.
tree

Tree '$' with 1 children
- fieldName: $
- data: Length of 290595
- foundType: Root
- descriptiveType: None
- unique: None
- default: None
- description: None
- example: None
- regex: None
- parent: None
- path: $

In [6]:
# The root reports a single child; index 0 is therefore the `timelineObjects` node.
tree.children[0]

NodeDict 'timelineObjects' with 77 childrens
- fieldName: timelineObjects
- data: Length of 290574
- foundType: list
- descriptiveType: None
- unique: None
- default: None
- description: None
- example: None
- regex: None
- parent: $
- path: $.timelineObjects

In [7]:
# Compute the set of paths once and reuse it for both the count and the display,
# instead of asking the tree for it twice.
tree_paths = tree.get_paths()
print(len(tree_paths))
tree_paths

149


{'$',
 '$.timelineObjects',
 '$.timelineObjects[*]',
 '$.timelineObjects[*].activitySegment',
 '$.timelineObjects[*].activitySegment.activities',
 '$.timelineObjects[*].activitySegment.activities[*]',
 '$.timelineObjects[*].activitySegment.activities[*].activityType',
 '$.timelineObjects[*].activitySegment.activities[*].probability',
 '$.timelineObjects[*].activitySegment.activityType',
 '$.timelineObjects[*].activitySegment.confidence',
 '$.timelineObjects[*].activitySegment.distance',
 '$.timelineObjects[*].activitySegment.duration',
 '$.timelineObjects[*].activitySegment.duration.endTimestamp',
 '$.timelineObjects[*].activitySegment.duration.startTimestamp',
 '$.timelineObjects[*].activitySegment.editActionMetadata',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityConfidence',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityType',
 '$.timeli

In [8]:
# Concrete indices ([4], [0]) address one specific node; the result is still a list,
# here holding a single match.
tree.get_children_from_path("$.timelineObjects[4].activitySegment.activities[0].activityType")

[NodeDict 'activityType'
 - fieldName: activityType
 - data: Length of 9
 - foundType: str
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - example: None
 - regex: None
 - parent: [0]
 - path: $.timelineObjects[4].activitySegment.activities[0].activityType]

#### `get_children_from_path` supports wildcards and returns every match

In [9]:
# `[*]` stands for every element of `activities` and the trailing `.*` for each of
# their fields, so the result lists the fields of activities[0], activities[1], ...
tree.get_children_from_path("$.timelineObjects[4].activitySegment.activities[*].*")

[NodeDict 'activityType'
 - fieldName: activityType
 - data: Length of 9
 - foundType: str
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - example: None
 - regex: None
 - parent: [0]
 - path: $.timelineObjects[4].activitySegment.activities[0].activityType,
 NodeDict 'probability'
 - fieldName: probability
 - data: Length of 17
 - foundType: float
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - example: None
 - regex: None
 - parent: [0]
 - path: $.timelineObjects[4].activitySegment.activities[0].probability,
 NodeDict 'activityType'
 - fieldName: activityType
 - data: Length of 8
 - foundType: str
 - descriptiveType: None
 - unique: None
 - default: None
 - description: None
 - example: None
 - regex: None
 - parent: [1]
 - path: $.timelineObjects[4].activitySegment.activities[1].activityType,
 NodeDict 'probability'
 - fieldName: probability
 - data: Length of 18
 - foundType: float
 - descriptiveType: None
 - unique:

## Data semantics - Model

### We create a new model

In [10]:
from json_ld_semantics.model import Model
# Instantiate a model with no arguments; input files are attached to it afterwards.
model = Model()

### We can add files to a model and process them

In [11]:
# `filename` is the same JSON path that was loaded earlier in the notebook.
model.add_files(filename)
# The value returned by process_files() is displayed as the cell output.
model.process_files()

[('../inputs/2022_MARCH.json', {'dictionary_item_added': [root['$']]})]

### Or, if we already have a traversal, we can create the model directly from it

In [12]:
# Rebinds `model`: the file-based instance created earlier is replaced by one built
# from the traversal exported by the existing tree.
model = Model(traversal=tree.export_traversal())

### Now, we can see what paths are available in our model

In [13]:
# Same query as `tree.get_paths()`, this time asked of the model.
model.get_paths()

{'$',
 '$.timelineObjects',
 '$.timelineObjects[*]',
 '$.timelineObjects[*].activitySegment',
 '$.timelineObjects[*].activitySegment.activities',
 '$.timelineObjects[*].activitySegment.activities[*]',
 '$.timelineObjects[*].activitySegment.activities[*].activityType',
 '$.timelineObjects[*].activitySegment.activities[*].probability',
 '$.timelineObjects[*].activitySegment.activityType',
 '$.timelineObjects[*].activitySegment.confidence',
 '$.timelineObjects[*].activitySegment.distance',
 '$.timelineObjects[*].activitySegment.duration',
 '$.timelineObjects[*].activitySegment.duration.endTimestamp',
 '$.timelineObjects[*].activitySegment.duration.startTimestamp',
 '$.timelineObjects[*].activitySegment.editActionMetadata',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityConfidence',
 '$.timelineObjects[*].activitySegment.editActionMetadata.activitySegment.activityType',
 '$.timeli

### But more importantly, we can see what attributes are linked to these paths

In [14]:
import pandas as pd

In [15]:
# The first row of the exported list is used as the column headers and the remaining
# rows as the table data.
liste = model.to_list()
df = pd.DataFrame(data=liste[1:], columns=liste[0])
df.head()

Unnamed: 0,path,foundType,descriptiveType,unique,default,description,example,regex
0,$,<class 'json_ld_semantics.semantics.Root'>,,,,,,
1,$.timelineObjects,<class 'list'>,,,,,,
2,$.timelineObjects[*],<class 'dict'>,,,,,,
3,$.timelineObjects[*].placeVisit,<class 'dict'>,,,,,,
4,$.timelineObjects[*].placeVisit.location,<class 'dict'>,,,,,,


### It's a bit empty for now... Let's add some info.

In [16]:
# Annotate the top-level list. The description text is stored in the model and appears
# in the table exported by `model.to_list()`.
model.set_attribute(
    "$.timelineObjects",
    unique=True,
    description="List of semantic locations determined by Google. Can be either `placeVisit` or `activitySegment`."
)

True

In [17]:
# The `[*]` path annotates the elements of the list rather than the list itself.
# The call's return value (True) is shown as the cell output; presumably it signals
# that the attributes were applied - TODO confirm against the library.
model.set_attribute(
    "$.timelineObjects[*]",
    unique=False,
    description="One of the semantic locations. Can be either `placeVisit` or `activitySegment`."
)

True

In [18]:
# Besides `unique` and `description`, this call also sets `descriptiveType`, given
# here as a schema.org URL string.
model.set_attribute(
    "$.timelineObjects[*].placeVisit.location",
    descriptiveType="https://schema.org/location",
    unique=False,
    description="Location that Google thinks you visited (latitude, longitude, id, address, name and confidence)."
)

True

In [19]:
# Export the model again and rebuild the frame (rebinding `liste` and `df`) so the
# attributes set above show up in the table.
liste = model.to_list()
df = pd.DataFrame(data=liste[1:], columns=liste[0])
df.head()

Unnamed: 0,path,foundType,descriptiveType,unique,default,description,example,regex
0,$,<class 'json_ld_semantics.semantics.Root'>,,,,,,
1,$.timelineObjects,<class 'list'>,,True,,List of semantic locations dertermined by Goog...,,
2,$.timelineObjects[*],<class 'dict'>,,False,,One of the semantic locations. Can be either `...,,
3,$.timelineObjects[*].placeVisit,<class 'dict'>,,,,,,
4,$.timelineObjects[*].placeVisit.location,<class 'dict'>,https://schema.org/location,False,,Location that Google thinks you visited (latit...,,
