## Introduction

### Imports

In [1]:
# Load IPython's autoreload extension and select mode 2 for it.
# NOTE(review): mode 2 is presumably "reload all modules before each cell runs",
# so edits to the project package are picked up without a kernel restart - confirm.
%load_ext autoreload
%autoreload 2

In [8]:
import json

## Data exploration

### First, we load the raw JSON data

In [3]:
# Sample pull-request export; the relative path is resolved against the
# kernel's current working directory.
filename = "../inputs/pull_requests_000001.json"

# Parse directly from the file handle instead of reading the whole text first.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default.
with open(filename, "r", encoding="utf-8") as jsonfile:
    json_data = json.load(jsonfile)
# Last expression of the cell: displays the parsed data.
json_data

[{'type': 'pull_request',
  'url': 'https://github.com/JeanMichel/infovisu17/pull/1',
  'user': 'https://github.com/JeanMichel',
  'repository': 'https://github.com/JeanMichel/infovisu17',
  'title': 'Refactoring',
  'body': 'Did some work over here.',
  'base': {'ref': 'master',
   'sha': '18664f82c2673281d952266284f8c43eb28484a5',
   'user': 'https://github.com/JeanMichel',
   'repo': 'https://github.com/JeanMichel/infovisu17'},
  'head': {'ref': '20170328',
   'sha': '4824635ed4088909d192c3afeac6e18f42204389',
   'user': 'https://github.com/JeanMichel',
   'repo': 'https://github.com/JeanMichel/infovisu17'},
  'assignee': 'https://github.com/JeanMichel',
  'assignees': ['https://github.com/JeanMichel',
   'https://github.com/JeanMiqueline',
   'https://github.com/JeanMarie'],
  'milestone': None,
  'labels': [],
  'reactions': [],
  'review_requests': [{'reviewer': 'https://github.com/JeanMiqueline',
    'reviewer_type': 'User',
    'created_at': '2017-03-28T19:47:14Z',
    'updated

### Then, we create a tree with the data

In [4]:
# Tree comes from the project's json_ld_semantics package.
from json_ld_semantics.semantics import Tree
# Build a tree from the parsed JSON loaded above.
tree = Tree(json_data)

### We can explore what is inside the tree

In [6]:
# Print the tree's string form: a summary of the root node (field name,
# found type, number of children and paths, ...), as shown in the output below.
print(tree)

Tree '$' with 44 childrens and with 30 paths
- fieldName: $
- data: Length of 44
- foundType: Root
- descriptiveType: None
- unique: None
- default: None
- description: None
- example: None
- regex: None
- parent: None
- traversal: 30 paths
- children: Length of 44
- path: $



In [7]:
# Print the indented listing of every path found in the tree.
# print() is used (rather than a bare expression) so the line breaks render;
# presumably get_paths_fancy() returns a single multi-line string - confirm.
print(tree.get_paths_fancy())

$
  $[*]
    $[*].type
    $[*].url
    $[*].user
    $[*].repository
    $[*].title
    $[*].body
    $[*].base
      $[*].base.ref
      $[*].base.sha
      $[*].base.user
      $[*].base.repo
    $[*].head
      $[*].head.ref
      $[*].head.sha
      $[*].head.user
      $[*].head.repo
    $[*].assignee
    $[*].assignees
    $[*].milestone
    $[*].labels
    $[*].reactions
    $[*].review_requests
    $[*].close_issue_references
    $[*].work_in_progress
    $[*].merged_at
    $[*].closed_at
    $[*].created_at
    $[*].merge_commit_sha



## Creating the model

In [18]:
# Model comes from the project's json_ld_semantics package.
from json_ld_semantics.model import Model
# Start with a model constructed without arguments.
model = Model()

### We can add files to a model and then process them

In [19]:
# Register the input file with the model, then process the registered files.
# process_files() is the last expression, so its return value is what the
# cell displays (here: one (filename, diff-like dict) pair).
model.add_files(filename)
model.process_files()

[('../inputs/pull_requests_000001.json',
  {'dictionary_item_added': [root['[*]']]})]

### Or, if we already have a traversal, we can create the model directly from it

In [65]:
# Build the model from the traversal exported by the tree created earlier.
# This rebinds `model`, replacing the file-based instance from the previous cells.
model = Model(traversal=tree.export_traversal())

### Now, we can see what paths are available in our model

In [66]:
# Display the set of paths known to the model.
model.get_paths()

{'$',
 '$[*]',
 '$[*].assignee',
 '$[*].assignees',
 '$[*].base',
 '$[*].base.ref',
 '$[*].base.repo',
 '$[*].base.sha',
 '$[*].base.user',
 '$[*].body',
 '$[*].close_issue_references',
 '$[*].closed_at',
 '$[*].created_at',
 '$[*].head',
 '$[*].head.ref',
 '$[*].head.repo',
 '$[*].head.sha',
 '$[*].head.user',
 '$[*].labels',
 '$[*].merge_commit_sha',
 '$[*].merged_at',
 '$[*].milestone',
 '$[*].reactions',
 '$[*].repository',
 '$[*].review_requests',
 '$[*].title',
 '$[*].type',
 '$[*].url',
 '$[*].user',
 '$[*].work_in_progress'}

### But more importantly, we can see what attributes are linked to these paths

In [75]:
# Display the model as a list: a header row of attribute names followed by
# one row per path holding that path's attribute values.
model.to_list()

[['path',
  'foundType',
  'descriptiveType',
  'unique',
  'default',
  'description',
  'example',
  'regex'],
 [['$', json_ld_semantics.semantics.Root, None, None, None, None, None, None],
  ['$[*]', dict, None, None, None, None, None, None],
  ['$[*].type', str, None, None, None, None, None, None],
  ['$[*].url', str, None, None, None, None, None, None],
  ['$[*].user', str, None, None, None, None, None, None],
  ['$[*].repository', str, None, None, None, None, None, None],
  ['$[*].title', str, None, None, None, None, None, None],
  ['$[*].body', str, None, None, None, None, None, None],
  ['$[*].base', dict, None, None, None, None, None, None],
  ['$[*].base.ref', str, None, None, None, None, None, None],
  ['$[*].base.sha', str, None, None, None, None, None, None],
  ['$[*].base.user', str, None, None, None, None, None, None],
  ['$[*].base.repo', str, None, None, None, None, None, None],
  ['$[*].head', dict, None, None, None, None, None, None],
  ['$[*].head.ref', str, None, N

### It's a bit empty for now... Let's add some attributes.

In [76]:
# Attach descriptive attributes to the `$[*].url` path of the model.
# The pattern is written as a raw string so its backslashes reach the model
# verbatim instead of being parsed as (invalid) string escape sequences,
# which triggers a warning on recent Python versions. The resulting string
# value is identical to the previous non-raw literal.
model.set_attribute(
    "$[*].url",
    descriptiveType="https://schema.org/url",
    unique=False,
    description="URL to that pull request",
    example="https://github.com/JeanMichel/infovisu17/pull/1",
    regex=r"^https:\/\/github\.com\/\w+\/\w+\/pull\/\d+$",
)

In [77]:
# Display the model again to inspect the attributes set in the previous cell.
# NOTE(review): the output saved below still lists `$[*].url` with every
# attribute as None even though set_attribute was just called on that path -
# re-run the notebook and confirm the new attributes show up here.
model.to_list()

[['path',
  'foundType',
  'descriptiveType',
  'unique',
  'default',
  'description',
  'example',
  'regex'],
 [['$', json_ld_semantics.semantics.Root, None, None, None, None, None, None],
  ['$[*]', dict, None, None, None, None, None, None],
  ['$[*].type', str, None, None, None, None, None, None],
  ['$[*].url', str, None, None, None, None, None, None],
  ['$[*].user', str, None, None, None, None, None, None],
  ['$[*].repository', str, None, None, None, None, None, None],
  ['$[*].title', str, None, None, None, None, None, None],
  ['$[*].body', str, None, None, None, None, None, None],
  ['$[*].base', dict, None, None, None, None, None, None],
  ['$[*].base.ref', str, None, None, None, None, None, None],
  ['$[*].base.sha', str, None, None, None, None, None, None],
  ['$[*].base.user', str, None, None, None, None, None, None],
  ['$[*].base.repo', str, None, None, None, None, None, None],
  ['$[*].head', dict, None, None, None, None, None, None],
  ['$[*].head.ref', str, None, N