## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import json
from pprint import pprint

In [None]:
# Location of the Twitter archive (ZIP) that the parser below is called on.
zipfilename = (
    "../inputs/"
    "twitter-2022-02-17-"
    "cf8888eb631a941f287fbfec1a2662e1127775f1ba68efad59880f2fafdcfea7"
    ".zip"
)

## Parsing

- We are parsing the ZIP from Twitter, using a ZIPParser with a TwitterJSParser as argument.
- This parsing returns a collection of Trees, indexed by their file path inside the archive, for us to explore.

In [None]:
from argonodes.parsers import TwitterJSParser, ZIPParser

# Build a ZIP parser that hands the archive entries matching `data/*.js` to
# TwitterJSParser, then run it on the archive. The result is indexed by
# archive path further down (e.g. trees["data/account.js"]).
zipparser = ZIPParser(
    parser=TwitterJSParser,
    regex=r"data\/.*\.js$",
    extension="js",
    verbose=1,
)
trees = zipparser(zipfilename)

## Tree exploration

- For each Tree, we do a bit of exploration and try to add information.

In [None]:
from argonodes.nodes import NA

### Installing optional dependencies

We are going to use FoundRegex, which uses an external package, `tdda`.

In [None]:
!pip install tdda

In [None]:
# FoundRegex relies on the external `tdda` package installed above.
# A single instance is created here and applied to several trees below.
from argonodes.appliers import FoundRegex
found_regex = FoundRegex()

#### Do we have any patterns?

We are going to do a "full example" using FoundRegex, but sometimes you do not need it at all.

In [None]:
# Apply FoundRegex to the first four trees only, because it can take some
# time, then print what it collected.
LAST_TREE_INDEX = 3
for position, tree in enumerate(trees.values()):
    found_regex(tree)
    if position >= LAST_TREE_INDEX:
        break
pprint(found_regex.data)

### data/account-creation-ip.js

In [None]:
# Select the tree for data/account-creation-ip.js; the cells below annotate it.
cur_tree = trees["data/account-creation-ip.js"]

In [None]:
# Show which file the current tree comes from, followed by its paths.
source_name = cur_tree.filename
print(f"Filename: {source_name}")
fancy_paths = cur_tree.get_paths_fancy()
print(f"Paths:\n{fancy_paths}")

#### Adding information where we can

In [None]:
# Annotate the root node ("$") of data/account-creation-ip.js.
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": "What IP was used to create that account.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes("data/account-creation-ip.js:$", **attributes)

In [None]:
# Annotate the `accountId` field of every accountCreationIp entry.
node_path = "data/account-creation-ip.js:$[*].accountCreationIp.accountId"
attributes = {
    "descriptiveType": "https://schema.org/identifier",
    "unique": True,
    "default": NA,
    "description": "Unique account ID for that user.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes(node_path, **attributes)

In [None]:
# Annotate the `userCreationIp` field of every accountCreationIp entry.
cur_tree.set_attributes(
    "data/account-creation-ip.js:$[*].accountCreationIp.userCreationIp",
    descriptiveType="https://github.com/hestiaAI/Argonodes/wiki/General:IPv4",
    unique=True,
    default=NA,
    # The previous text was a copy-paste of the accountId description; this
    # field holds the IP address, not the account ID.
    description="IP address used to create that account.",
    choices=NA,
    # Dotted-quad IPv4: four octets in the 0-255 range, separated by dots.
    regex=[r"(\b25[0-5]|\b2[0-4][0-9]|\b[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"],
)

### data/account-suspension.js

In [None]:
# Select the tree for data/account-suspension.js and display it as the cell
# output (the bare expression on the last line).
cur_tree = trees["data/account-suspension.js"]
cur_tree

#### ... Nothing in there...

### data/account-timezone.js

In [None]:
# Select the tree for data/account-timezone.js; the cells below annotate it.
cur_tree = trees["data/account-timezone.js"]

In [None]:
# Show which file the current tree comes from, followed by its paths.
source_name = cur_tree.filename
print(f"Filename: {source_name}")
fancy_paths = cur_tree.get_paths_fancy()
print(f"Paths:\n{fancy_paths}")

#### Adding information where we can

In [None]:
# Annotate the root node ("$") of data/account-timezone.js; no description yet.
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": NA,
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes("data/account-timezone.js:$", **attributes)

In [None]:
# Annotate the `accountId` field of every accountTimezone entry.
node_path = "data/account-timezone.js:$[*].accountTimezone.accountId"
attributes = {
    "descriptiveType": "https://schema.org/identifier",
    "unique": True,
    "default": NA,
    "description": "Unique account ID for that user.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes(node_path, **attributes)

In [None]:
# Annotate the `timeZone` field of every accountTimezone entry.
# Unlike the account ID, this value is flagged as not unique.
node_path = "data/account-timezone.js:$[*].accountTimezone.timeZone"
attributes = {
    "descriptiveType": "https://schema.org/scheduleTimezone",
    "unique": False,
    "default": NA,
    "description": "Timezone used when creating the account.",
    "choices": NA,
    "regex": [r"\w+"],
}
cur_tree.set_attributes(node_path, **attributes)

### data/account.js

In [None]:
# Select the tree for data/account.js; the cells below annotate it.
cur_tree = trees["data/account.js"]

In [None]:
# Show which file the current tree comes from, followed by its paths.
source_name = cur_tree.filename
print(f"Filename: {source_name}")
fancy_paths = cur_tree.get_paths_fancy()
print(f"Paths:\n{fancy_paths}")

#### Adding information where we can

In [None]:
# Annotate the root node ("$") of data/account.js; no description yet.
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": NA,
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes("data/account.js:$", **attributes)

In [None]:
# Annotate the `accountDisplayName` field of every account entry.
node_path = "data/account.js:$[*].account.accountDisplayName"
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": "Current display name for that account.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes(node_path, **attributes)

In [None]:
# Annotate the `accountId` field of every account entry.
node_path = "data/account.js:$[*].account.accountId"
attributes = {
    "descriptiveType": "https://schema.org/identifier",
    "unique": True,
    "default": NA,
    "description": "Unique account ID for that user.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes(node_path, **attributes)

In [None]:
# Annotate the `createdAt` field of every account entry.
node_path = "data/account.js:$[*].account.createdAt"
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": "Timestamp for the creation of that account.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes(node_path, **attributes)

In [None]:
# Annotate the `createdVia` field of every account entry.
node_path = "data/account.js:$[*].account.createdVia"
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": "Platform used to create that account.",
    "choices": NA,
    "regex": NA,
}
cur_tree.set_attributes(node_path, **attributes)

In [None]:
# Annotate the `email` field of every account entry.
cur_tree.set_attributes(
    "data/account.js:$[*].account.email",
    descriptiveType=NA,
    unique=True,
    default=NA,
    description="Email linked to that account.",
    choices=NA,
    # The hyphen is placed last in the character class: the previous form
    # `[\w-\.]` is rejected by Python's `re` module ("bad character range").
    # The top-level domain is no longer capped at 4 characters, so addresses
    # under longer TLDs also match.
    regex=[r"^[\w.-]+@([\w-]+\.)+[\w-]{2,}$"],
)

In [None]:
# Annotate the `username` field of every account entry.
# The pattern accepts 1 to 15 word characters.
node_path = "data/account.js:$[*].account.username"
attributes = {
    "descriptiveType": NA,
    "unique": True,
    "default": NA,
    "description": "Current username for that account.",
    "choices": NA,
    "regex": [r"^(\w){1,15}$"],
}
cur_tree.set_attributes(node_path, **attributes)

#### ... etc.

## Model

- We are now creating the Model based on the different trees.
- This Model will contain all our definitions, along with the correct paths.

In [None]:
from argonodes.models import Model

# Build a single Model named "Twitter" from every parsed (and annotated) tree.
model = Model(
    trees=trees.values(),
    name="Twitter",
)

In [None]:
# Display the last element of `model.changes` as the cell output.
model.changes[-1]

### Exporting the Models

In [None]:
# Export the model to ../outputs/Twitter.md using the "markdown" scheme.
model.export_traversal(filename="../outputs/Twitter.md", scheme="markdown")

In [None]:
# Export the same model to ../outputs/Twitter.json using the "json" scheme.
model.export_traversal(filename="../outputs/Twitter.json", scheme="json")

#### Preview

In [None]:
# No filename is passed here, so the call's result is shown as the cell output.
# NOTE(review): presumably this returns the markdown text instead of writing a file — confirm.
model.export_traversal(scheme="markdown")