In [21]:
from collections import Counter
from collections import defaultdict
import itertools
import json

import numpy as np

In [39]:
# Path to file with JSON provenance documents, one per line.
PATH_SAMPLE_DATA = "/Users/vr24/logstash-5.0.0/make_history_output.log"
RELATIONSHIP_TYPE_KEYNAME = unicode("prov")
SEED = 0
FIELDS_KEY = "@fields"
INSTANCE_KEY = "instance"
DOCUMENT_KEY = "document"
MESSAGE_KEY = "@message"

In [3]:
# Load and count log records.
with open(PATH_SAMPLE_DATA, "r") as logfile:
    logs = [json.loads(line.strip()) for line in logfile]
print("There are {} logs.".format(len(logs)))

There are 4360 logs.


In [4]:
# What's the space of depicted relationships look like?
relationship_type_names = {log[RELATIONSHIP_TYPE_KEYNAME] for log in logs}
relationship_type_names

{u'activity',
 u'agent',
 u'entity',
 u'hadMember',
 u'prefix',
 u'used',
 u'wasAssociatedWith',
 u'wasGeneratedBy',
 u'wasInfluencedBy'}

In [13]:
# Let's view some random records to see what we're up against.
np.random.seed(SEED)
sample_logs = np.asarray(logs)[np.random.randint(0, len(logs), 5)]
sample_logs

array([ {u'@fields': {u'_:id2': {u'prov:agent': u'people:vr24', u'prov:activity': u'is:97e270b8-7fd6-48b0-a8e2-13e1bdde8771'}}, u'prov': u'wasAssociatedWith', u'@timestamp': u'2016-11-06T00:34:29.709Z', u'instance': u'_:id2', u'host': u'127.0.0.1', u'@source_host': u'withme', u'@message': u'create_file3', u'document': u'is:97e270b8-7fd6-48b0-a8e2-13e1bdde8771', u'@version': 1, u'port': 65079},
       {u'@fields': {u'doc:gbd-read/schema/table': {}, u'code:tests/make_history.py': {u'unk:version_remote': u'https://vr24@stash.ihme.washington.edu/scm/~adolgert/provda.git', u'unk:version_branch_hash': u'372d74f21713f47642fc424e7e3289f38b2ed5a0', u'unk:script': u'/Users/vr24/code/provda/tests/make_history.py', u'unk:version_branch': u'tinkering'}, u'doc:gbd/first_history_test2/cvd_ihd.hdf': {}, u'doc:paf/first_history_test2/cvd_ihd.hdf': {}}, u'prov': u'entity', u'@timestamp': u'2016-11-06T00:34:26.834Z', u'instance': u'doc:gbd-read/schema/table', u'host': u'127.0.0.1', u'@source_host': u'wit

In [16]:
# What's the total space of possible record fields?
# In how many records is each field present?
records_fields_histogram = Counter()
for log in logs:
    records_fields_histogram.update(log.keys())
records_fields_histogram

Counter({u'@fields': 4360,
         u'@message': 4360,
         u'@source_host': 4360,
         u'@timestamp': 4360,
         u'@version': 4360,
         u'document': 4360,
         u'host': 4360,
         u'instance': 4360,
         u'port': 4360,
         u'prov': 4360})

In [18]:
# Excellent, we have a standard at the record level!
# What does the distribution of provenance document type name look like?
log_type_histogram = Counter(str(log[RELATIONSHIP_TYPE_KEYNAME]) for log in logs)
log_type_histogram

Counter({'activity': 220,
         'agent': 220,
         'entity': 1080,
         'hadMember': 200,
         'prefix': 1540,
         'used': 620,
         'wasAssociatedWith': 220,
         'wasGeneratedBy': 240,
         'wasInfluencedBy': 20})

In [20]:
# OK, we could consider using the provenance document type as the type within the ES index.
# Let's get a trio of log record samples for each of the document types.
logs_by_type = defaultdict(list)
for log in logs:
    logs_by_type[log[RELATIONSHIP_TYPE_KEYNAME]].append(log)
log_trio_by_type = {log_type: logs[:3] for log_type, logs in logs_by_type.items()}
log_trio_by_type

{u'activity': [{u'@fields': {u'is:21090197-6cce-4b66-8bc9-12e03029d84e': {u'unk:args': u'--child calculate_pafs --tag first_history_test0',
     u'unk:command': u'/Users/vr24/virtualenvs/general_personal_dev_env/bin/python',
     u'unk:date': u'2016-11-05T16:53:06-07:00',
     u'unk:group_id': {u'$': 37253, u'type': u'xsd:int'},
     u'unk:hostname': u'Gladstone.domain',
     u'unk:interpreter': u'2.7.10 (default, Oct 23 2015, 19:19:21) ',
     u'unk:platform': u'Darwin-15.5.0-x86_64-i386-64bit',
     u'unk:process_id': {u'$': 37260, u'type': u'xsd:int'},
     u'unk:sge_job_id': u'100'}},
   u'@message': u'create_file3',
   u'@source_host': u'withme',
   u'@timestamp': u'2016-11-05T23:53:07.028Z',
   u'@version': 1,
   u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
   u'host': u'127.0.0.1',
   u'instance': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
   u'port': 64675,
   u'prov': u'activity'},
  {u'@fields': {u'is:88a9ac0e-c9aa-408f-8f16-5b2e964d3661': {u'unk:args': u'--child

In [29]:
# Let's go type-wise to figure out how the '@fields' field differs.
fields_sample_by_doctype = {doctype: logs[0][FIELDS_KEY] 
                            for doctype, logs in logs_by_type.items()}
fields_sample_by_doctype

{u'activity': {u'is:21090197-6cce-4b66-8bc9-12e03029d84e': {u'unk:args': u'--child calculate_pafs --tag first_history_test0',
   u'unk:command': u'/Users/vr24/virtualenvs/general_personal_dev_env/bin/python',
   u'unk:date': u'2016-11-05T16:53:06-07:00',
   u'unk:group_id': {u'$': 37253, u'type': u'xsd:int'},
   u'unk:hostname': u'Gladstone.domain',
   u'unk:interpreter': u'2.7.10 (default, Oct 23 2015, 19:19:21) ',
   u'unk:platform': u'Darwin-15.5.0-x86_64-i386-64bit',
   u'unk:process_id': {u'$': 37260, u'type': u'xsd:int'},
   u'unk:sge_job_id': u'100'}},
 u'agent': {u'people:vr24': {u'unk:fullname': u'Vincent Reuter',
   u'unk:homedir': u'/Users/vr24'}},
 u'entity': {u'code:tests/make_history.py': {u'unk:script': u'/Users/vr24/code/provda/tests/make_history.py',
   u'unk:version_branch': u'tinkering',
   u'unk:version_branch_hash': u'372d74f21713f47642fc424e7e3289f38b2ed5a0',
   u'unk:version_remote': u'https://vr24@stash.ihme.washington.edu/scm/~adolgert/provda.git'},
  u'doc:gbd

In [40]:
def get_unique_values(doc_key, logs):
    return {log[doc_key] for log in logs}

instances = get_unique_values(INSTANCE_KEY, logs)
documents = get_unique_values(DOCUMENT_KEY, logs)
messages = get_unique_values(MESSAGE_KEY, logs)
num_values_by_key = {
    INSTANCE_KEY: len(instances), 
    DOCUMENT_KEY: len(documents), 
    MESSAGE_KEY: len(messages)
}
num_values_by_key

{'@message': 1, 'document': 220, 'instance': 385}

In [44]:
# OK, let's actually go type-wise and try to define a mapping for each "@fields".
fields_field_by_type = {doc_type: [log["@fields"] for log in logs] 
                        for doc_type, logs in logs_by_type.items()}

In [45]:
# Let's check out the possibilities for an "activity."
activity_fields_keys = set()
for fields in fields_field_by_type["activity"]:
    activity_fields_keys |= set(fields.keys())
activity_fields_keys

{u'is:0062c93f-f450-4822-8402-7a8ffb5ddba7',
 u'is:01fae16a-81a5-4f62-8383-bbcdcf8c4b41',
 u'is:04cf8cad-d70d-4fd9-bc01-37ab23bfdec5',
 u'is:08873b7f-c512-425b-a983-5b49382cc160',
 u'is:09afef7d-e937-48c0-a3ea-90233827cada',
 u'is:0bf9b8ca-4cee-420b-a7cc-b9ab2da9aed3',
 u'is:0c688830-fdd5-4414-981d-26ec7bc06740',
 u'is:0ece8f0a-bfc7-4d4a-8aff-25b335fecf99',
 u'is:0f38887a-d5e8-4221-8d8a-cc13496b04a3',
 u'is:0f973ce9-1a6f-477c-a77d-1f4d6a6ae1fc',
 u'is:0fcb925d-515a-492e-8fc2-96c8e57d81f1',
 u'is:124a34af-9b36-4bc3-9093-0e4056ce03a3',
 u'is:159a3e8f-0a8b-4b9e-99b1-6d21129dabf3',
 u'is:16383d51-2ba7-4702-9373-ba434826839b',
 u'is:17982084-0ee5-4bf3-84b4-fe0253b702bb',
 u'is:17d1a59c-3abc-4b22-b26c-af3c84b6bec6',
 u'is:1988347b-1cbf-4551-842b-34b7456e5958',
 u'is:1ba298ff-b461-421c-bc41-e4e0fc86128e',
 u'is:1c7cbe1b-7432-4574-8387-caa0c2834114',
 u'is:1f6db551-236b-4c27-a0a0-566ddac96562',
 u'is:1f9c3152-f556-43a9-8b6b-691cf2645558',
 u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'is:2235

In [47]:
# OK, each "@fields" value for an acitivity appears to be a single K-V pair mapping (object).
# Let's check that hypothesis.
assert all([1 == len(fields_object) for fields_object in fields_field_by_type["activity"]])

In [52]:
# OK, let's investigate the values.
activity_fields_object_values_keys = set()
for instance in fields_field_by_type["activity"]:
    for kv_pairs in instance.values():
        activity_fields_object_values_keys |= set(kv_pairs.keys())
activity_fields_object_values_keys

{u'unk:args',
 u'unk:command',
 u'unk:date',
 u'unk:group_id',
 u'unk:hostname',
 u'unk:interpreter',
 u'unk:platform',
 u'unk:process_id',
 u'unk:sge_job_id'}

In [59]:
# Cool! It appears that each "@fields" instance is an object with uniform schema.
# Let's look back at the space of document types to bifurcate the document types
# as either relationship (edge) type or node type.
logs_by_type.keys()

[u'wasAssociatedWith',
 u'hadMember',
 u'used',
 u'agent',
 u'entity',
 u'prefix',
 u'activity',
 u'wasInfluencedBy',
 u'wasGeneratedBy']

In [72]:
# What is going on with "prefix?"
len(logs_by_type["prefix"])

1540

In [73]:
len({log["document"] for log in logs_by_type["prefix"]})

220

In [74]:
len(logs)

4360

In [80]:
# Hmm, how many document tags are there within the collection of logs?
unique_document_ids = {log["document"] for log in logs}
len(unique_document_ids)

220

In [83]:
# How many prefixes are there?
len(logs_by_type["activity"])

220

In [86]:
# What agents are out there?
len(logs_by_type["agent"])

220

In [87]:
# What defines an agent?
log_trio_by_type["agent"]

[{u'@fields': {u'people:vr24': {u'unk:fullname': u'Vincent Reuter',
    u'unk:homedir': u'/Users/vr24'}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:07.027Z',
  u'@version': 1,
  u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
  u'host': u'127.0.0.1',
  u'instance': u'people:vr24',
  u'port': 64675,
  u'prov': u'agent'},
 {u'@fields': {u'people:vr24': {u'unk:fullname': u'Vincent Reuter',
    u'unk:homedir': u'/Users/vr24'}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:07.525Z',
  u'@version': 1,
  u'document': u'is:88a9ac0e-c9aa-408f-8f16-5b2e964d3661',
  u'host': u'127.0.0.1',
  u'instance': u'people:vr24',
  u'port': 64676,
  u'prov': u'agent'},
 {u'@fields': {u'people:vr24': {u'unk:fullname': u'Vincent Reuter',
    u'unk:homedir': u'/Users/vr24'}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:08.250Z',
  u'@version

In [89]:
# Is an agent defined by its fields?
agent_fields_lengths = {len(log["@fields"]) for log in logs_by_type["agent"]}
agent_fields_lengths

{1}

In [90]:
# OK, let's assume that an agent is defined by its fields.
# An agent also seems to be keyed on 'instance' (e.g., "people:vr24")
# The fields is then a single-object key-value pair mapping, with key matching 'instance', 
# e.g. {"people:vr24": {"unk:fullname": "Vincent Reuter", "unk:homedir": "/Users/vr24"}}

In [99]:
len(logs_by_type)

11

In [100]:
# Let's look at the other nodes, specifically, "activity" and "entity."

In [101]:
log_trio_by_type["activity"]

[{u'@fields': {u'is:21090197-6cce-4b66-8bc9-12e03029d84e': {u'unk:args': u'--child calculate_pafs --tag first_history_test0',
    u'unk:command': u'/Users/vr24/virtualenvs/general_personal_dev_env/bin/python',
    u'unk:date': u'2016-11-05T16:53:06-07:00',
    u'unk:group_id': {u'$': 37253, u'type': u'xsd:int'},
    u'unk:hostname': u'Gladstone.domain',
    u'unk:interpreter': u'2.7.10 (default, Oct 23 2015, 19:19:21) ',
    u'unk:platform': u'Darwin-15.5.0-x86_64-i386-64bit',
    u'unk:process_id': {u'$': 37260, u'type': u'xsd:int'},
    u'unk:sge_job_id': u'100'}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:07.028Z',
  u'@version': 1,
  u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
  u'host': u'127.0.0.1',
  u'instance': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
  u'port': 64675,
  u'prov': u'activity'},
 {u'@fields': {u'is:88a9ac0e-c9aa-408f-8f16-5b2e964d3661': {u'unk:args': u'--child calculate_pafs --tag first_hist

In [103]:
# For activities, what does the instance space look like?
activity_instances = {log["instance"] for log in logs_by_type["activity"]}
len(activity_instances)

220

In [104]:
activity_instances

{u'is:0062c93f-f450-4822-8402-7a8ffb5ddba7',
 u'is:01fae16a-81a5-4f62-8383-bbcdcf8c4b41',
 u'is:04cf8cad-d70d-4fd9-bc01-37ab23bfdec5',
 u'is:08873b7f-c512-425b-a983-5b49382cc160',
 u'is:09afef7d-e937-48c0-a3ea-90233827cada',
 u'is:0bf9b8ca-4cee-420b-a7cc-b9ab2da9aed3',
 u'is:0c688830-fdd5-4414-981d-26ec7bc06740',
 u'is:0ece8f0a-bfc7-4d4a-8aff-25b335fecf99',
 u'is:0f38887a-d5e8-4221-8d8a-cc13496b04a3',
 u'is:0f973ce9-1a6f-477c-a77d-1f4d6a6ae1fc',
 u'is:0fcb925d-515a-492e-8fc2-96c8e57d81f1',
 u'is:124a34af-9b36-4bc3-9093-0e4056ce03a3',
 u'is:159a3e8f-0a8b-4b9e-99b1-6d21129dabf3',
 u'is:16383d51-2ba7-4702-9373-ba434826839b',
 u'is:17982084-0ee5-4bf3-84b4-fe0253b702bb',
 u'is:17d1a59c-3abc-4b22-b26c-af3c84b6bec6',
 u'is:1988347b-1cbf-4551-842b-34b7456e5958',
 u'is:1ba298ff-b461-421c-bc41-e4e0fc86128e',
 u'is:1c7cbe1b-7432-4574-8387-caa0c2834114',
 u'is:1f6db551-236b-4c27-a0a0-566ddac96562',
 u'is:1f9c3152-f556-43a9-8b6b-691cf2645558',
 u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'is:2235

In [106]:
# OK great, each activity instance tag seems to be a single, hash-like string prefixed with 'is:' (perhaps namespace?)
# Let's investigage the fields for an activity
logs_by_type["activity"][0]

{u'@fields': {u'is:21090197-6cce-4b66-8bc9-12e03029d84e': {u'unk:args': u'--child calculate_pafs --tag first_history_test0',
   u'unk:command': u'/Users/vr24/virtualenvs/general_personal_dev_env/bin/python',
   u'unk:date': u'2016-11-05T16:53:06-07:00',
   u'unk:group_id': {u'$': 37253, u'type': u'xsd:int'},
   u'unk:hostname': u'Gladstone.domain',
   u'unk:interpreter': u'2.7.10 (default, Oct 23 2015, 19:19:21) ',
   u'unk:platform': u'Darwin-15.5.0-x86_64-i386-64bit',
   u'unk:process_id': {u'$': 37260, u'type': u'xsd:int'},
   u'unk:sge_job_id': u'100'}},
 u'@message': u'create_file3',
 u'@source_host': u'withme',
 u'@timestamp': u'2016-11-05T23:53:07.028Z',
 u'@version': 1,
 u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'host': u'127.0.0.1',
 u'instance': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'port': 64675,
 u'prov': u'activity'}

In [110]:
# OK, there are many more properties than for an agent, but this is manageable.
# Activity is defined by "@fields" object, too.
# Is there any variability in the prefix for each of the field names for an activity instance?
activity_fields_object_values_keys = set()
for activity_document in logs_by_type["activity"]:
    fields_object_values = activity_document["@fields"].values()
    assert 1 == sum(1 for _ in fields_object_values)
    activity_fields_object_values_keys.update(fields_object_values[0].keys())
len(activity_fields_object_values_keys)

9

In [111]:
activity_fields_object_values_keys

{u'unk:args',
 u'unk:command',
 u'unk:date',
 u'unk:group_id',
 u'unk:hostname',
 u'unk:interpreter',
 u'unk:platform',
 u'unk:process_id',
 u'unk:sge_job_id'}

In [112]:
# What about an entity?
log_trio_by_type["entity"]

[{u'@fields': {u'code:tests/make_history.py': {u'unk:script': u'/Users/vr24/code/provda/tests/make_history.py',
    u'unk:version_branch': u'tinkering',
    u'unk:version_branch_hash': u'372d74f21713f47642fc424e7e3289f38b2ed5a0',
    u'unk:version_remote': u'https://vr24@stash.ihme.washington.edu/scm/~adolgert/provda.git'},
   u'doc:gbd-read/schema/table': {},
   u'doc:gbd/first_history_test0/cvd_ihd.hdf': {},
   u'doc:paf/first_history_test0/cvd_ihd.hdf': {}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:07.027Z',
  u'@version': 1,
  u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
  u'host': u'127.0.0.1',
  u'instance': u'doc:gbd-read/schema/table',
  u'port': 64675,
  u'prov': u'entity'},
 {u'@fields': {u'code:tests/make_history.py': {u'unk:script': u'/Users/vr24/code/provda/tests/make_history.py',
    u'unk:version_branch': u'tinkering',
    u'unk:version_branch_hash': u'372d74f21713f47642fc424e7e3289f38b2ed5a0',
    u'un

In [113]:
logs[0]

{u'@fields': {u'_:id2': {u'prov:activity': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
   u'prov:agent': u'people:vr24'}},
 u'@message': u'create_file3',
 u'@source_host': u'withme',
 u'@timestamp': u'2016-11-05T23:53:07.027Z',
 u'@version': 1,
 u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'host': u'127.0.0.1',
 u'instance': u'_:id2',
 u'port': 64675,
 u'prov': u'wasAssociatedWith'}

In [116]:
instance_id = 'is:21090197-6cce-4b66-8bc9-12e03029d84e'
specific_instance_logs = \
    filter(lambda log: log["instance"] == 'is:21090197-6cce-4b66-8bc9-12e03029d84e', logs)
len(specific_instance_logs)

1

In [117]:
specific_instance_logs[0]

{u'@fields': {u'is:21090197-6cce-4b66-8bc9-12e03029d84e': {u'unk:args': u'--child calculate_pafs --tag first_history_test0',
   u'unk:command': u'/Users/vr24/virtualenvs/general_personal_dev_env/bin/python',
   u'unk:date': u'2016-11-05T16:53:06-07:00',
   u'unk:group_id': {u'$': 37253, u'type': u'xsd:int'},
   u'unk:hostname': u'Gladstone.domain',
   u'unk:interpreter': u'2.7.10 (default, Oct 23 2015, 19:19:21) ',
   u'unk:platform': u'Darwin-15.5.0-x86_64-i386-64bit',
   u'unk:process_id': {u'$': 37260, u'type': u'xsd:int'},
   u'unk:sge_job_id': u'100'}},
 u'@message': u'create_file3',
 u'@source_host': u'withme',
 u'@timestamp': u'2016-11-05T23:53:07.028Z',
 u'@version': 1,
 u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'host': u'127.0.0.1',
 u'instance': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
 u'port': 64675,
 u'prov': u'activity'}

In [118]:
document_id = 'is:21090197-6cce-4b66-8bc9-12e03029d84e'
specific_document_logs = \
    filter(lambda log: log["document"] == 'is:21090197-6cce-4b66-8bc9-12e03029d84e', logs)
len(specific_document_logs)

18

In [119]:
specific_document_logs

[{u'@fields': {u'_:id2': {u'prov:activity': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
    u'prov:agent': u'people:vr24'}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:07.027Z',
  u'@version': 1,
  u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
  u'host': u'127.0.0.1',
  u'instance': u'_:id2',
  u'port': 64675,
  u'prov': u'wasAssociatedWith'},
 {u'@fields': {u'_:id1': {u'prov:activity': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
    u'prov:entity': u'code:tests/make_history.py'},
   u'_:id3': {u'prov:activity': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
    u'prov:entity': u'doc:gbd/first_history_test0/cvd_ihd.hdf'},
   u'_:id4': {u'prov:activity': u'is:21090197-6cce-4b66-8bc9-12e03029d84e',
    u'prov:entity': u'doc:gbd-read/schema/table'}},
  u'@message': u'create_file3',
  u'@source_host': u'withme',
  u'@timestamp': u'2016-11-05T23:53:07.027Z',
  u'@version': 1,
  u'document': u'is:21090197-6cce-4b66-8bc9-12e03029d

In [120]:
# Does the space of documents account for the total log collection? Let's investigate!
document_ids = {log["document"] for log in logs}
len(document_ids)

220

In [123]:
def filter_records(logs, field, match_value):
    return [log for log in logs if log[field] == match_value]

# What's the distribution of number of records per document ID?
num_docs_by_record_count = defaultdict(int)
for doc_id in document_ids:
    num_docs_by_record_count[sum(1 for _ in filter_records(logs, "document", doc_id))] += 1
num_docs_by_record_count

defaultdict(int, {18: 160, 20: 40, 34: 20})

In [126]:
{log["prov"] for log in logs}

{u'activity',
 u'agent',
 u'entity',
 u'hadMember',
 u'prefix',
 u'used',
 u'wasAssociatedWith',
 u'wasGeneratedBy',
 u'wasInfluencedBy'}