In [14]:
import json
import itertools
import numpy as np
import pandas as pd

import rule_application as ra
from grapher import Grapher
from temporal_walk import Temporal_Walk
from rule_learning import Rule_Learner, verbalize_rule, rules_statistics
from score_functions import score_12

In [15]:
dataset = "icewsmol"
rules_file = "2609222150_r[1,2,3]_n200_exp_s12_rules.json"
dir_path = dataset + "/"

In [16]:
data = Grapher("../data/" + dir_path)
print("Shapes: ", np.shape(data.train_idx), np.shape(data.valid_idx), np.shape(data.test_idx))
print(data.train_idx[:5])

Grapher initialized.
Shapes:  (18580, 4) (9000, 4) (9000, 4)
[[ 146  178 2665    0]
 [ 265  156 1183    0]
 [ 382  127 2100    0]
 [ 467  224 1176    0]
 [ 467  229 6495    0]]


In [17]:
rules_dict = json.load(open("../output/" + dir_path + rules_file))
rules_dict = {int(k):v for k,v in rules_dict.items()}
rules_statistics(rules_dict)

Number of relations with rules:  308
Total number of rules:  20656
Number of rules by length:  [(1, 2950), (2, 2241), (3, 15465)]


In [18]:
temporal_walk = Temporal_Walk(data.train_idx, data.inv_relation_id, transition_distr="exp")
rl = Rule_Learner(temporal_walk.edges, temporal_walk.neighbors, data.id2relation, data.inv_relation_id, dataset)

#### Rule Learning

In [19]:
rl.rules_dict = dict()
length = 2
rel_idx = 42

walk_successful = False
while not walk_successful:
    walk_successful, walk = temporal_walk.sample_acyclic_walk(rel_idx)
rl.create_acyclic_rule(walk)
print("Walk: ", walk)
try:
    print("Rule: ", rl.rules_dict[rel_idx][0])
    print(verbalize_rule(rl.rules_dict[rel_idx][0], data.id2relation))
except KeyError:   
    print("No rule created because of missing body or rule support.")

Walk:  {'entities': [359, 2611, 2420, 6738], 'relations': [277, 42, 160], 'timestamps': [33, 33, 42]}
Rule:  {'rule_type': 'acyclic', 'head_rel': 42, 'body_rels': [277, 42, 160], 'var_constraints': [], 'conf': 1.0, 'rule_supp': 1, 'body_supp': 1}
1.000000 a    1     1  Bring_lawsuit_against(X1,X2,T1) <- _Complain_officially(X0,X1,T0), Bring_lawsuit_against(X1,X2,T1), Make_statement(X2,X3,T2)


#### Rule Application

In [20]:
query_idx = 42
window = 0
top_k = 20
learn_edges = data.train_idx

In [21]:
test_query = data.test_idx[query_idx]
print(test_query)
print(data.id2entity[test_query[0]], data.id2relation[test_query[1]], 
      data.id2entity[test_query[2]], data.id2ts[test_query[3]])

[ 956   53 4702  314]
Catherine_Ashton Consult Mohammad_Javad_Zarif 2014-11-11


In [22]:
all_walks = []
score_func = score_12
args = [[0.1, 0.5]]  
cands_dict = [dict() for _ in range(len(args))]
dicts_idx = list(range(len(args)))
cur_ts = test_query[3]
edges = ra.get_window_edges(data.all_idx, cur_ts, learn_edges, window)

if test_query[1] in rules_dict:
    for rule in rules_dict[test_query[1]]:
        walk_edges = ra.match_body_relations_complete(rule, edges, test_query[0])
        rule_walks = pd.DataFrame()
        if 0 not in [len(x) for x in walk_edges]:
            rule_walks = ra.get_walks_complete(rule, walk_edges)
            if rule['var_constraints']:
                rule_walks = ra.check_var_constraints(rule['var_constraints'], rule_walks)
        all_walks.append(rule_walks)
        if not rule_walks.empty:
            cands_dict = ra.get_candidates(rule, rule_walks, cur_ts, cands_dict, 
                                           score_func, args, dicts_idx)
            top_k_scores = [v for _, v in sorted(cands_dict[0].items(), key=lambda item: item[1],
                            reverse=True)][:top_k]
            unique_scores = list(scores for scores,_ in itertools.groupby(top_k_scores))
            if len(unique_scores) >= top_k:
                break
    if cands_dict:
        candidates = dict(sorted(cands_dict[0].items(), key=lambda item: item[1], reverse=True))
    else:
        candidates = {}
        print("No candidates found after rule application.")
else:
    candidates = {}
    print("No rules exist for this query relation.")

      entity_0  relation_0  entity_1  timestamp_0  relation_1  entity_2  \
2          956          53      4702           23          53       956   
3          956          53      4702           23          53       956   
4          956          53      4702           23          53       956   
5          956          53      4702           23          53       956   
6          956          53      4702           23          53       956   
...        ...         ...       ...          ...         ...       ...   
4152       956          53      3341          268          53      6293   
4178       956          53      3341          269          53      6293   
4179       956          53      3341          269          53      6293   
4205       956          53      3341          269          53      6293   
4206       956          53      3341          269          53      6293   

      timestamp_1  relation_2  entity_3  timestamp_2  
2              23         357      4702     

In [23]:
i = 0  # There exist walks that match the rule body but not the time/variable constraints
print(rules_dict[test_query[1]][i])
all_walks[i]

{'rule_type': 'cyclic', 'head_rel': 53, 'body_rels': [403, 110], 'var_constraints': [], 'conf': 1.0, 'rule_supp': 3, 'body_supp': 3}


In [24]:
i = 1  # There exist no walks that match the rule body
print(rules_dict[test_query[1]][i])
all_walks[i]

{'rule_type': 'cyclic', 'head_rel': 53, 'body_rels': [127, 155, 142], 'var_constraints': [[0, 2], [1, 3]], 'conf': 1.0, 'rule_supp': 2, 'body_supp': 2}


Unnamed: 0,entity_0,relation_0,entity_1,timestamp_0,relation_1,entity_2,timestamp_1,relation_2,entity_3,timestamp_2


In [25]:
i = 3 
print(rules_dict[test_query[1]][i])
all_walks[i]

{'rule_type': 'cyclic', 'head_rel': 53, 'body_rels': [333, 162, 103], 'var_constraints': [[0, 2], [1, 3]], 'conf': 1.0, 'rule_supp': 2, 'body_supp': 2}


Unnamed: 0,entity_0,relation_0,entity_1,timestamp_0,relation_1,entity_2,timestamp_1,relation_2,entity_3,timestamp_2


In [13]:
print(ra.verbalize_walk(all_walks[3].iloc[0], data))

IndexError: single positional indexer is out-of-bounds

In [18]:
candidates

{5385: [0.5061387,
  0.5061387,
  0.5050259,
  0.5061387,
  0.5061387,
  0.5050259,
  0.5061387,
  0.16,
  0.017391304,
  0.021389553,
  0.010416667,
  0.013099227],
 6794: [0.5061387, 0.013099227],
 3262: [0.5055545,
  0.5055545,
  0.04,
  0.02970297,
  0.017391304,
  0.013071896,
  0.01056338,
  0.010416667,
  0.008982036,
  0.011923925],
 6293: [0.5055545,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5050259,
  0.5050259,
  0.5050259,
  0.5,
  0.5050259,
  0.16,
  0.017391304,
  0.021389553,
  0.018097814,
  0.01056338,
  0.010416667,
  0.012515055],
 2617: [0.5050259,
  0.5050259,
  0.5055545,
  0.5050259,
  0.5055545,
  0.5055545,
  0.16,
  0.017391304,
  0.021389553,
  0.013071896,
  0.01056338,
  0.012515055],
 2784: [0.5050259,
  0.5050259,
  0.5,
  0.5055545,
  0.5050259,
  0.5055545,
  0.16,
  0.017391304,
  0.021389553,
  0.012515055],
 956: [0.5050259,
  0.5050259,
  0.5,
  0.5050259,
  0.5050259,
  0.5050259,
  0.5,
  0.5,
  0.5,
