In [1]:
import os
import pickle
import random
import sys
from os.path import join

sys.path.append('..')

import numpy as np
import networkx as nx

from cnrg.VRG import VRG as VRG
from cnrg.LightMultiGraph import convert
from cnrg.LightMultiGraph import LightMultiGraph as LightMultiGraph

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
sys.path.append('../src')
from utils import silence
from data import load_data
# from data_old import read_data
from decomposition import decompose, decompose_component
from graph_transitions import update_grammar
from utils import silence, find, replace
from utils import graph_edit_distance as ged

In [3]:
# Load the email-eucore snapshots. Every loaded item is unpacked as a pair and
# only its second element (the graph) is kept.
loaded = load_data(dataname='email-eucore', lookback=0)
graphs = [snapshot for _, snapshot in loaded]

In [4]:
# Extract the base grammar from the first snapshot.
# NOTE(review): time=0 is presumably the time step stamped on the extracted
# rules (compare rule.time in the listings further down) - confirm in decompose.
grammar = decompose(graphs[0], time=0)

100%|███████████████████████████████|[00:00<00:00]
100%|███████████████████████████████|[00:00<00:00]
100%|███████████████████████████████|[00:00<00:00]
100%|███████████████████████████████|[00:00<00:00]
100%|███████████████████████████████|[00:00<00:00]


In [5]:
# Update the base grammar from the first snapshot to the second one under both
# update modes, passing 1 as the fourth positional argument.
# NOTE(review): the sequential-experiment cell later in this notebook spells the
# second mode 'independent' and omits the fourth argument - confirm which form
# update_grammar expects.
joint_grammar = update_grammar(grammar, graphs[0], graphs[1], 1, mode='joint')
indep_grammar = update_grammar(grammar, graphs[0], graphs[1], 1, mode='indep')

joint changes: 100%|██████████| 44/44 [01:38<00:00,  2.24s/it]
additions: 100%|██████████| 2207/2207 [00:01<00:00, 1356.32it/s]


graph: , mu: 4, type: mu_level_dl clustering: leiden rules: 245(244) mdl: 27_455.5 bits


additions: 100%|██████████| 2289/2289 [00:00<00:00, 2348.54it/s]

graph: , mu: 4, type: mu_level_dl clustering: leiden rules: 235(234) mdl: 26_291.1 bits





In [6]:
# Show the ll() value of each updated grammar: joint first, then independent.
display(joint_grammar.ll(), indep_grammar.ll())

2244.0

2283

In [7]:
# Print (time, time_changed, edit_cost) for every joint-grammar rule whose
# time_changed is positive; all other rules are skipped.
for rule in joint_grammar.rule_list:
    if not rule.time_changed > 0:
        continue
    print(rule.time, rule.time_changed, rule.edit_cost)

0 1 2207
1 1 0
1 1 6.0
1 1 4.0
1 1 6.0
1 1 2.0
1 1 12.0
1 1 2.0
1 1 4.0
1 1 0
1 1 1.0


In [8]:
# Print (time, time_changed, edit_cost) for every independent-grammar rule
# whose time_changed is positive; all other rules are skipped.
for rule in indep_grammar.rule_list:
    if not rule.time_changed > 0:
        continue
    print(rule.time, rule.time_changed, rule.edit_cost)

0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 2
0 1 1
0 1 3
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 2
0 1 1
0 1 1
0 1 3
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 1
0 1 2
0 1 4
0 1 1
0 1 1
0 1 1
0 1 2
0 1 1
0 1 1
0 1 1
0 1 4
0 1 2
0 1 1
0 1 1
0 1 2
0 1 1
0 1 2
0 1 1
0 1 2
0 1 1
0 1 4
0 1 1
0 1 2
0 1 1
0 1 1
0 1 1
0 1 2207
0 1 1


In [None]:
# Split the joint grammar's rules by their `time` attribute in a single pass:
# time 0 -> old, time 1 -> new; rules with any other time are ignored.
by_time = {0: [], 1: []}
for r in joint_grammar.rule_list:
    if r.time in by_time:
        by_time[r.time].append(r)
old, new = by_time[0], by_time[1]

# The first time-1 rule is the one examined in the following cells.
new_rule = new[0]
print(len(old), len(new))

In [None]:
# Among the rules sharing new_rule's LHS, pick the first one that compares equal
# to new_rule without being the very same object; `rule` is None if none exists.
rule = next(
    (candidate
     for candidate in joint_grammar.rule_dict[new_rule.lhs]
     if candidate == new_rule and candidate is not new_rule),
    None,
)

In [None]:
# Distance (via ged) from every other rule with the same LHS to new_rule;
# the cell's displayed value is the smallest of them.
dists = []
for other in joint_grammar.rule_dict[new_rule.lhs]:
    if other is not new_rule:
        dists.append(ged(other.graph, new_rule.graph))
min(dists)

In [None]:
# Inspect the individual distances behind the minimum computed above.
dists

In [None]:
# Inspect the edges, with their attribute data, of the rule at index 2 among
# the base-grammar rules stored under rule_dict key 0.
grammar.rule_dict[0][2].graph.edges(data=True)

In [None]:
# Toy example: a path on the six nodes 0..5 (edges i -- i+1), decomposed once
# with decompose and once with decompose_component for comparison.
h = nx.path_graph(6)
hgrammar = decompose(h)
hgrammarc = decompose_component(h)

In [None]:
# Show the rules that decompose extracted from the toy path graph.
hgrammar.rule_list

In [None]:
# For each rule of the decompose_component grammar, print its LHS and draw its
# graph with the nodes' 'label' attributes as captions (the 'b_deg' attributes
# are not shown).
for rule in hgrammarc.rule_list:
    rhs = rule.graph
    print(f'{rule.lhs} -> ', end='')
    nx.draw(rhs, labels=nx.get_node_attributes(rhs, 'label'))
    plt.show()

In [None]:
# Inspect the nodes, with their attribute data, of the rule at index 1 among
# the base-grammar rules stored under rule_dict key 0.
grammar.rule_dict[0][1].graph.nodes(data=True)

In [None]:
# Print size() of the subgraph induced by each connected component of the
# first snapshot.
first_snapshot = graphs[0]
for component in nx.connected_components(first_snapshot):
    print(first_snapshot.subgraph(component).size())

In [None]:
# Sorted size() of every rule graph in new_grammar.
# The list built here is the one that gets displayed; `edgesizes` is not
# defined by any cell of this notebook.
# NOTE(review): new_grammar itself is not defined in any visible cell either -
# confirm which grammar (e.g. joint_grammar / indep_grammar) was meant.
newedgesizes = [rule.graph.size() for rule in new_grammar.rule_list]
display(sorted(newedgesizes))

In [None]:
# Print the edit cost of every new_grammar rule, skipping rules whose cost is 0.
for rule in new_grammar.rule_list:
    if rule.edit_cost == 0:
        continue
    print(rule.edit_cost)

In [None]:
# Difference in rule count between new_grammar and the base grammar.
# NOTE(review): new_grammar is not defined in any visible cell - confirm source.
len(new_grammar.rule_list) - len(grammar.rule_list)

In [None]:
# Print the edit cost of every rule in new_grammar, zero costs included.
# NOTE(review): new_grammar is not defined in any visible cell - confirm source.
for rule in new_grammar.rule_list:
    print(rule.edit_cost)

In [None]:
# Print the `time` attribute of every rule in new_grammar.
# NOTE(review): new_grammar is not defined in any visible cell - confirm source.
for rule in new_grammar.rule_list:
    print(rule.time)

In [None]:
# Keep the first base-grammar rule and show its `time` attribute.
r0 = grammar.rule_list[0]
r0.time

In [None]:
# g0 is the graph of the first base-grammar rule; it serves as the reference
# graph for the edit-distance cells below. Print its order() and size().
g0 = grammar.rule_list[0].graph
print(g0.order(), g0.size())

In [None]:
# Inspect the nodes, with their attribute data, of the base-grammar rule at index 6.
grammar.rule_list[6].graph.nodes(data=True)

In [None]:
def node_match(d1, d2):
    """Decide whether two node-attribute dicts describe matching nodes.

    The dicts match when they have exactly the same set of keys and, for each
    of the attributes 'label' and 'b_deg' that is present, the two values do
    not differ. Any other attribute only has to be present in both dicts; its
    value is not compared.

    Args:
        d1: attribute dict of the first node.
        d2: attribute dict of the second node.

    Returns:
        True if the nodes match, False otherwise.
    """
    if d1.keys() != d2.keys():
        return False

    # Key sets are equal here, so a key found in d1 is also in d2.
    compared = ('label', 'b_deg')
    return not any(d1[attr] != d2[attr] for attr in compared if attr in d1)

In [None]:
%%time
# Graph edit distance from g0 to the graph of every base-grammar rule, using
# node_match to decide node equality. timeout=None places no time limit on
# the search.
edit_dists = [nx.graph_edit_distance(g0, rule.graph, node_match=node_match, timeout=None)
              for rule in grammar.rule_list]

In [None]:
# Inspect the edit distances computed with nx.graph_edit_distance.
edit_dists

In [None]:
%%time
# Same distances as edit_dists, but obtained from the anytime generator
# nx.optimize_graph_edit_distance: it yields a sequence of approximations and
# the last value it produces is the one kept for each rule.
opt_edit_dists = []
for rule in grammar.rule_list:
    # Reset per rule so that a generator yielding nothing records None rather
    # than silently reusing the previous rule's distance.
    minval = None
    for minval in nx.optimize_graph_edit_distance(g0, rule.graph, node_match=node_match):
        pass
    opt_edit_dists.append(minval)

In [None]:
# True only if both ways of computing the edit distance agree for every rule.
edit_dists == opt_edit_dists

# independent sequential experiments

In [None]:
# Sequential experiment. For every pair of consecutive snapshots, a base
# grammar is extracted from the earlier snapshot (home) and updated towards the
# later one (away) under both update modes. The grammars, their
# calculate_cost() values ("mdls") and conditional_ll() values ("lls") are
# collected per lookback and pickled under results_dir.
dataname = 'email-eucore'
# facebook-links ['weight'] edge missing?
lookbacks = [0]
mu = 4
results_dir = '../results/experiment_sequential'

# Create the output directory before the slow grammar computations so that a
# missing folder cannot cost the results at dump time.
os.makedirs(results_dir, exist_ok=True)

# Every container maps lookback -> list ordered by time step.
base_grammars = {lookback: [] for lookback in lookbacks}
joint_grammars = {lookback: [] for lookback in lookbacks}
indep_grammars = {lookback: [] for lookback in lookbacks}

base_mdls = {lookback: [] for lookback in lookbacks}
joint_mdls = {lookback: [] for lookback in lookbacks}
joint_lls = {lookback: [] for lookback in lookbacks}
indep_mdls = {lookback: [] for lookback in lookbacks}
indep_lls = {lookback: [] for lookback in lookbacks}

for lookback in lookbacks:
    loaded = load_data(dataname=dataname, lookback=lookback)
    graphs = [g for _, g in loaded]
    # One label per transition (home -> away); used as the x-axis by the plots.
    years = [t for t, _ in loaded][:len(graphs) - 1]

    # graphs, years = read_data(dataname=dataname, lookback=lookback)

    for home_graph, away_graph in zip(graphs[:-1], graphs[1:]):
        base_grammar = decompose(home_graph, mu=mu)
        # NOTE(review): the exploratory cell near the top of this notebook calls
        # update_grammar with an extra positional argument and mode='indep';
        # confirm which spelling/arity is the supported one.
        joint_grammar = update_grammar(base_grammar, home_graph, away_graph, mode='joint')
        indep_grammar = update_grammar(base_grammar, home_graph, away_graph, mode='independent')

        base_grammars[lookback].append(base_grammar)
        joint_grammars[lookback].append(joint_grammar)
        indep_grammars[lookback].append(indep_grammar)

        # Each metric is evaluated exactly once per grammar.
        # NOTE(review): this assumes calculate_cost() / conditional_ll() return
        # the same value on repeated calls - confirm they do not accumulate.
        joint_mdls[lookback].append(joint_grammar.calculate_cost())
        joint_lls[lookback].append(joint_grammar.conditional_ll())
        indep_mdls[lookback].append(indep_grammar.calculate_cost())
        indep_lls[lookback].append(indep_grammar.conditional_ll())

        base_mdls[lookback].append(base_grammar.calculate_cost())

    # The last snapshot has no successor, so it only contributes a base grammar;
    # the base_* lists are therefore one entry longer than the joint/indep ones.
    base_grammar = decompose(graphs[-1], mu=mu)
    base_grammars[lookback].append(base_grammar)
    base_mdls[lookback].append(base_grammar.calculate_cost())

    # Checkpoint after each lookback: every file is rewritten with the full
    # dictionary gathered so far.
    artifacts = {
        'base.grammars': base_grammars,
        'joint.grammars': joint_grammars,
        'indep.grammars': indep_grammars,
        'base.mdls': base_mdls,
        'joint.mdls': joint_mdls,
        'joint.lls': joint_lls,
        'indep.mdls': indep_mdls,
        'indep.lls': indep_lls,
    }
    for suffix, payload in artifacts.items():
        with open(f'{results_dir}/{dataname}_{suffix}', 'wb') as outfile:
            pickle.dump(payload, outfile)

In [None]:
# NOTE(review): looks like a leftover kernel smoke test; it does not take part
# in the analysis - confirm whether this cell can be dropped.
print('hi')

In [None]:
# (order(), size()) of every email-dnc snapshot, in time order.
# NOTE(review): this rebinds the notebook-level `loaded` and `graphs` used by
# other cells - rerun the experiment cell before relying on them again.
loaded = load_data(dataname='email-dnc', lookback=0)
graphs = [snapshot for _, snapshot in loaded]
[(snapshot.order(), snapshot.size()) for snapshot in graphs]

In [None]:
# (order(), size()) of every facebook-links snapshot, in time order.
# NOTE(review): this rebinds the notebook-level `loaded` and `graphs` used by
# other cells - rerun the experiment cell before relying on them again.
loaded = load_data(dataname='facebook-links', lookback=0)
graphs = [snapshot for _, snapshot in loaded]
[(snapshot.order(), snapshot.size()) for snapshot in graphs]
#[(len(set(cur.nodes()) & set(nxt.nodes())), len(set(cur.edges()) & set(nxt.edges()))) for cur, nxt in zip(graphs[:-1], graphs[1:])]

In [None]:
# Overlap between consecutive fb-messages snapshots, as
# (number of shared nodes, number of shared edges) per transition.
# The snapshots are loaded with load_data, like every other cell, because
# read_data is not imported in this notebook (its import is commented out).
# NOTE(review): shared edges are counted by comparing the (u, v) tuples exactly
# as edges() yields them; if the graphs are undirected, an edge reported as
# (v, u) in the next snapshot would not be counted - confirm.
loaded = load_data(dataname='fb-messages', lookback=0)
graphs = [g for _, g in loaded]
[(len(set(cur.nodes()) & set(nxt.nodes())), len(set(cur.edges()) & set(nxt.edges()))) for cur, nxt in zip(graphs[:-1], graphs[1:])]

In [None]:
# Inspect the recorded conditional_ll() values of the joint grammars, keyed by lookback.
joint_lls

In [None]:
# Inspect the recorded conditional_ll() values of the independent grammars, keyed by lookback.
indep_lls

In [None]:
# Log likelihood per time step for the joint and the independent model
# (lookback key 0), drawn on a shared axis.
# Optional styling, currently disabled:
#with plt.style.context(['ipynb', 'use_mathtext', 'colors5-light']):
timesteps = range(len(years))
curves = (
    (joint_lls[0], 'joint model'),
    (indep_lls[0], 'independent model'),
)
for series, name in curves:
    plt.plot(timesteps, series, label=name)

plt.title(f'sequential experiments: {dataname}, lookback {lookback}')
plt.xlabel('time')
plt.ylabel('log likelihood')
plt.ticklabel_format(style='plain')
# Legend sits outside the axes, anchored to their top-right corner.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
#plt.savefig(f'../figures/exp3_{dataname}_{cumulative}.svg')

In [None]:
# Description length per time step for the joint, independent and base models
# (lookback key 0), drawn on a shared axis.
#with plt.style.context(['ipynb', 'use_mathtext', 'colors5-light']):
plt.title(f'sequential experiments: {dataname}, lookback {lookback}')
plt.xlabel('time')
plt.ylabel('minimal description length')
plt.plot(range(len(years)), joint_mdls[0], label='joint model')
plt.plot(range(len(years)), indep_mdls[0], label='independent model')
# base_mdls[0] is itself a per-time-step list with one extra trailing entry for
# the final snapshot, so it is truncated to the plotted time steps and drawn as
# a single curve.
plt.plot(range(len(years)), base_mdls[0][:len(years)], label='base model')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.ticklabel_format(style='plain')
    #plt.savefig(f'../figures/exp3_{dataname}_{cumulative}.svg')

In [None]:
# Inspect the recorded calculate_cost() values of the base grammars, keyed by lookback.
base_mdls

# accumulated sequential experiments