In [1]:
import sys
import random
import pickle
from os.path import join

sys.path.append('..')

import numpy as np
import networkx as nx

from cnrg.VRG import VRG as VRG
from cnrg.LightMultiGraph import LightMultiGraph as LightMultiGraph

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
sys.path.append('../src')
from data import load_data
from data_old import read_data
from bookkeeping import convert_LMG, decompose
from update_grammar import update_grammar

In [None]:
# TODO reminder for the experiment below; printed so the note shows up in the
# notebook output whenever this cell is run.
print('change 10 -> 15 and fix that error on "graphs = graphs[0:10]"')

# independent sequential experiments

In [5]:
# Independent sequential experiment.
#
# For every pair of consecutive snapshots (home -> away) a base grammar is
# decomposed from the home graph and then updated towards the away graph in
# both 'joint' and 'independent' mode. The grammars, their costs
# (`calculate_cost`) and their conditional log likelihoods (`conditional_ll`)
# are collected per lookback and pickled to ../results/experiment_sequential/.
dataname = 'fb-messages'
lookbacks = [0]
mu = 4  # forwarded unchanged to decompose()

# Set to an int to keep only the first `max_snapshots` graphs for a quick run;
# None keeps the whole sequence.
max_snapshots = None

# Every result container is a dict keyed by lookback.
# base_* hold one entry per snapshot; joint_*/indep_* hold one entry per
# consecutive (home, away) pair, i.e. one fewer than the number of snapshots.
base_grammars = {lookback: [] for lookback in lookbacks}
joint_grammars = {lookback: [] for lookback in lookbacks}
indep_grammars = {lookback: [] for lookback in lookbacks}

base_mdls = {lookback: [] for lookback in lookbacks}
joint_mdls = {lookback: [] for lookback in lookbacks}
joint_lls = {lookback: [] for lookback in lookbacks}
indep_mdls = {lookback: [] for lookback in lookbacks}
indep_lls = {lookback: [] for lookback in lookbacks}

for lookback in lookbacks:
    # loaded = load_data(dataname=dataname, lookback=lookback)
    # graphs = [g for _, g in loaded]
    graphs, years = read_data(dataname=dataname, lookback=lookback)
    graphs = graphs[:max_snapshots]

    for home_graph, away_graph in zip(graphs[:-1], graphs[1:]):
        base_grammar = decompose(home_graph, mu=mu)
        joint_grammar = update_grammar(base_grammar, home_graph, away_graph, mode='joint')
        indep_grammar = update_grammar(base_grammar, home_graph, away_graph, mode='independent')

        base_grammars[lookback].append(base_grammar)
        joint_grammars[lookback].append(joint_grammar)
        indep_grammars[lookback].append(indep_grammar)

        # The base cost is recorded in base_mdls itself (it used to go to a
        # discarded local list), so base_mdls stays parallel to base_grammars.
        base_mdls[lookback].append(base_grammar.calculate_cost())

        joint_mdls[lookback].append(joint_grammar.calculate_cost())
        joint_lls[lookback].append(joint_grammar.conditional_ll())
        indep_mdls[lookback].append(indep_grammar.calculate_cost())
        indep_lls[lookback].append(indep_grammar.conditional_ll())

    # The last snapshot has no successor, so it only contributes a base grammar.
    base_grammar = decompose(graphs[-1], mu=mu)
    base_grammars[lookback].append(base_grammar)
    base_mdls[lookback].append(base_grammar.calculate_cost())

    # Checkpoint after every lookback so an interrupted run keeps what is done.
    # Each file holds the full dict, so later lookbacks overwrite with a superset.
    results_dir = '../results/experiment_sequential'
    checkpoints = (
        ('base.grammars', base_grammars),
        ('joint.grammars', joint_grammars),
        ('indep.grammars', indep_grammars),
        ('base.mdls', base_mdls),
        ('joint.mdls', joint_mdls),
        ('joint.lls', joint_lls),
        ('indep.mdls', indep_mdls),
        ('indep.lls', indep_lls),
    )
    for suffix, payload in checkpoints:
        with open(f'{results_dir}/{dataname}_{suffix}', 'wb') as outfile:
            pickle.dump(payload, outfile)

100%|███████████████████████████████|[00:00<00:00]

joint changes: 1:   0%|          | 0/71 [00:00<?, ?it/s]
  0%|                                   |[00:00<?][A
100%|███████████████████████████████|[00:00<00:00][A

joint changes: 1:   1%|▏         | 1/71 [00:01<01:51,  1.60s/it]
100%|███████████████████████████████|[00:00<00:00][A

joint changes: 1: 100%|██████████| 71/71 [00:01<00:00, 43.07it/s]
additions: 1: 100%|██████████| 459/459 [00:03<00:00, 131.66it/s]
additions: 2: 100%|██████████| 5/5 [00:00<00:00, 245.40it/s]
additions: 1:  18%|█▊        | 147/830 [00:01<00:04, 137.69it/s]


KeyboardInterrupt: 

In [4]:
# Sanity check on email-dnc: how much do consecutive snapshots share?
loaded = load_data(dataname='email-dnc', lookback=0)
graphs = [graph for _, graph in loaded]

# One (shared node count, shared edge count) tuple per consecutive pair.
# NOTE(review): edges are compared as the raw tuples returned by .edges();
# if the graphs are undirected, (u, v) and (v, u) would not match — confirm.
[
    (
        len(set(prev.nodes()).intersection(succ.nodes())),
        len(set(prev.edges()).intersection(succ.edges())),
    )
    for prev, succ in zip(graphs[:-1], graphs[1:])
]

[(0, 0),
 (1, 0),
 (1, 0),
 (0, 0),
 (0, 0),
 (2, 1),
 (1, 0),
 (1, 0),
 (0, 0),
 (0, 0),
 (13, 13),
 (12, 11),
 (12, 11),
 (23, 24),
 (21, 22),
 (19, 15),
 (456, 875)]

In [9]:
# Sanity check on fb-messages: how much do consecutive snapshots share?
graphs, _ = read_data(dataname='fb-messages', lookback=0)

# One (shared node count, shared edge count) tuple per consecutive pair.
# NOTE(review): edges are compared as the raw tuples returned by .edges();
# if the graphs are undirected, (u, v) and (v, u) would not match — confirm.
[
    (
        len(set(earlier.nodes()).intersection(later.nodes())),
        len(set(earlier.edges()).intersection(later.edges())),
    )
    for earlier, later in zip(graphs[:-1], graphs[1:])
]

[(209, 0),
 (323, 0),
 (396, 0),
 (414, 0),
 (422, 0),
 (457, 0),
 (465, 0),
 (467, 0),
 (389, 0)]

In [None]:
# Display the conditional log likelihoods of the joint grammars, keyed by lookback.
joint_lls

In [None]:
# Display the conditional log likelihoods of the independent grammars, keyed by lookback.
indep_lls

In [None]:
# Take the first element of every pair in `loaded` and keep one label per
# transition between consecutive graphs (len(graphs) - 1 of them).
# NOTE(review): `loaded` and `graphs` are assigned by separate earlier cells,
# possibly for different datasets — confirm they describe the same data.
years = [timestamp for timestamp, _ in loaded][:len(graphs) - 1]

In [None]:
# Plot the conditional log likelihood of the joint and independent grammars
# over the sequence, for the lookback named in the title.
#with plt.style.context(['ipynb', 'use_mathtext', 'colors5-light']):
plt.title(f'sequential experiments: {dataname}, lookback {lookback}')
plt.xlabel('time')
plt.ylabel('log likelihood')
# The result dicts are keyed by lookback, so index them with `lookback`
# (the value shown in the title) rather than a hard-coded key.
# Each series is plotted against its own index range so the x-axis cannot
# go out of sync with the data.
for label, series in (
    ('joint model', joint_lls[lookback]),
    ('independent model', indep_lls[lookback]),
):
    plt.plot(range(len(series)), series, label=label)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.ticklabel_format(style='plain')
#plt.savefig(f'../figures/exp3_{dataname}_{cumulative}.svg')

In [None]:
# Plot the grammar costs (calculate_cost) of the joint, independent and base
# grammars over the sequence, for the lookback named in the title.
#with plt.style.context(['ipynb', 'use_mathtext', 'colors5-light']):
plt.title(f'sequential experiments: {dataname}, lookback {lookback}')
plt.xlabel('time')
plt.ylabel('minimal description length')
# The result dicts are keyed by lookback, so index them with `lookback`
# (the value shown in the title) rather than a hard-coded key.
# Each series is plotted against its own index range: the base series does
# not necessarily have the same number of entries as the joint/independent
# ones, so a shared x-axis would raise a shape mismatch.
for label, series in (
    ('joint model', joint_mdls[lookback]),
    ('independent model', indep_mdls[lookback]),
    ('base model', base_mdls[lookback]),
):
    plt.plot(range(len(series)), series, label=label)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.ticklabel_format(style='plain')
#plt.savefig(f'../figures/exp3_{dataname}_{cumulative}.svg')

# accumulated sequential experiments