In [1]:
import sys
import random
import pickle
from os.path import join

sys.path.append('..')

import numpy as np
import networkx as nx

from cnrg.VRG import VRG as VRG
from cnrg.LightMultiGraph import LightMultiGraph as LightMultiGraph

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
sys.path.append('../src')
from utils import silence
from data import load_data
from data_old import read_data
from bookkeeping import convert_LMG, decompose, decompose_component
from update_grammar import update_grammar

In [3]:
# Load the 'facebook-links' dataset with lookback 0 and keep only the second
# element (the graph) of every pair returned by load_data.
loaded = load_data(dataname='facebook-links', lookback=0)
graphs = [snapshot for _, snapshot in loaded]

In [4]:
# Decompose the first loaded graph into a grammar using decompose()'s default
# arguments; the call runs inside the silence() context manager from utils.
with silence():
    grammar = decompose(graphs[0])

In [5]:
# Show the grammar's string summary (str(grammar)).
print(grammar)

graph: , mu: 4, type: mu_level_dl clustering: leiden rules: 477(476) mdl: 22.288 bits


# independent sequential experiments

In [None]:
# Sequential experiment.
#
# For every lookback value, load the snapshots of `dataname`, and for each pair
# of consecutive snapshots (home_graph, away_graph):
#   1. decompose home_graph into a base grammar,
#   2. call update_grammar on it with mode='joint' and mode='independent',
#   3. record the grammars, their calculate_cost() values and (for the updated
#      grammars) their conditional_ll() values.
# The last snapshot has no successor, so only its base grammar is recorded.
# All results are pickled under ../results/experiment_sequential/.
dataname = 'facebook-links'
# TODO: facebook-links ['weight'] edge missing?
lookbacks = [0]

# Every results dict maps lookback -> list with one entry per time step.
base_grammars = {lookback: [] for lookback in lookbacks}
joint_grammars = {lookback: [] for lookback in lookbacks}
indep_grammars = {lookback: [] for lookback in lookbacks}

base_mdls = {lookback: [] for lookback in lookbacks}
joint_mdls = {lookback: [] for lookback in lookbacks}
joint_lls = {lookback: [] for lookback in lookbacks}
indep_mdls = {lookback: [] for lookback in lookbacks}
indep_lls = {lookback: [] for lookback in lookbacks}

mu = 4
for lookback in lookbacks:
    loaded = load_data(dataname=dataname, lookback=lookback)
    graphs = [g for _, g in loaded]
    # One time label per (home, away) pair, i.e. one fewer than the number of graphs.
    years = [t for t, _ in loaded][:len(graphs) - 1]

    for home_graph, away_graph in zip(graphs[:-1], graphs[1:]):
        base_grammar = decompose(home_graph, mu=mu)
        joint_grammar = update_grammar(base_grammar, home_graph, away_graph, mode='joint')
        indep_grammar = update_grammar(base_grammar, home_graph, away_graph, mode='independent')

        base_grammars[lookback].append(base_grammar)
        joint_grammars[lookback].append(joint_grammar)
        indep_grammars[lookback].append(indep_grammar)

        # Each metric is computed exactly once per grammar.
        # NOTE(review): assumes calculate_cost() and conditional_ll() return the
        # same value on repeated calls -- confirm against their implementations.
        joint_mdls[lookback].append(joint_grammar.calculate_cost())
        joint_lls[lookback].append(joint_grammar.conditional_ll())
        indep_mdls[lookback].append(indep_grammar.calculate_cost())
        indep_lls[lookback].append(indep_grammar.conditional_ll())

        # The base cost is taken after both update_grammar calls.
        base_mdls[lookback].append(base_grammar.calculate_cost())

    # The final snapshot only gets a base grammar, so the base_* lists end up
    # one entry longer than the joint_*/indep_* lists.
    base_grammar = decompose(graphs[-1], mu=mu)
    base_grammars[lookback].append(base_grammar)
    base_mdls[lookback].append(base_grammar.calculate_cost())

    # Re-dump the full (cumulative) results after every lookback so that a
    # later failure does not lose the lookbacks already finished.
    outputs = {
        'base.grammars': base_grammars,
        'joint.grammars': joint_grammars,
        'indep.grammars': indep_grammars,
        'base.mdls': base_mdls,
        'joint.mdls': joint_mdls,
        'joint.lls': joint_lls,
        'indep.mdls': indep_mdls,
        'indep.lls': indep_lls,
    }
    for suffix, payload in outputs.items():
        with open(f'../results/experiment_sequential/{dataname}_{suffix}', 'wb') as outfile:
            pickle.dump(payload, outfile)

In [None]:
# Prints the literal string 'hi'.
# NOTE(review): looks like a leftover smoke-test cell -- consider deleting it.
print('hi')

In [None]:
# Load the 'email-dnc' dataset (lookback 0) and display (order, size) for every
# snapshot. Note that this rebinds the notebook-level `loaded` and `graphs`.
loaded = load_data(dataname='email-dnc', lookback=0)
graphs = [snapshot for _, snapshot in loaded]
[(snapshot.order(), snapshot.size()) for snapshot in graphs]

In [None]:
# Load the 'facebook-links' dataset (lookback 0) and display (order, size) for
# every snapshot. Note that this rebinds the notebook-level `loaded` and `graphs`.
loaded = load_data(dataname='facebook-links', lookback=0)
graphs = [snapshot for _, snapshot in loaded]
[(snapshot.order(), snapshot.size()) for snapshot in graphs]
# Disabled alternative: node/edge overlap between consecutive snapshots.
#[(len(set(cur.nodes()) & set(nxt.nodes())), len(set(cur.edges()) & set(nxt.edges()))) for cur, nxt in zip(graphs[:-1], graphs[1:])]

In [None]:
# For every pair of consecutive 'fb-messages' snapshots, display
# (number of shared nodes, number of shared edges).
# Note that this rebinds the notebook-level `graphs`.
graphs, _ = read_data(dataname='fb-messages', lookback=0)
[
    (
        len(set(cur.nodes()) & set(nxt.nodes())),
        # Ask the next snapshot whether it has each edge instead of intersecting
        # raw (u, v) tuples: an undirected graph may report the same edge as
        # (u, v) in one snapshot and (v, u) in the other, which a tuple
        # intersection would miss. set() keeps each distinct edge counted once.
        # NOTE(review): assumes networkx-style graphs exposing has_edge -- confirm.
        sum(1 for edge in set(cur.edges()) if nxt.has_edge(*edge)),
    )
    for cur, nxt in zip(graphs[:-1], graphs[1:])
]

In [None]:
# Display the conditional_ll() values recorded for the 'joint' grammars
# (dict: lookback -> list, one entry per time step).
joint_lls

In [None]:
# Display the conditional_ll() values recorded for the 'independent' grammars
# (dict: lookback -> list, one entry per time step).
indep_lls

In [None]:
# Plot conditional_ll() of the joint and independent grammars per time step.
# `lookback` and `years` are whatever the experiment loop left behind (its last
# iteration), so the series are indexed by that same `lookback` to keep the
# title, the x-range and the plotted data in agreement.
fig, ax = plt.subplots()
ax.set_title(f'sequential experiments: {dataname}, lookback {lookback}')
ax.set_xlabel('time')
ax.set_ylabel('log likelihood')
steps = range(len(years))
ax.plot(steps, joint_lls[lookback], label='joint model')
ax.plot(steps, indep_lls[lookback], label='independent model')
# Anchor the legend just outside the top-right corner of the axes.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
ax.ticklabel_format(style='plain')

In [None]:
# Plot calculate_cost() of the joint, independent and base grammars per time step.
# `lookback` and `years` are whatever the experiment loop left behind (its last
# iteration), so the series are indexed by that same `lookback` to keep the
# title, the x-range and the plotted data in agreement.
fig, ax = plt.subplots()
ax.set_title(f'sequential experiments: {dataname}, lookback {lookback}')
ax.set_xlabel('time')
ax.set_ylabel('minimal description length')
steps = range(len(years))
ax.plot(steps, joint_mdls[lookback], label='joint model')
ax.plot(steps, indep_mdls[lookback], label='independent model')
# base_mdls[lookback] is a list with one extra trailing entry (the final
# snapshot's base grammar), so it is truncated to the plotted steps. Repeating
# the whole list once per year would hand matplotlib a 2-D array and draw one
# flat line per column.
ax.plot(steps, base_mdls[lookback][:len(years)], label='base model')
# Anchor the legend just outside the top-right corner of the axes.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
ax.ticklabel_format(style='plain')

In [None]:
# Display the calculate_cost() values recorded for the base grammars
# (dict: lookback -> list, one entry per snapshot including the last one).
base_mdls

# accumulated sequential experiments