In [25]:
import os
import random
import time
import collections
import torch
import itertools
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import torch
import dgl
import json
import pickle

from innvariant_dgl.deepgg import DeepGG
from scipy.stats import entropy
from innvariant_dgl.dataset import ConstructionSequenceDataset, generate_ba_model_construction_sequence, graph_to_construction_sequence, construction_sequence_to_graph, generate_ws_model_construction_sequence
from tqdm import tqdm

In [2]:
# Directories that are scanned for computation result files. Every entry is
# listed with os.listdir and joined with plain file names, so each one must be
# an existing, readable directory.
# NOTE(review): the last path component presumably names the machine that ran
# the computations - confirm.
computation_base_paths = [
    '/media/data/computations/inn02-deep-graph-learning/drogon/',
    '/media/data/computations/inn02-deep-graph-learning/mackenzie/'
]

In [34]:
"""
Build up a mapping of [metadata file name] -> {
  'meta': {meta_dict from metadata file},
  'base_path': directory in which the files of the computation were found,
  'dataset_cs': [list of graphs in cs-form],
  'generated_cs': [list of generated graphs in cs-form],
  'computation_time': float in secs given start_time of computation and creation time of generated graphs
}

Only the construction-sequence (cs) form is stored; no graph objects are built here.
A computation is skipped entirely when its metadata file, or any file the metadata
refers to (generated, model, dataset), is missing or empty.
Keys are bare file names, so a metadata file name that occurs under more than one
base path keeps only the entry that was loaded last.
"""


def _is_nonempty_file(path):
    """Return True if `path` is an existing regular file holding at least one byte."""
    return os.path.isfile(path) and os.path.getsize(path) > 0


computations = {}
for base_path in computation_base_paths:
    # os.listdir returns entries in arbitrary order; sorting keeps the insertion
    # order of `computations` (and anything picking "the first" entry) reproducible.
    for filename in sorted(os.listdir(base_path)):
        if '-metadata-' not in filename:
            continue

        meta_file_path = os.path.join(base_path, filename)
        # Must be checked *before* parsing: json.load raises on an empty file,
        # which would abort the whole scan instead of skipping one computation.
        if not _is_nonempty_file(meta_file_path):
            continue

        with open(meta_file_path, 'r') as mh:
            info_meta = json.load(mh)

        file_name_generated = info_meta['files']['generated_file_name']
        file_name_model = info_meta['files']['model_file_name']
        file_name_dataset = info_meta['files']['dataset_file_name']

        referenced_paths = [
            os.path.join(base_path, name)
            for name in (file_name_generated, file_name_model, file_name_dataset)
        ]
        if not all(_is_nonempty_file(path) for path in referenced_paths):
            # Skip whole computation if one of its files is missing or empty
            continue

        with open(os.path.join(base_path, file_name_generated)) as gh:
            info_generated = json.load(gh)

        with open(os.path.join(base_path, file_name_dataset)) as dh:
            info_dataset = json.load(dh)

        # Difference between when the generated graphs were written and when
        # the computation was started.
        computation_time = info_generated['creation_time'] - info_meta['start_time']

        computations[filename] = {
            'meta': info_meta,
            'base_path': base_path,
            'dataset_cs': info_dataset['construction_sequences'],
            'generated_cs': info_generated['construction_sequences'],
            'computation_time': computation_time
        }

In [35]:
# Sanity check: show the metadata of the first computation that was loaded.
selected_filename = next(iter(computations.keys()))
selected_entry = computations[selected_filename]
print(selected_entry['meta'])

{'start_time': 1589810152.43036, 'params': {'param_train_epochs': 8, 'param_learning_rate': 0.0001, 'param_deepgg_prop_rounds': 2, 'param_deepgg_v_max': 150, 'param_deepgg_node_hidden_size': 16, 'param_deepgg_generate_size': 200, 'param_deepgg_generate_v_min': 0, 'param_dataset_size': 1000, 'param_model_num_v': 50, 'param_model_num_v_min': 50, 'param_model_num_v_max': 50, 'param_model_er_p': 0.2, 'param_model_ws_k': 10, 'param_model_ws_p': 0.2, 'param_model_ba_m': 3, 'param_selected_model': 'generate_seqs_watts_strogatz_dfs', 'param_device': 'cuda:7'}, 'hyperparams': {'hp_deepgg_generate_v_min_range': [0, 40, 10], 'hp_dataset_size_range': [500, 2001, 500], 'hp_model_num_v_min_range': [20, 50, 5], 'hp_model_num_v_max_range': [50, 151, 10], 'hp_model_er_p_range': [0.1, 0.9, 0.2], 'hp_model_ws_k_range': [3, 12, 1], 'hp_model_ws_p_range': [0.1, 0.9, 0.2], 'hp_model_ba_m_range': [2, 9, 1]}, 'files': {'dataset_file_name': 'deepgg-t1589810152-ds-1000-generate_seqs_watts_strogatz_dfs.json', 'd

In [36]:
# Persist the combined mapping in a single pickle file in the current working
# directory, so it can be reloaded without re-scanning the result directories.
combined_pickle_path = 'deepgg-combined.pkl'
with open(combined_pickle_path, mode='wb') as pickle_file:
    pickle.dump(computations, pickle_file)