In [6]:
import os
import pickle

IRIDIUM_DATA_DIR = 'IridiumDataSet14day20sec'
IRIDIUM_DATA_TEMPLATE = 'Iridium_DataSetForAgent_Day{}.pkl'
IRIDIUM_DATA_DAY_NUM = 14
'''
Every topology lasts for 5 min.
The topologies loop every 100 min.
So, the topologies repeat every 300 data points.
'''
IRIDIUM_TOPO_LOOP = 300
IRIDIUM_NODE_NUM = 66

    
def load_day(day):
    """Load one day of the Iridium data set from its pickle file.

    Args:
        day: Value substituted into IRIDIUM_DATA_TEMPLATE to build the file name.

    Returns:
        A dict exposing the first five entries of the unpickled object under the
        keys 'flowset', 'topology', 'isl_cap', 'uplink_cap' and 'downlink_cap'.
    """
    day_file = IRIDIUM_DATA_TEMPLATE.format(day)
    path = os.path.join(IRIDIUM_DATA_DIR, day_file)
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(path, 'rb') as handle:
        raw = pickle.load(handle)
    field_names = ('flowset', 'topology', 'isl_cap', 'uplink_cap', 'downlink_cap')
    return {name: raw[position] for position, name in enumerate(field_names)}

In [7]:
# Day 0 is kept in memory as the reference day: generate_topo_jsons() reads its
# 'topology' and 'isl_cap' entries.
example_day = load_day(0)

In [8]:
def topo_type(index: int):
    """Return the topology category of a data point: its index modulo IRIDIUM_TOPO_LOOP."""
    _, position_in_loop = divmod(index, IRIDIUM_TOPO_LOOP)
    return position_in_loop

In [9]:
import copy
import json

TOPOLOGIES_DIR = '../topologies'
TOPOLOGY_TEMPLATE = 'IridiumCat{}.json'
    
def generate_topo_jsons():
    """Write one node-link JSON file per topology category of the reference day.

    Reads the first IRIDIUM_TOPO_LOOP entries of example_day['topology'] and, for
    each category index ``cat``, writes TOPOLOGY_TEMPLATE.format(cat) inside
    TOPOLOGIES_DIR (created if missing). Every link carries example_day['isl_cap']
    as its 'capacity'.

    Each topology entry is indexed as follows:
      [0] matrix that must hold 1 at [source][target] for every listed link,
      [1] matrix that must not hold -1 at [source][target] for every listed link,
      [2] iterable of links, each giving source at [0] and target at [1].

    Raises:
        AssertionError: if a topology entry fails one of the consistency checks.
    """
    topo_all = example_day['topology'][:IRIDIUM_TOPO_LOOP]
    isl_cap = example_day['isl_cap']

    # Parts of the node-link document that are identical for every category.
    template = {
        "directed": True,
        "multigraph": False,
        "graph": {},
        "nodes": [{"id": n} for n in range(IRIDIUM_NODE_NUM)],
    }

    # exist_ok avoids the check-then-create race and keeps re-running the cell safe.
    os.makedirs(TOPOLOGIES_DIR, exist_ok=True)

    for cat in range(IRIDIUM_TOPO_LOOP):
        topo_cat = topo_all[cat]
        assert len(topo_cat[0]) == IRIDIUM_NODE_NUM, \
            f"category {cat}: unexpected number of nodes"

        links = []
        for edge in topo_cat[2]:
            src, dst = edge[0], edge[1]
            # Validate before recording the link. Negative indices are rejected too,
            # because they would silently index the matrices from the end.
            assert 0 <= src < IRIDIUM_NODE_NUM and 0 <= dst < IRIDIUM_NODE_NUM, \
                f"category {cat}: node index out of range in link ({src}, {dst})"
            assert topo_cat[0][src][dst] == 1, \
                f"category {cat}: link ({src}, {dst}) missing from the first representation"
            assert topo_cat[1][src][dst] != -1, \
                f"category {cat}: link ({src}, {dst}) missing from the second representation"
            links.append({
                'capacity': isl_cap,
                'source': src,
                'target': dst
            })

        # A shallow copy is enough: the shared parts are only serialised, never mutated.
        topo_obj = dict(template)
        topo_obj['links'] = links

        with open(os.path.join(TOPOLOGIES_DIR, TOPOLOGY_TEMPLATE.format(cat)), 'w') as f:
            json.dump(topo_obj, f, indent=4)

# Run for its side effect only: writes the topology JSON files into TOPOLOGIES_DIR.
generate_topo_jsons()

In [3]:
import numpy as np

TRAFFIC_MATRICES_DIR = '../traffic-matrices/toy'
TRAFFIC_MATRIX_TEMPLATE = 'IridiumCat{}.json_toy_{}_1.0_traffic-matrix.pkl'

def generate_tm_ndarray():
    """Export every usable snapshot of every day as a pickled traffic matrix (serial).

    For each day in range(IRIDIUM_DATA_DAY_NUM) the 'flowset' list is loaded, its
    trailing partial topology loop is dropped, and each remaining snapshot is
    written as an (IRIDIUM_NODE_NUM x IRIDIUM_NODE_NUM) numpy array to
    TRAFFIC_MATRIX_TEMPLATE.format(cat, idx) inside TRAFFIC_MATRICES_DIR, where
    ``cat`` is topo_type(snapshot index) and ``idx`` counts the matrices already
    written for that category across all days.
    """
    # exist_ok avoids the check-then-create race and keeps re-running the cell safe.
    os.makedirs(TRAFFIC_MATRICES_DIR, exist_ok=True)

    # cat_num_rec[cat] = number of matrices written so far for category `cat`.
    # It is never reset between days, so file indices never collide.
    cat_num_rec = [0] * IRIDIUM_TOPO_LOOP

    for d in range(IRIDIUM_DATA_DAY_NUM):  # Every day
        day_flow = load_day(d)['flowset']

        # Only whole topology loops are exported; the remainder of the day is skipped.
        num = len(day_flow) - (len(day_flow) % IRIDIUM_TOPO_LOOP)
        for i in range(num):
            cat = topo_type(i)
            idx = cat_num_rec[cat]
            cat_num_rec[cat] += 1

            tm = np.zeros((IRIDIUM_NODE_NUM, IRIDIUM_NODE_NUM))
            for flow in day_flow[i]:
                # Row = flow[0], column = flow[1][0], value = flow[2].
                # NOTE(review): a repeated (row, column) pair overwrites the earlier
                # value instead of accumulating - confirm this is intended.
                tm[flow[0], flow[1][0]] = flow[2]

            with open(os.path.join(TRAFFIC_MATRICES_DIR, TRAFFIC_MATRIX_TEMPLATE.format(cat, idx)), 'wb') as f:
                pickle.dump(tm, f)


# Serial export: one load_day() call per day and one pickle file per exported snapshot.
generate_tm_ndarray()

In [None]:
import os
import glob
from multiprocessing import Pool

TRAFFIC_MATRICES_DIR = '../traffic-matrices/toy'

def delete_file(file_path):
    """Remove one file and report the outcome on stdout.

    An OSError from the removal is caught and printed, so it never reaches the caller.
    """
    try:
        os.unlink(file_path)
        outcome = f"Deleted: {file_path}"
        print(outcome)
    except OSError as problem:
        print(f"Error: {file_path} : {problem.strerror}")

def delete_iridium_files_multiprocessing():
    """Delete every path matching 'Iridium*' in TRAFFIC_MATRICES_DIR with a process pool.

    The pool is sized with os.cpu_count(); each match is handed to delete_file().
    """
    pattern = os.path.join(TRAFFIC_MATRICES_DIR, 'Iridium*')
    doomed = glob.glob(pattern)

    worker_count = os.cpu_count()
    with Pool(processes=worker_count) as workers:
        workers.map(delete_file, doomed)

# Destructive: removes every file matching 'Iridium*' in TRAFFIC_MATRICES_DIR.
delete_iridium_files_multiprocessing()


In [None]:
from multiprocessing import Pool, Array, Lock
import os
import numpy as np
import pickle

TRAFFIC_MATRICES_DIR = '../traffic-matrices/toy'
TRAFFIC_MATRIX_TEMPLATE = 'IridiumCat{}.json_toy_{}_1.0_traffic-matrix.pkl'

def init(args):
    """Pool initializer: publish the shared counter array and lock as module globals.

    Args:
        args: A two-item sequence ``(counter_array, lock)``; process_day() later
            reads them through the globals ``cat_num_rec`` and ``lock``.
    """
    global cat_num_rec, lock
    shared_counts, shared_lock = args
    cat_num_rec = shared_counts
    lock = shared_lock

def process_day(d):
    """Export the usable snapshots of day ``d`` as pickled traffic matrices.

    Meant to run inside a Pool worker after init() has set the globals
    ``cat_num_rec`` (shared per-category counters) and ``lock``.

    Args:
        d: Day identifier passed to load_day().

    Side effects:
        Creates TRAFFIC_MATRICES_DIR if needed and writes one file per snapshot,
        named TRAFFIC_MATRIX_TEMPLATE.format(cat, idx).
    """
    # Every worker runs this. exist_ok=True is required: with a separate exists()
    # check, two workers can both see the directory missing and the slower one
    # then fails with FileExistsError.
    os.makedirs(TRAFFIC_MATRICES_DIR, exist_ok=True)

    day_flow = load_day(d)['flowset']

    # Only whole topology loops are exported; the remainder of the day is skipped.
    num = len(day_flow) - (len(day_flow) % IRIDIUM_TOPO_LOOP)
    for i in range(num):
        cat = topo_type(i)

        # Reserve the next file index for this category atomically across workers.
        # NOTE(review): when days run concurrently, indices are handed out in
        # arrival order, so idx no longer follows day order as it does in the
        # serial version - confirm downstream code does not rely on that order.
        with lock:
            idx = cat_num_rec[cat]
            cat_num_rec[cat] += 1

        tm = np.zeros((IRIDIUM_NODE_NUM, IRIDIUM_NODE_NUM))
        for flow in day_flow[i]:
            # Row = flow[0], column = flow[1][0], value = flow[2].
            # NOTE(review): a repeated (row, column) pair overwrites the earlier
            # value instead of accumulating - confirm this is intended.
            tm[flow[0], flow[1][0]] = flow[2]

        with open(os.path.join(TRAFFIC_MATRICES_DIR, TRAFFIC_MATRIX_TEMPLATE.format(cat, idx)), 'wb') as f:
            pickle.dump(tm, f)

def generate_tm_ndarray():
    """Export all days in parallel: one process_day() task per day.

    A shared integer array with IRIDIUM_TOPO_LOOP slots and a lock are handed to
    every worker through init(), so workers can reserve per-category file indices.
    """
    counters = Array('i', IRIDIUM_TOPO_LOOP)
    guard = Lock()
    worker_count = os.cpu_count()
    shared_state = (counters, guard)
    with Pool(processes=worker_count, initializer=init, initargs=(shared_state,)) as workers:
        workers.map(process_day, range(IRIDIUM_DATA_DAY_NUM))

# Parallel export (one task per day). This generate_tm_ndarray shadows the serial
# definition from the earlier cell and writes to the same directory and file names.
generate_tm_ndarray()


In [None]:
import dgl
import numpy as np
import torch as th
from dgl.nn import EdgeGATConv

# Case 1: Homogeneous graph.
# Stand-alone smoke test of EdgeGATConv on random data. No seed is set, so the
# graph and the features differ on every run.
# NOTE: `graph` is rebound to the AIFB graph by a later cell.
num_nodes, num_edges = 8, 30
# Generate a graph.
graph = dgl.rand_graph(num_nodes,num_edges)
# Random features: 20 values per node and 12 per edge, matching in_feats / edge_feats below.
node_feats = th.rand((num_nodes, 20))
edge_feats = th.rand((num_edges, 12))
edge_gat = EdgeGATConv(
    in_feats=20,
    edge_feats=12,
    out_feats=15,
    num_heads=3,
)
# Forward pass.
new_node_feats = edge_gat(graph, node_feats, edge_feats)
# Bare expression so the notebook displays the shape of the result.
new_node_feats.shape

In [3]:
from collections import defaultdict
from glob import iglob

import argparse
import os
import sys
from tqdm import tqdm

sys.path.append("..")

from lib.config import TOPOLOGIES_DIR, TM_DIR

# Topology file names whose traffic matrices are collected by the loop below.
PROBLEM_NAMES = [
    'B4.json',
    'UsCarrier.json',
    'Kdl.json',
    'ASN2k.json',
]
# NOTE(review): only IridiumCat0..IridiumCat4 are registered here, while a later cell
# calls get_constellation_problems with num_topo=300; that lookup raises for any
# category that was never collected. Confirm the intended range.
PROBLEM_NAMES += [f"IridiumCat{i}.json" for i in range(5)]

# CONSTELLATIONS, SCALE_FACTORS, OBJ_STRS, PATH_FORM_HYPERPARAMS,
# PROBLEM_NAMES_AND_TM_MODELS and GROUPED_BY_CONSTELLATION are not read by any cell
# visible in this notebook; presumably they mirror a shared config - TODO confirm.
CONSTELLATIONS = [
    'Iridium'
]
# Sub-directories of TM_DIR that are searched for traffic matrices.
TM_MODELS = [
    "real",
    "toy",
]
SCALE_FACTORS = [1.0]
OBJ_STRS = ["total_flow", "min_max_link_util"]

PATH_FORM_HYPERPARAMS = (4, True, "min-hop")

# Cartesian product: every problem name paired with every traffic-matrix model.
PROBLEM_NAMES_AND_TM_MODELS = [
    (prob_name, tm_model) for prob_name in PROBLEM_NAMES
    for tm_model in TM_MODELS
]

# Containers filled by the discovery loop below:
#   PROBLEMS / HOLDOUT_PROBLEMS: flat lists of (problem_name, topo_fname, tm_fname).
#   GROUPED_BY_PROBLEMS / GROUPED_BY_HOLDOUT_PROBLEMS:
#       (problem_name, model, scale_factor) -> list of (topo_fname, tm_fname).
PROBLEMS = []
GROUPED_BY_PROBLEMS = defaultdict(list)
GROUPED_BY_CONSTELLATION = defaultdict(list)
HOLDOUT_PROBLEMS = []
GROUPED_BY_HOLDOUT_PROBLEMS = defaultdict(list)

# Discover the traffic-matrix files of every registered problem.
# TOPOLOGIES_DIR and TM_DIR here are the values imported from lib.config in this cell,
# which replace the '../topologies' constant defined in the topology-export cell.
for problem_name in tqdm(PROBLEM_NAMES, desc="reading traffic matrix"):
    # .graphml topologies are looked up in the "topology-zoo" sub-directory.
    if problem_name.endswith(".graphml"):
        topo_fname = os.path.join(TOPOLOGIES_DIR, "topology-zoo", problem_name)
    else:
        topo_fname = os.path.join(TOPOLOGIES_DIR, problem_name)
    for model in TM_MODELS:
        # Regular matrices: <TM_DIR>/<model>/<problem_name>*_traffic-matrix.pkl
        for tm_fname in iglob(
            "{}/{}/{}*_traffic-matrix.pkl".format(TM_DIR, model, problem_name)
        ):
            # Strip ".pkl" and split on "_": vals[1], vals[2], vals[3] are read as the
            # model token, the integer seed/index and the scale factor. This assumes
            # the problem name itself contains no "_". Only scale_factor is used below.
            vals = os.path.basename(tm_fname)[:-4].split("_")
            _, traffic_seed, scale_factor = vals[1], int(vals[2]),\
                float(vals[3])
            GROUPED_BY_PROBLEMS[(problem_name, model, scale_factor)].append(
                (topo_fname, tm_fname)
            )
            PROBLEMS.append((problem_name, topo_fname, tm_fname))
        # Hold-out matrices: <TM_DIR>/holdout/<model>/<problem_name>*_traffic-matrix.pkl
        for tm_fname in iglob(
            "{}/holdout/{}/{}*_traffic-matrix.pkl".format(
                TM_DIR, model, problem_name
            )
        ):
            # Same file-name parsing as for the regular matrices.
            vals = os.path.basename(tm_fname)[:-4].split("_")
            _, traffic_seed, scale_factor = vals[1], int(vals[2]),\
                float(vals[3])
            GROUPED_BY_HOLDOUT_PROBLEMS[(problem_name, model, scale_factor)]\
                .append(
                    (topo_fname, tm_fname)
            )
            HOLDOUT_PROBLEMS.append((problem_name, topo_fname, tm_fname))

# Convert to a plain dict so that looking up an unknown key raises KeyError instead of
# silently inserting an empty list.
GROUPED_BY_PROBLEMS = dict(GROUPED_BY_PROBLEMS)
# Sort each group by the integer found in the third-from-last "_"-separated token of
# the traffic-matrix path (the seed/index position in "..._<seed>_<scale>_traffic-matrix.pkl").
# Only existing keys are reassigned, so mutating the dict inside the loop is safe.
# NOTE(review): GROUPED_BY_HOLDOUT_PROBLEMS is left unsorted and remains a defaultdict.
for key, vals in GROUPED_BY_PROBLEMS.items():
    GROUPED_BY_PROBLEMS[key] = sorted(
        vals, key=lambda x: int(x[-1].split('_')[-3]))

reading traffic matrix: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:11<00:00,  1.30s/it]


In [5]:
GROUPED_BY_PROBLEMS[('IridiumCat4.json', 'toy', 1.0)]

[('/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/topologies/IridiumCat4.json',
  '/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/traffic-matrices/toy/IridiumCat4.json_toy_0_1.0_traffic-matrix.pkl'),
 ('/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/topologies/IridiumCat4.json',
  '/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/traffic-matrices/toy/IridiumCat4.json_toy_1_1.0_traffic-matrix.pkl'),
 ('/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/topologies/IridiumCat4.json',
  '/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/traffic-matrices/toy/IridiumCat4.json_toy_2_1.0_traffic-matrix.pkl'),
 ('/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/teal/topologies/IridiumCat4.json',
  '/mnt/batch/tasks/shared/LS_root/mounts/clusters/hana100/code/Users/e1310988/tea

In [14]:
def get_constellation_problems(args):
    """Collect the (topology, traffic-matrix) problems of one constellation.

    Args:
        args: Mapping with the keys 'constellation', 'num_topo', 'tm_model' and
            'scale_factor'. Categories 0 .. num_topo-1 are looked up in
            GROUPED_BY_PROBLEMS under
            ("<constellation>Cat<i>.json", tm_model, scale_factor).

    Returns:
        A list of (topo_key, topo_fname, tm_fname) tuples ordered by the integer
        in the third-from-last "_"-separated token of tm_fname; entries with the
        same value keep their collection order.

    Raises:
        Exception: if a requested category has no entry in GROUPED_BY_PROBLEMS.
    """
    def _index_of(problem):
        # Third-from-last "_"-separated token of the traffic-matrix path.
        return int(problem[2].split('_')[-3])

    prefix = args['constellation']
    collected = []
    for cat in tqdm(range(args['num_topo']), desc="reading topologies"):
        topo_key = f"{prefix}Cat{cat}.json"
        lookup = (topo_key, args['tm_model'], args['scale_factor'])
        if lookup not in GROUPED_BY_PROBLEMS:
            raise Exception(f'Traffic matrices not found for {topo_key}')
        collected.extend(
            (topo_key, topo_fname, tm_fname)
            for topo_fname, tm_fname in GROUPED_BY_PROBLEMS[lookup]
        )

    return sorted(collected, key=_index_of)

# Request all 300 Iridium topology categories with the "toy" traffic model.
# NOTE(review): this only succeeds if GROUPED_BY_PROBLEMS holds IridiumCat0..299, but
# the PROBLEM_NAMES cell shown above registers only 5 categories - the recorded run
# appears to rely on kernel state from a different version of that cell; confirm.
args = {"num_topo": 300,
        "constellation": "Iridium",
        "tm_model": "toy",
        "scale_factor": 1.0}

problems = get_constellation_problems(args)

reading topologies: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 33799.59it/s]


In [2]:
import dgl

# AIFB RDF dataset from DGL. force_reload=False allows the cached copy to be used
# (the recorded output reports loading from cached files). insert_reverse=True
# presumably adds the 'rev-...' edge types seen in the graph summary - TODO confirm.
dataset = dgl.data.rdf.AIFBDataset(
            insert_reverse=True, force_reload=False)

Done loading data from cached files.


In [3]:
# First item of the dataset. NOTE: the degree-cutting cell later mutates this object
# and rebinds `graph`, so re-run this cell before re-running that one.
graph = dataset[0]

In [4]:
graph

Graph(num_nodes={'Forschungsgebiete': 146, 'Forschungsgruppen': 5, 'Kooperationen': 28, 'Personen': 237, 'Projekte': 78, 'Publikationen': 1318, '_Literal': 5450},
      num_edges={('Forschungsgebiete', 'ontology#dealtWithIn', 'Projekte'): 357, ('Forschungsgebiete', 'ontology#isWorkedOnBy', 'Personen'): 571, ('Forschungsgebiete', 'ontology#name', '_Literal'): 146, ('Forschungsgebiete', 'rdftype', '_Literal'): 129, ('Forschungsgebiete', 'rev-ontology#isAbout', 'Projekte'): 357, ('Forschungsgebiete', 'rev-ontology#isAbout', 'Publikationen'): 2120, ('Forschungsgruppen', 'ontology#carriesOut', 'Projekte'): 79, ('Forschungsgruppen', 'ontology#head', 'Personen'): 5, ('Forschungsgruppen', 'ontology#homepage', '_Literal'): 5, ('Forschungsgruppen', 'ontology#member', 'Personen'): 74, ('Forschungsgruppen', 'ontology#name', '_Literal'): 5, ('Forschungsgruppen', 'ontology#publishes', 'Publikationen'): 1217, ('Forschungsgruppen', 'rev-ontology#carriedOutBy', 'Projekte'): 79, ('Kooperationen', 'ontol

In [10]:
# Node type treated as the target in the cells below (taken from the dataset).
category = dataset.predict_category

In [6]:
dataset.num_classes

4

In [8]:
graph.ntypes

['Forschungsgebiete',
 'Forschungsgruppen',
 'Kooperationen',
 'Personen',
 'Projekte',
 'Publikationen',
 '_Literal']

In [15]:
import torch

# Nodes whose total degree (in + out) is <= this threshold are removed, unless they
# are of the target `category` type.
# NOTE: this cell mutates `graph` and then rebinds it, so it is not idempotent;
# reload `graph` from the dataset before re-running.
degree_cutting = 10

# Give every non-target node type zero-filled "labels", "test_mask" and "train_mask"
# fields; presumably required so dgl.to_homogeneous(ndata=...) finds them on every
# node type - TODO confirm.
for ntype in graph.ntypes:
    if ntype != category:
        graph.nodes[ntype].data["labels"] = torch.zeros(
            graph.num_nodes(ntype), dtype=torch.int64)
        graph.nodes[ntype].data["test_mask"] = torch.zeros(
            graph.num_nodes(ntype), dtype=torch.bool)
        graph.nodes[ntype].data["train_mask"] = torch.zeros(
            graph.num_nodes(ntype), dtype=torch.bool)

# Convert to homogeneous graph and perform degree cutting
hom_graph = dgl.to_homogeneous(
    graph, ndata=["labels", "test_mask", "train_mask"])
degrees = hom_graph.in_degrees() + hom_graph.out_degrees()
degree_mask = degrees <= degree_cutting
category_index = graph.ntypes.index(category)
# True for nodes that are NOT of the target category type.
category_mask = hom_graph.ndata["_TYPE"] != category_index

# Remove every node whose degree is at or below the threshold and that does not
# belong to the target category type.
combined_masks = torch.logical_and(degree_mask, category_mask)
hom_graph.remove_nodes(combined_masks.nonzero(as_tuple=True)[0])

# Go back to heterogeneous graph
graph = dgl.to_heterogeneous(hom_graph, graph.ntypes, graph.etypes)

# Get number of final nodes for each node type
in_nodes = dict()
for ntype in graph.ntypes:
    in_nodes[ntype] = graph.num_nodes(ntype)

In [17]:
in_nodes

{'Forschungsgebiete': 115,
 'Forschungsgruppen': 4,
 'Kooperationen': 5,
 'Personen': 237,
 'Projekte': 75,
 'Publikationen': 1244,
 '_Literal': 65}

In [30]:
len(graph.canonical_etypes)

78

In [20]:
# Four groups of per-edge-type convolutions. Each canonical edge type
# (source type, relation, target type) is placed in exactly one group, keyed by
# str(edge) because ModuleDict keys must be strings.
full_graph_conv = torch.nn.ModuleDict()
full_graph_conv["conv_1"] = torch.nn.ModuleDict()
full_graph_conv["conv_2"] = torch.nn.ModuleDict()
full_graph_conv["conv_3"] = torch.nn.ModuleDict()
full_graph_conv["conv_4"] = torch.nn.ModuleDict()
for edge in graph.canonical_etypes:
    if (edge[0] == edge[2]) and (edge[0] != category):
        # Self-conv excluding the target node
        which_graph_conv = "conv_1"
    elif (edge[0] != edge[2]) and (edge[2] != category):
        # Conv from all nodes to others but the target node
        which_graph_conv = "conv_2"
    elif (edge[2] == category) and (edge[0] != category):
        # Conv to the target node
        which_graph_conv = "conv_3"
    elif (edge[2] == category) and (edge[0] == category):
        # Self update
        which_graph_conv = "conv_4"
    else:
        # The four branches above cover every (source, target) combination, so this
        # only guards against future edits. The exception was previously built but
        # never raised, which would have skipped the edge type silently.
        raise NotImplementedError(
            f"Undefined graph convolution for edge {edge}")

    full_graph_conv[which_graph_conv][str(edge)] = dgl.nn.GATConv(1, 1, 1)

In [31]:
# "conv_1" holds the edge types whose source and target type are the same and are
# not the target category.
conv_dict = full_graph_conv['conv_1']

In [32]:
import ast

# The ModuleDict keys are str(edge) of canonical edge-type tuples. Parse them back
# with ast.literal_eval instead of slicing the text on "', '", which breaks as soon
# as a name contains a quote or that separator.
conv_dict_key_tuples = [
                tuple(map(str, ast.literal_eval(string))) for string in conv_dict.keys()]

In [33]:
conv_dict_key_tuples

[('Publikationen', 'ontology#author', 'Publikationen'),
 ('Publikationen', 'ontology#editor', 'Publikationen'),
 ('Publikationen', 'ontology#publication', 'Publikationen'),
 ('Publikationen', 'rev-ontology#author', 'Publikationen'),
 ('Publikationen', 'rev-ontology#editor', 'Publikationen'),
 ('Publikationen', 'rev-ontology#publication', 'Publikationen')]

In [41]:
conv_dict

ModuleDict(
  (('Publikationen', 'ontology#author', 'Publikationen')): GATConv(
    (fc): Linear(in_features=1, out_features=1, bias=False)
    (feat_drop): Dropout(p=0.0, inplace=False)
    (attn_drop): Dropout(p=0.0, inplace=False)
    (leaky_relu): LeakyReLU(negative_slope=0.2)
  )
  (('Publikationen', 'ontology#editor', 'Publikationen')): GATConv(
    (fc): Linear(in_features=1, out_features=1, bias=False)
    (feat_drop): Dropout(p=0.0, inplace=False)
    (attn_drop): Dropout(p=0.0, inplace=False)
    (leaky_relu): LeakyReLU(negative_slope=0.2)
  )
  (('Publikationen', 'ontology#publication', 'Publikationen')): GATConv(
    (fc): Linear(in_features=1, out_features=1, bias=False)
    (feat_drop): Dropout(p=0.0, inplace=False)
    (attn_drop): Dropout(p=0.0, inplace=False)
    (leaky_relu): LeakyReLU(negative_slope=0.2)
  )
  (('Publikationen', 'rev-ontology#author', 'Publikationen')): GATConv(
    (fc): Linear(in_features=1, out_features=1, bias=False)
    (feat_drop): Dropout(p=0.

In [45]:
# Restrict the graph to a single canonical edge type (one of the "conv_1" keys listed above).
subgraph = graph.edge_type_subgraph([('Publikationen', 'ontology#author', 'Publikationen')])

In [47]:
# NOTE(review): this lookup fails with KeyError: 'x' (see the recorded output). No cell
# visible here assigns a node feature named 'x'; the fields set earlier are "labels",
# "test_mask" and "train_mask" - confirm which feature was intended.
subgraph.ndata['x']

KeyError: 'x'