## generate ASN training dataset for TELGEN

In [1]:
from solver.linprog import linprog
from tqdm import tqdm

import gzip
import pickle
import torch
from scipy.linalg import LinAlgWarning
from scipy.optimize._optimize import OptimizeWarning
# from scipy.optimize import OptimizeWarning
import warnings
import numpy as np
from functools import partial
import random
import pickle 
import json

In [2]:
root = 'raw/'

### Resource Allocation

#### Input (for one graph):
#### - $G(V, E, c)$: random graph (strongly connected) with edge capacities $c$
#### - demands $(s, t, d) \in (S, T, D)$
#### - for every $(s, t, d)$ there is a set of candidate paths $p \in P_d$, obtained with the k-shortest-path algorithm ($k$ = 4/5/6)

In [3]:
import networkx as nx
import matplotlib.pyplot as plt

### Generate and save strongly connected, directed ASN graphs with different numbers of nodes (and paths p)

In [4]:
# Load the ASN topology. The context manager closes the file as soon as the
# JSON has been parsed (the original left the handle open).
with open(root+'asn_graph/ASN2k.json') as f:
    G = json.load(f)

# Unweighted directed graph: node ids and (source, target) links only.
asn_graph = nx.DiGraph()
asn_graph.add_nodes_from([node['id'] for node in G['nodes']])
asn_graph.add_edges_from([(link['source'], link['target']) for link in G['links']])

print('Strongly connected:', nx.is_strongly_connected(asn_graph))
print('# of nodes and edges:', asn_graph.number_of_nodes(), asn_graph.number_of_edges())
print('Weighted:', nx.is_weighted(asn_graph))

# Same topology, with each link's 'capacity' stored as the edge 'weight'.
asn_graph_weight = nx.DiGraph()
asn_graph_weight.add_nodes_from([node['id'] for node in G['nodes']])
for link in G['links']:
    asn_graph_weight.add_edge(link['source'], link['target'], weight=link['capacity'])
print('Strongly connected:', nx.is_strongly_connected(asn_graph_weight))
print('# of nodes and edges:', asn_graph_weight.number_of_nodes(), asn_graph_weight.number_of_edges())
print('Weighted:', nx.is_weighted(asn_graph_weight))

# nx.draw(asn_graph, with_labels=True, node_color='lightgreen', arrows=True)

Strongly connected: True
# of nodes and edges: 1739 8558
Weighted: False
Strongly connected: True
# of nodes and edges: 1739 8558
Weighted: True


In [5]:
# check asn link capacities
# Collect every link capacity and show the (largest, smallest) value.
cap = [link['capacity'] for link in G['links']]
max(cap), min(cap)

(53750.0, 250.0)

### sample whole graph

In [6]:
random.seed(2024)
np.random.seed(2024)

# The complete ASN topology is kept as one of the test graphs.
asn_graph_test = nx.DiGraph()
asn_graph_test.add_nodes_from([i['id'] for i in G['nodes']])
asn_graph_test.add_edges_from([(i['source'], i['target']) for i in G['links']])

# Save the node ids explicitly. The original passed the DiGraph object itself
# to np.save and relied on NumPy coercing it to an array by iterating it; the
# other cells save plain node lists, and the loader feeds the array straight
# into `subgraph(nodes)`.
np.save(root+'asn_graph/test_'+str(asn_graph_test.number_of_nodes())+'_nodes',
        list(asn_graph_test.nodes()))
print('Graph info', asn_graph_test.number_of_nodes(), asn_graph_test.number_of_edges())
print('Connected:', nx.is_strongly_connected(asn_graph_test))
print('Weighted:', nx.is_weighted(asn_graph_test))
print('---------------------------------------')

Graph info 1739 8558
Connected: True
Weighted: False
---------------------------------------


### generate nodes for train/test, random subgraph sampling

In [7]:
random.seed(2024)
np.random.seed(2024)

# Two copies of the full unweighted topology: training subgraphs are sampled
# from the first, the test subgraph is carved out of the second.
asn_graph_train = nx.DiGraph()
asn_graph_train.add_nodes_from([i['id'] for i in G['nodes']])
asn_graph_train.add_edges_from([(i['source'], i['target']) for i in G['links']])

asn_graph_test = nx.DiGraph()
asn_graph_test.add_nodes_from([i['id'] for i in G['nodes']])
asn_graph_test.add_edges_from([(i['source'], i['target']) for i in G['links']])


# train and test has no overlap; within train, no overlap; within test, can have overlap
train_num = [350, 500]
total_nodes = np.array(list(asn_graph_train.nodes()))

for i in train_num:
    # Draw i distinct nodes from the nodes that are still unused.
    nodes = np.random.choice(total_nodes, i, replace=False)
    sampled_subgraph = asn_graph_train.subgraph(nodes)
    # Keep only the largest strongly connected component of the sample.
    # max() returns the first maximal component, which is the same element the
    # stable descending sort put first.
    largest = max(nx.strongly_connected_components(sampled_subgraph), key=len, default=None)
    if largest is not None:
        c = list(largest)
        subgraph = asn_graph_train.subgraph(c)
        np.save(root+'asn_graph/train_'+str(len(c))+'_nodes', c)
        print('Graph info', subgraph.number_of_nodes(), subgraph.number_of_edges())
        print('Connected:', nx.is_strongly_connected(subgraph))
        print('Weighted:', nx.is_weighted(subgraph))
        print('---------------------------------------')
    # Remove every sampled node (not just those in the kept component) from the
    # pool in one order-preserving mask, instead of one np.delete per node.
    # Order is preserved so the next np.random.choice draws the same sample.
    total_nodes = total_nodes[~np.isin(total_nodes, nodes)]

# total_nodes is what is left for testing
asn_graph_test = asn_graph_test.subgraph(total_nodes)
print('Left Test Graph info', asn_graph_test.number_of_nodes(), asn_graph_test.number_of_edges())
print('Connected:', nx.is_strongly_connected(asn_graph_test))
print('Weighted:', nx.is_weighted(asn_graph_test))
print('---------------------------------------')

# The leftover graph is not necessarily strongly connected, so only its
# largest strongly connected component is saved as the test graph.
# 557, 32, 31, 21, 15, 15, 14, 11, 9......
largest = max(nx.strongly_connected_components(asn_graph_test), key=len, default=None)
if largest is not None:
    c = list(largest)
    np.save(root+'asn_graph/test_' + str(len(c)) + '_nodes', c)
    subgraph = asn_graph_test.subgraph(c)
    print('Test Graph info', subgraph.number_of_nodes(), subgraph.number_of_edges())
    print('Connected:', nx.is_strongly_connected(subgraph))
    print('Weighted:', nx.is_weighted(subgraph))

Graph info 217 590
Connected: True
Weighted: False
---------------------------------------
Graph info 237 586
Connected: True
Weighted: False
---------------------------------------
Left Test Graph info 889 1622
Connected: False
Weighted: False
---------------------------------------
Test Graph info 553 1532
Connected: True
Weighted: False


In [8]:
# Re-seed both RNGs so this cell's sample is reproducible on its own.
random.seed(2024)
np.random.seed(2024)

# Rebuild the full unweighted topology twice (train copy and test copy).
node_ids = [node['id'] for node in G['nodes']]
edge_pairs = [(link['source'], link['target']) for link in G['links']]

asn_graph_train = nx.DiGraph()
asn_graph_train.add_nodes_from(node_ids)
asn_graph_train.add_edges_from(edge_pairs)

asn_graph_test = nx.DiGraph()
asn_graph_test.add_nodes_from(node_ids)
asn_graph_test.add_edges_from(edge_pairs)


# train and test has no overlap; within train, no overlap; within test, can have overlap
# NOTE(review): this cell samples from the complete node set, so the statement
# above is not enforced against graphs saved by other cells.
train_num = [250]
total_nodes = np.array(list(asn_graph_train.nodes()))

for sample_size in train_num:
    nodes = np.random.choice(total_nodes, sample_size, replace=False)
    sampled_subgraph = asn_graph_train.subgraph(nodes)
    # Only the largest strongly connected component of the sample is kept.
    largest = max(nx.strongly_connected_components(sampled_subgraph), key=len, default=None)
    if largest is None:
        continue
    c = list(largest)
    subgraph = asn_graph_train.subgraph(c)
    # The file name carries the component size, not the requested sample size.
    np.save(root+'asn_graph/train_'+str(len(c))+'_nodes', c)
    print('Graph info', subgraph.number_of_nodes(), subgraph.number_of_edges())
    print('Connected:', nx.is_strongly_connected(subgraph))
    print('Weighted:', nx.is_weighted(subgraph))
    print('---------------------------------------')

Graph info 98 202
Connected: True
Weighted: False
---------------------------------------


In [9]:
# Re-seed both RNGs so this cell's sample is reproducible on its own.
random.seed(2024)
np.random.seed(2024)

# Rebuild the full unweighted topology twice (train copy and test copy).
node_ids = [node['id'] for node in G['nodes']]
edge_pairs = [(link['source'], link['target']) for link in G['links']]

asn_graph_train = nx.DiGraph()
asn_graph_train.add_nodes_from(node_ids)
asn_graph_train.add_edges_from(edge_pairs)

asn_graph_test = nx.DiGraph()
asn_graph_test.add_nodes_from(node_ids)
asn_graph_test.add_edges_from(edge_pairs)


# train and test has no overlap; within train, no overlap; within test, can have overlap
# NOTE(review): this cell samples from the complete node set, so the statement
# above is not enforced against graphs saved by other cells.
train_num = [650]
total_nodes = np.array(list(asn_graph_train.nodes()))

for sample_size in train_num:
    nodes = np.random.choice(total_nodes, sample_size, replace=False)
    sampled_subgraph = asn_graph_train.subgraph(nodes)
    # Only the largest strongly connected component of the sample is kept.
    largest = max(nx.strongly_connected_components(sampled_subgraph), key=len, default=None)
    if largest is None:
        continue
    c = list(largest)
    subgraph = asn_graph_train.subgraph(c)
    # The file name carries the component size, not the requested sample size.
    np.save(root+'asn_graph/train_'+str(len(c))+'_nodes', c)
    print('Graph info', subgraph.number_of_nodes(), subgraph.number_of_edges())
    print('Connected:', nx.is_strongly_connected(subgraph))
    print('Weighted:', nx.is_weighted(subgraph))
    print('---------------------------------------')

Graph info 510 1600
Connected: True
Weighted: False
---------------------------------------


In [14]:
# Re-seed both RNGs so this cell's sample is reproducible on its own.
random.seed(2024)
np.random.seed(2024)

# Rebuild the full unweighted topology twice (train copy and test copy).
node_ids = [node['id'] for node in G['nodes']]
edge_pairs = [(link['source'], link['target']) for link in G['links']]

asn_graph_train = nx.DiGraph()
asn_graph_train.add_nodes_from(node_ids)
asn_graph_train.add_edges_from(edge_pairs)

asn_graph_test = nx.DiGraph()
asn_graph_test.add_nodes_from(node_ids)
asn_graph_test.add_edges_from(edge_pairs)


# train and test has no overlap; within train, no overlap; within test, can have overlap
# NOTE(review): this cell samples from the complete node set, so the statement
# above is not enforced against graphs saved by other cells.
train_num = [1739]
total_nodes = np.array(list(asn_graph_train.nodes()))

for sample_size in train_num:
    nodes = np.random.choice(total_nodes, sample_size, replace=False)
    sampled_subgraph = asn_graph_train.subgraph(nodes)
    # Only the largest strongly connected component of the sample is kept.
    largest = max(nx.strongly_connected_components(sampled_subgraph), key=len, default=None)
    if largest is None:
        continue
    c = list(largest)
    subgraph = asn_graph_train.subgraph(c)
    # The file name carries the component size, not the requested sample size.
    np.save(root+'asn_graph/train_'+str(len(c))+'_nodes', c)
    print('Graph info', subgraph.number_of_nodes(), subgraph.number_of_edges())
    print('Connected:', nx.is_strongly_connected(subgraph))
    print('Weighted:', nx.is_weighted(subgraph))
    print('---------------------------------------')

Graph info 1739 8558
Connected: True
Weighted: False
---------------------------------------


### generate k-shortest path

In [10]:
from itertools import islice
def k_shortest_paths(G, source, target, k, weight=None):
    """Return at most ``k`` simple paths from ``source`` to ``target`` in ``G``.

    Paths are taken, in order, from ``nx.shortest_simple_paths``; ``weight`` is
    forwarded to it unchanged. Fewer than ``k`` paths are returned when the
    generator is exhausted early, so callers that need exactly ``k`` must check
    the length of the result.
    """
    path_iter = nx.shortest_simple_paths(G, source, target, weight=weight)
    first_k = islice(path_iter, k)
    return [path for path in first_k]

### Function definition

In [11]:
# G: G(V, E, C)                           weighted nx graph; edge attribute 'weight' holds the capacity
# STD: demands aligned with ST pairs      list[([s1, t1], dmd1), ([s2, t2], dmd2), ...]
# Pd: set of paths for every st pair      dict{(s1, t1): [path1, path2, ...], (s2, t2): ...}; each path is a list of nodes
# number of std pairs = number of keys in Pd
# k: number of shortest paths used for every (s, t, d) tuple

def generate_reallocation(G, STD, Pd, k):
    """Build the path-based allocation LP matrices for one graph instance.

    There is one variable per (demand, path) combination, laid out so that
    demand ``i`` owns columns ``k*i .. k*i + k - 1``.

    Args:
        G: graph exposing ``G.edges(data='weight')`` (e.g. a weighted
            ``nx.DiGraph``); the ``'weight'`` attribute is the edge capacity.
        STD: sequence of ``(st_pair, demand)`` entries; ``tuple(st_pair)`` must
            be a key of ``Pd``.
        Pd: mapping ``(s, t) -> list of paths``, each path a sequence of nodes.
            At least ``k`` paths are required per pair.
        k: number of candidate paths per demand.

    Returns:
        ``(A1, b1, A2, b2, c)`` where

        * ``A1`` (``len(STD) x len(STD)*k``) has ones on the columns of each
          demand's own paths, and ``b1`` is all ones;
        * ``A2`` has one row per edge used by at least one path, holding the
          demand carried over that edge divided by the edge capacity, and
          ``b2`` is all ones (rows are normalised by capacity);
        * ``c`` is the negated demand, repeated ``k`` times per demand.

    Raises:
        KeyError: if an s-t pair from ``STD`` is missing in ``Pd``.
        IndexError: if a pair has fewer than ``k`` paths.

    Note:
        Capacities of used edges are assumed to be non-zero, since rows are
        divided by them.
    """
    num_pairs = len(STD)
    demands = np.array([entry[1] for entry in STD], dtype=float)

    # constraint 1: one row per demand selecting its k path variables
    A1 = np.repeat(np.eye(num_pairs), k, axis=1)
    b1 = np.ones(num_pairs)

    # constraint 2: per-edge load. Edge -> row index is a dict so each hop is
    # an O(1) lookup instead of a linear scan of the edge list. Capacities are
    # read in the same pass so rows and capacities cannot get misaligned.
    edge_row = {}
    capacities = []
    for row, (u, v, capacity) in enumerate(G.edges(data='weight')):
        edge_row.setdefault((u, v), row)
        capacities.append(capacity)

    A2 = np.zeros((len(capacities), num_pairs * k))
    for i, entry in enumerate(STD):
        paths = Pd[tuple(entry[0])]  # candidate paths for this s-t pair
        for j in range(k):
            path = paths[j]
            for u, v in zip(path, path[1:]):
                row = edge_row.get((u, v))
                if row is not None:  # hops that are not edges of G are skipped
                    A2[row, k * i + j] = entry[1]
    b2 = np.array(capacities)

    # Drop edges that no candidate path traverses.
    used = A2.any(axis=1)
    A2 = A2[used]
    b2 = b2[used]

    # Normalise every remaining row by its capacity so the right-hand side is 1.
    A2 = A2 / b2[:, None]
    b2 = np.ones_like(b2)

    # obj: the negated demand is the coefficient of each path variable
    c = -np.repeat(demands, k)

    return A1, b1, A2, b2, c

### Load all training subgraphs with their link capacities as a group

In [12]:
# Reload the ASN topology. The context manager closes the file once the JSON
# is parsed (the original left the handle open).
with open(root+'asn_graph/ASN2k.json') as f:
    G = json.load(f)

# Unweighted topology.
asn_graph = nx.DiGraph()
asn_graph.add_nodes_from([i['id'] for i in G['nodes']])
asn_graph.add_edges_from([(i['source'], i['target']) for i in G['links']])

# Same topology with link capacities stored as edge 'weight'.
asn_graph_weight = nx.DiGraph()
asn_graph_weight.add_nodes_from([i['id'] for i in G['nodes']])
for i in G['links']:
    asn_graph_weight.add_edge(i['source'], i['target'], weight=i['capacity'])


num_nodes = [217, 237] # number of nodes for asn train

train_group = []        # weighted training subgraphs
train_group_noC = []    # the same subgraphs without capacities

for n in num_nodes:
    # Node lists are read from the train_<n>_nodes.npy files under asn_graph/.
    nodes = np.load(root+'asn_graph/train_' + str(n) + '_nodes.npy')
    subgraph = asn_graph_weight.subgraph(nodes)
    subgraph_noC = asn_graph.subgraph(nodes)
    print('Strongly connected:', nx.is_strongly_connected(subgraph))
    print('# of nodes and edges:', subgraph.number_of_nodes(), subgraph.number_of_edges())
    print('Weighted:', nx.is_weighted(subgraph))
    print('Strongly connected:', nx.is_strongly_connected(subgraph_noC))
    print('# of nodes and edges:', subgraph_noC.number_of_nodes(), subgraph_noC.number_of_edges())
    print('Weighted:', nx.is_weighted(subgraph_noC))
    print('------------------------')
    train_group.append(subgraph)
    train_group_noC.append(subgraph_noC)

Strongly connected: True
# of nodes and edges: 217 590
Weighted: True
Strongly connected: True
# of nodes and edges: 217 590
Weighted: False
------------------------
Strongly connected: True
# of nodes and edges: 237 586
Weighted: True
Strongly connected: True
# of nodes and edges: 237 586
Weighted: False
------------------------


### Load all test subgraphs with their link capacities as a group

In [15]:
# Reload the ASN topology. The context manager closes the file once the JSON
# is parsed (the original left the handle open).
with open(root+'asn_graph/ASN2k.json') as f:
    G = json.load(f)

# Unweighted topology.
asn_graph = nx.DiGraph()
asn_graph.add_nodes_from([i['id'] for i in G['nodes']])
asn_graph.add_edges_from([(i['source'], i['target']) for i in G['links']])

# Same topology with link capacities stored as edge 'weight'.
asn_graph_weight = nx.DiGraph()
asn_graph_weight.add_nodes_from([i['id'] for i in G['nodes']])
for i in G['links']:
    asn_graph_weight.add_edge(i['source'], i['target'], weight=i['capacity'])


num_nodes = [553, 1739] # number of nodes for asn test

test_group = []        # weighted test subgraphs
test_group_noC = []    # the same subgraphs without capacities

for n in num_nodes:
    # Node lists are read from the test_<n>_nodes.npy files under asn_graph/.
    nodes = np.load(root+'asn_graph/test_' + str(n) + '_nodes.npy')
    subgraph = asn_graph_weight.subgraph(nodes)
    subgraph_noC = asn_graph.subgraph(nodes)
    print('Strongly connected:', nx.is_strongly_connected(subgraph))
    print('# of nodes and edges:', subgraph.number_of_nodes(), subgraph.number_of_edges())
    print('Weighted:', nx.is_weighted(subgraph))
    print('Strongly connected:', nx.is_strongly_connected(subgraph_noC))
    print('# of nodes and edges:', subgraph_noC.number_of_nodes(), subgraph_noC.number_of_edges())
    print('Weighted:', nx.is_weighted(subgraph_noC))
    print('------------------------')
    test_group.append(subgraph)
    test_group_noC.append(subgraph_noC)

Strongly connected: True
# of nodes and edges: 553 1532
Weighted: True
Strongly connected: True
# of nodes and edges: 553 1532
Weighted: False
------------------------
Strongly connected: True
# of nodes and edges: 1739 8558
Weighted: True
Strongly connected: True
# of nodes and edges: 1739 8558
Weighted: False
------------------------


## dataset generation

In [17]:
### gen train ####
# For every graph in the selected group: sample demand sets, build the
# allocation LP, solve it, and store the instances whose solve succeeded.
root = 'raw/'

import time

random.seed(2024)
np.random.seed(2024)


pkg_idx = 0              # instance index for your data generation
success_cnt = 0
fail_cnt = 0
bounds = (0., 1.)

max_iter = 15000         # upper bound on sampling attempts per graph
num = 10                 # number of instance generated

k = 4                    # k-shortest path
max_d = 5000             # demand max value
min_d = 1000             # demand min value

number_of_st = 10        # number of st pairs

data_t = 'train'         # 'train'/'test'

if data_t == 'train':
    group = train_group
    group_noC = train_group_noC
else:
    group = test_group
    group_noC = test_group_noC

# Solver warnings are turned into exceptions so that numerically troubled
# instances are skipped. The filters are reset in `finally`, so a crash in this
# cell no longer leaves the session with warnings raised as errors.
warnings.filterwarnings("error")
try:
    graph_info = []
    for g in range(len(group)):
        stds = []
        ips = []
        success_cnt = 0
        times = []
        # Try up to max_iter times so that failed solves are replaced by new
        # samples. The original looped only `num` times, which left `max_iter`
        # unused and produced fewer than `num` instances after any failure.
        for attempt in range(max_iter):

            # generate st pairs with demand value
            std = []
            Pd = {}
            while len(std) < number_of_st:
                st = np.random.choice(group[g].nodes(), 2, replace=False)
                d = random.uniform(min_d, max_d)
                k_paths = k_shortest_paths(group_noC[g], st[0], st[1], k=k)
                # Pairs with fewer than k simple paths are resampled.
                if len(k_paths) != k:
                    continue
                Pd[(st[0], st[1])] = k_paths
                std.append((st, d))

            A1, b1, A2, b2, c = generate_reallocation(group[g], std, Pd, k)
            A = np.vstack([A1, A2])
            b = np.hstack([b1, b2])

            # All constraints are passed as inequalities; there are no equalities.
            A_eq = None
            b_eq = None
            A_ub = A
            b_ub = b

            n_time = time.time()
            try:
                res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds,
                              method='interior-point')
            except (LinAlgWarning, OptimizeWarning, AssertionError):
                fail_cnt += 1
                continue
            # Only solves that returned are timed.
            times.append(time.time()-n_time)
            print(res)

            if res.success and not np.isnan(res.fun):
                ips.append((torch.from_numpy(A).to(torch.float), torch.from_numpy(b).to(torch.float), torch.from_numpy(c).to(torch.float)))
                success_cnt += 1
                stds.append(std)
            if success_cnt == num:
                break

        with open(root+'/raw/instance_'+str(pkg_idx)+'_stds.pkl','wb') as f:
            pickle.dump(stds, f)
        with gzip.open(f'{root}/raw/instance_{pkg_idx}.pkl.gz', "wb") as file:
            pickle.dump(ips, file)
        pkg_idx += 1

        # Guard against a graph on which no solve ever completed.
        avg_time = sum(times)/len(times) if times else float('nan')
        graph_info.append((group[g].number_of_nodes(), group[g].number_of_edges(), avg_time))

    # Any data_t other than 'train' is stored under the 'test' name, matching
    # the group selection above.
    split = 'train' if data_t == 'train' else 'test'
    np.save(root+'/raw/asn_'+split+'_'+str(number_of_st)+'st_info', graph_info)
    for i in graph_info:
        print('Graph info and average time used:', i)
finally:
    warnings.resetwarnings()

      message: Optimization terminated successfully.
      success: True
       status: 0
          fun: -9750.000034357108
            x: [ 4.676e-02  4.560e-02 ...  4.886e-03  2.613e-02]
          nit: 8
 intermediate: []
      message: Optimization terminated successfully.
      success: True
       status: 0
          fun: -9000.0000613218
            x: [ 1.554e-01  1.610e-02 ...  3.973e-02  3.973e-02]
          nit: 11
 intermediate: []
      message: Optimization terminated successfully.
      success: True
       status: 0
          fun: -13228.561989570215
            x: [ 1.445e-01  8.291e-02 ...  3.780e-02  1.946e-02]
          nit: 9
 intermediate: []
      message: Optimization terminated successfully.
      success: True
       status: 0
          fun: -11717.525419053525
            x: [ 2.309e-02  2.796e-02 ...  3.692e-02  3.692e-02]
          nit: 10
 intermediate: []
      message: Optimization terminated successfully.
      success: True
       status: 0
          fu