### Code to prepare labeled CCP training data

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to project code (presumably `lib/`) are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import itertools as it
from lib.problems.generator import ProblemDataset, Generator


pytorch version: 1.11.0 
pytorch CUDA version: 11.3 
pytorch CUDA available: True 
--------------------------------------- 




In [3]:
# --- Configuration of the labeled CCP dataset to assemble -------------------
SEED = 1234
SIZE = 4096             # size of dataset
N = 200                 # number of nodes
K = (3, 12)             # single value or (low, high) range, high given inclusively
MAX_CAP = 1.1           # [1.05, 1.1, 1.2, 1.5]
COORDS_DIST = "gm"      # [gm, mixed]
UNF_FRAC = 0.2          # fraction of uniformly sampled coords for mixed data

# `k` is the tag used in file names; `K` is what gets handed to the generator.
if isinstance(K, tuple):
    k = "-".join((str(K[0]), str(K[1])))
    # second value is exclusive in generator, so shift the upper bound by one
    K = (K[0], K[1] + 1)
else:
    k = K

# Distribution tag used in file names; non-"gm" data also records the uniform fraction.
if COORDS_DIST == "gm":
    dist = COORDS_DIST
else:
    dist = f"{COORDS_DIST}_unf{UNF_FRAC}"

# Merged files are written to `save_pth`; the labeled parts are read from `data_pth`.
save_pth = "./data/CCP/CCP{}/".format(N)
data_pth = save_pth + "raw/"

In [4]:
def part_sort_key(fname):
    """Sort key that orders part files by prefix, then by numeric part index.

    Plain string sorting places ``..._part10.npz`` before ``..._part2.npz``.
    Comparing the trailing index as an integer keeps the parts in numeric
    order, so the merge order stays stable once there are more than 10 parts.

    Args:
        fname: file name such as ``"<prefix>part<idx>.npz"``.

    Returns:
        A tuple ``(prefix, idx, fname)``. Names without a numeric
        ``part<idx>`` suffix fall back to ``(stem, -1, fname)`` and are
        therefore ordered by their full stem.
    """
    stem = os.path.splitext(fname)[0]
    prefix, sep, idx = stem.rpartition("part")
    if sep and idx.isdigit():
        return (prefix, int(idx), fname)
    return (stem, -1, fname)


files = os.listdir(data_pth)
print(files)
# Only the labeled parts are merged; unlabeled raw parts are ignored.
lbl_files = sorted((f for f in files if "label" in f), key=part_sort_key)
lbl_files

['train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part6.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part7.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part3.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part4.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part5.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part2.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part4.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part0.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part7.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part6.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part3.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part0.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part2.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part5.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part1.npz', 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_part1.npz']


['train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part6.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part7.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part3.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part2.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part4.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part0.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part5.npz',
 'train_gm_n200_k3-12_s4096_cap1_1_seed1234_labeled_part1.npz']

In [5]:
# merge parts: load every labeled part file, keeping one entry per part
dataset = []
for part_file in lbl_files:
    part_path = os.path.join(data_pth, part_file)
    dataset.append(Generator.load_dataset(part_path))
len(dataset)

8

In [6]:
# Flatten the per-part collections into one list, preserving part order.
ds = [item for part in dataset for item in part]
len(ds)

4052

In [7]:
# Training split: the first 4000 merged items.
train_ds = ds[:4000]
size = len(train_ds)
print(size)
# NOTE: `k` and `dist` are the file-name tags set in the configuration cell.
cap_tag = str(MAX_CAP).replace('.', '_')
fname = "train_{}_n{}_k{}_s{}_cap{}_seed{}.npz".format(dist, N, k, size, cap_tag, SEED)
Generator.save_dataset(train_ds, filepath=os.path.join(save_pth, fname))

4000
Dataset file with same name exists already. Overwrite file? (y/n)


'data/CCP/CCP200/train_gm_n200_k3-12_s4000_cap1_1_seed1234.npz'

In [8]:
# Validation split: everything after the first 4000 merged items.
val_ds = ds[4000:]
size = len(val_ds)
print(size)
# NOTE: `k` and `dist` are the file-name tags set in the configuration cell.
cap_tag = str(MAX_CAP).replace('.', '_')
fname = "val_{}_n{}_k{}_s{}_cap{}_seed{}.npz".format(dist, N, k, size, cap_tag, SEED)
Generator.save_dataset(val_ds, filepath=os.path.join(save_pth, fname))

52
Dataset file with same name exists already. Overwrite file? (y/n)


'data/CCP/CCP200/val_gm_n200_k3-12_s52_cap1_1_seed1234.npz'

### Merging parts for the Shanghai Telecom and Telecom Italia datasets and the VRP data

In [3]:
# Telecom datasets
# (run either this cell or the VRP configuration cell, not both)
PROBLEM = "ccp"
DSET = "telecom_italia"  # alternative: "shanghai_telecom"
N_PARTS = 10
P_SIZE = 512     # size of part
SIZE = P_SIZE * N_PARTS

SEED = 1234
N = 200
CAP = 1.1

# Merged files are written to `save_pth`; the labeled parts are read from `data_pth`.
save_pth = "./data/CCP/benchmark/{}/sub/".format(DSET)
data_pth = save_pth + "raw/"
# Common file-name suffix; the "." in the capacity is replaced by "_".
fname = "n{}_cap{}_seed{}".format(N, str(CAP).replace('.', '_'), SEED)

In [3]:
# VRP dataset
# (run either this cell or the telecom configuration cell, not both)
PROBLEM = "cvrp"
N_PARTS = 10
P_SIZE = 512     # size of part
SIZE = P_SIZE * N_PARTS

SEED = 1234
N = 200
CAP = 1.1
K = 30
coord_samp = 'mixed'  # ['uniform', 'gm', 'mixed']
unf_frac = 0.2
weight_samp = 'random_k_variant'  # ['random_int', 'uniform', 'gamma', 'random_k_variant']

# Distribution tag used in file names; non-"gm" data also records the uniform fraction.
if coord_samp == "gm":
    dist = coord_samp
else:
    dist = "{}_unf{}".format(coord_samp, unf_frac)

# Merged files are written to `save_pth`; the labeled parts are read from `data_pth`.
save_pth = "./data/VRP/VRP{}/".format(N)
data_pth = save_pth + "raw/"
# Common file-name suffix; the "." in the capacity is replaced by "_".
fname = "{}_n{}_kmax{}_cap{}_seed{}".format(dist, N, K, str(CAP).replace('.', '_'), SEED)

In [4]:
def part_sort_key(fname):
    """Sort key that orders part files by prefix, then by numeric part index.

    Plain string sorting places ``..._part10.npz`` before ``..._part2.npz``.
    Comparing the trailing index as an integer keeps the parts in numeric
    order, so the merge order (and with it the train/val split) stays stable
    when N_PARTS grows beyond 10.

    Args:
        fname: file name such as ``"<prefix>part<idx>.npz"``.

    Returns:
        A tuple ``(prefix, idx, fname)``. Names without a numeric
        ``part<idx>`` suffix fall back to ``(stem, -1, fname)`` and are
        therefore ordered by their full stem.
    """
    stem = os.path.splitext(fname)[0]
    prefix, sep, idx = stem.rpartition("part")
    if sep and idx.isdigit():
        return (prefix, int(idx), fname)
    return (stem, -1, fname)


files = os.listdir(data_pth)
print(files)
# Only the labeled parts are merged; unlabeled raw parts are ignored.
lbl_files = sorted((f for f in files if "label" in f), key=part_sort_key)
lbl_files

['train_n200_s5120_cap1_1_seed1234_part2.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part7.npz', 'train_n200_s5120_cap1_1_seed1234_part4.npz', 'train_n200_s5120_cap1_1_seed1234_part6.npz', 'train_n200_s5120_cap1_1_seed1234_part1.npz', 'train_n200_s5120_cap1_1_seed1234_part8.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part0.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part4.npz', 'train_n200_s5120_cap1_1_seed1234_part7.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part2.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part6.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part3.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part8.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part1.npz', 'train_n200_s5120_cap1_1_seed1234_part5.npz', 'train_n200_s5120_cap1_1_seed1234_part9.npz', 'train_n200_s5120_cap1_1_seed1234_part3.npz', 'train_n200_s5120_cap1_1_seed1234_part0.npz', 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_pa

['train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part0.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part1.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part2.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part3.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part4.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part5.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part6.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part7.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part8.npz',
 'train_n200_s5120_cap1_1_seed1234_ccp_mh_labeled_part9.npz']

In [5]:
# Load each labeled part with convert=False; the merge step below treats every
# loaded part as a dict, so this presumably returns the raw key -> array mapping.
dataset = []
for part_idx, part_file in enumerate(lbl_files):
    part_path = os.path.join(data_pth, part_file)
    part_data = Generator.load_dataset(part_path, convert=False)
    print(f"loaded part {part_idx}")
    dataset.append(part_data)

loaded part 0
loaded part 1
loaded part 2
loaded part 3
loaded part 4
loaded part 5
loaded part 6
loaded part 7
loaded part 8
loaded part 9


In [6]:
def merge_parts(parts, skip_keys=('size', 'problem')):
    """Concatenate the per-key arrays of several dataset parts.

    The merge lives in a function so that its loop variables stay local.
    A top-level ``for k, v in ...`` loop would rebind the notebook-level
    ``k`` that the CCP file-name cells rely on.

    Args:
        parts: iterable of dict-like parts mapping key -> array.
        skip_keys: keys that describe a single part rather than per-instance
            data; they are left out of the merged result.

    Returns:
        dict mapping every remaining key to the concatenation (in part order)
        of its arrays. A key occurring in only one part keeps its original
        value unchanged.
    """
    collected = {}
    for part in parts:
        for key, value in part.items():
            if key not in skip_keys:
                collected.setdefault(key, []).append(value)
    # Concatenate once per key instead of re-copying the growing array
    # for every additional part.
    return {
        key: values[0] if len(values) == 1 else np.concatenate(values)
        for key, values in collected.items()
    }


merged_data = merge_parts(dataset)

In [7]:
# Total number of merged entries, measured on the 'graph_size' array.
graph_sizes = merged_data['graph_size']
full_size = len(graph_sizes)
full_size

4981

In [8]:
SPLIT_IDX = 4900 #5000
# Training split: the leading SPLIT_IDX entries of every merged array.
train_ds = {key: arr[:SPLIT_IDX] for key, arr in merged_data.items()}
size = len(train_ds['graph_size'])
print(size)
# Re-attach the per-file metadata that was excluded from the merge.
# NOTE(review): 'problem' is hard-coded to "ccp" while PROBLEM is passed to
# save_dataset; these differ when the VRP configuration is active - confirm intended.
train_ds.update(problem=np.array("ccp"), size=np.array(size))
# NOTE(review): the file name carries the size of the full merged set
# (`full_size`), not the size of this split - confirm intended.
sfname = "train_s{}_{}".format(full_size, fname)
Generator.save_dataset(train_ds, filepath=os.path.join(save_pth, sfname), problem=PROBLEM)

4900


'data/CCP/benchmark/telecom_italia/sub/train_s4981_n200_cap1_1_seed1234.npz'

In [9]:
# Validation split: everything from SPLIT_IDX onwards in every merged array.
val_ds = {key: arr[SPLIT_IDX:] for key, arr in merged_data.items()}
size = len(val_ds['graph_size'])
print(size)
# Re-attach the per-file metadata that was excluded from the merge.
# NOTE(review): 'problem' is hard-coded to "ccp" while PROBLEM is passed to
# save_dataset; these differ when the VRP configuration is active - confirm intended.
val_ds.update(problem=np.array("ccp"), size=np.array(size))
# NOTE(review): the file name carries the size of the full merged set
# (`full_size`), not the size of this split - confirm intended.
sfname = "val_s{}_{}".format(full_size, fname)
Generator.save_dataset(val_ds, filepath=os.path.join(save_pth, sfname), problem=PROBLEM)

81


'data/CCP/benchmark/telecom_italia/sub/val_s4981_n200_cap1_1_seed1234.npz'