## Generate and load data

In [11]:
import sys

# Extend the import search path with '../../', i.e. two levels above the
# current working directory (presumably the repository root -- TODO confirm).
sys.path.append('../' * 2)

from rl4co.data.generate_data import generate_dataset

In [12]:
# IPython introspection: `??` shows the signature and source of `generate_dataset`
??generate_dataset

[0;31mSignature:[0m
[0mgenerate_dataset[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfilename[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdata_dir[0m[0;34m=[0m[0;34m'data'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mname[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mproblem[0m[0;34m=[0m[0;34m'all'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdata_distribution[0m[0;34m=[0m[0;34m'all'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdataset_size[0m[0;34m=[0m[0;36m10000[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgraph_sizes[0m[0;34m=[0m[0;34m[[0m[0;36m20[0m[0;34m,[0m [0;36m50[0m[0;34m,[0m [0;36m100[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moverwrite[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mseed[0m[0;34m=[0m[0;36m1234[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0mgenerate_dat

In [3]:
# Generate a single VRP test set: 10,000 instances at graph size 100, fixed seed.
generate_dataset(
    problem='vrp',
    name='test',
    graph_sizes=[100],
    dataset_size=10_000,
    seed=1234,
)

## Load data with Numpy

In [4]:
# You may simply load with NumPy directly. Npz files are way more efficient than pickle files.
# The generated data uses the same process as Kool et al. (2019) and i

import numpy as np


# Presumably the archive written by the `generate_dataset(problem='vrp', ...)`
# call earlier in this notebook -- verify the path if the generation arguments change.
fname = 'data/vrp/vrp100_test_seed1234.npz'

# `np.load` on an .npz archive returns a lazily-read NpzFile that keeps the
# underlying file open. Using it as a context manager releases the handle as
# soon as the inspection below is finished.
with np.load(fname) as data:
    # Names of the arrays stored in the archive
    print(data.files)

    # Shape of the 'locs' array, followed by the raw 'demand' values
    print(data['locs'].shape)
    print(data['demand'])

['depot', 'locs', 'demand', 'capacity']
(10000, 100, 2)
[[1. 3. 1. ... 7. 9. 5.]
 [6. 2. 5. ... 9. 8. 2.]
 [6. 2. 6. ... 1. 4. 1.]
 ...
 [4. 9. 7. ... 3. 7. 2.]
 [4. 5. 9. ... 8. 6. 6.]
 [5. 1. 9. ... 9. 5. 6.]]


In [5]:
# Alternative to NumPy: the environment's `load_data` method reads the same
# file directly into a TensorDict.

from rl4co.envs import CVRPEnv

td = CVRPEnv.load_data(fname)

# Show the demand entry as returned by `load_data`
cvrp_demand = td['demand']
print(cvrp_demand)

tensor([[0.0200, 0.0600, 0.0200,  ..., 0.1400, 0.1800, 0.1000],
        [0.1200, 0.0400, 0.1000,  ..., 0.1800, 0.1600, 0.0400],
        [0.1200, 0.0400, 0.1200,  ..., 0.0200, 0.0800, 0.0200],
        ...,
        [0.0800, 0.1800, 0.1400,  ..., 0.0600, 0.1400, 0.0400],
        [0.0800, 0.1000, 0.1800,  ..., 0.1600, 0.1200, 0.1200],
        [0.1000, 0.0200, 0.1800,  ..., 0.1800, 0.1000, 0.1200]])


  warn(
  from .autonotebook import tqdm as notebook_tqdm


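> NOTE: only for CVRP, the demand is scaled when the data is loaded into the environment (here the raw values are divided by 50, e.g. 1 → 0.02 and 3 → 0.06), hence the difference from the raw NumPy values printed above.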

## Generate main table data


In [6]:
# Settings for the main-table test sets
num_instances = 10_000
graph_sizes = [20, 50]
problems = ['tsp', 'vrp']

# Arguments shared by every call; only the problem type varies per iteration
test_kwargs = dict(
    name='test',
    dataset_size=num_instances,
    graph_sizes=graph_sizes,
    seed=1234,
)

for problem in problems:
    generate_dataset(problem=problem, **test_kwargs)

## Generate generalization data

In [14]:
# Settings for the generalization sets: fewer instances, many graph sizes
num_instances = 1000
graph_sizes = [10, 20, 50, 75, 100, 125, 150, 200, 500, 1000]
problems = ['tsp', 'vrp']

# Arguments shared by every call; only the problem type varies per iteration
generalization_kwargs = dict(
    name='generalization',
    dataset_size=num_instances,
    graph_sizes=graph_sizes,
    seed=1234,
)

for problem in problems:
    generate_dataset(problem=problem, **generalization_kwargs)

?
?


In [8]:
# # Large scale: fewer instances, larger graph sizes
# num_instances = 100
# graph_sizes = [500, 1000]
# problems = ['tsp', 'vrp']


# for problem in problems:
#     generate_dataset(problem=problem, name='test_generalization', 
#                      dataset_size=num_instances, graph_sizes=graph_sizes, seed=1234 )