In [55]:
%load_ext autoreload
from ddls.devices.processors.gpus.A100 import A100
from ddls.utils import ddls_graph_from_pbtxt_file
from ddls.plotting.plotting import plot_computation_graph
from ddls.environments.job_placing.job_placing_all_nodes_environment import JobPlacingAllNodesEnvironment
from ddls.demands.jobs.job import Job
from ddls.distributions.uniform import Uniform

import glob

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
%autoreload

# Collect the candidate graph files.
path_to_files = '/scratch/datasets/ddls/jobs/tensorflow_synthetic_graphs/valid'
# glob.glob returns matches in arbitrary (filesystem-dependent) order, so sort
# them to make the `file_paths[:num_graphs]` selection below reproducible.
file_paths = sorted(glob.glob(path_to_files + '/*'))
if not file_paths:
    # Fail fast with a clear message rather than silently building zero jobs.
    raise FileNotFoundError('No graph files found in ' + path_to_files)

# Build one ddls computation graph for each of the first `num_graphs` files.
num_graphs = 1
ddls_computation_graphs = [
    ddls_graph_from_pbtxt_file(file_path, processor_type_profiled='A100', verbose=False)
    for file_path in file_paths[:num_graphs]
]

# Optionally plot every graph; disabled by default.
visualise = False
if visualise:
    for graph in ddls_computation_graphs:
        fig = plot_computation_graph(graph, scaling_factor=3, title='ddls_graph', show_fig=True, verbose=True)

# Create one Job per computation graph, each with num_training_steps=2.
jobs = [Job(computation_graph=graph, num_training_steps=2) for graph in ddls_computation_graphs]

In [63]:
%autoreload

# Node configuration keyed by node type. The single 'type_1' entry sets
# num_nodes=16 and one workers_config item (num_workers=4, worker class A100).
node_config = {
    'type_1': {
        'num_nodes': 16,
        'workers_config': [{'num_workers': 4, 'worker': A100}],
    },
}

print(node_config)

{'type_1': {'num_nodes': 16, 'workers_config': [{'num_workers': 4, 'worker': <class 'ddls.devices.processors.gpus.A100.A100'>}]}}


In [64]:
%autoreload

# Topology configuration: the topology 'type' plus the kwargs that go with it
# (here a 'torus' with x_dims=4 and y_dims=4).
topology_config = dict(
    type='torus',
    kwargs=dict(x_dims=4, y_dims=4),
)

In [65]:
%autoreload

# Construct the job-placing environment from the topology, node and job
# settings defined in the cells above.
env = JobPlacingAllNodesEnvironment(
    topology_config=topology_config,
    node_config=node_config,
    jobs=jobs,
    job_sampling_mode='remove',
    job_interarrival_time_dist=Uniform(min_val=1, max_val=1000),
    max_cluster_simulation_run_time=float('inf'),
    job_queue_capacity=10,
    seed=0,
    # No save path is given here; an earlier version of this cell used
    # '/scratch/datasets/ddls/sims' instead.
    path_to_save=None,
    save_cluster_data=False,
    save_freq=100,
    use_sqlite_database=True,
)

In [66]:
%autoreload

# Reset the environment before interacting with it. In the recorded run this
# call also emitted an "Encoded job obs:" dump with shape (96, 13).
env.reset()


Encoded job obs:
(96, 13)
[[array([0.0], dtype=object) array(False, dtype=object)
  array(0, dtype=object) ...
  array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        dtype=object)
  array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        dtype=object)
  array(0.0, dtype=object)]
 [array([0.12357414448669202], dtype=object) array(False, dtype=object)
  array(0, dtype=object) ...
  array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        dtype=object)
  array([0, 0, 0, 0, 0, 0, 0, 0, 

In [None]:
# Show the string representation of the environment's `cluster` attribute.
print(env.cluster)