##### Imports the necessary modules and sets the system path to locate them.

In [61]:
import sys
import networkx
import astra_sim_sdk.astra_sim_sdk as astra_sim_kit
sys.path.append("../../utils")
from astra_sim import AstraSim, Collective, NetworkBackend
from infragraph.infragraph_service import InfraGraphService
from infragraph.blueprints.fabrics.clos_fat_tree_fabric import ClosFatTreeFabric
from infragraph.blueprints.devices.server import Server
from infragraph.blueprints.devices.generic_switch import Switch



##### Connects the client to the AstraSim gRPC server, initializes the AstraSim SDK, and creates a folder (tagged as specified) containing all configuration details, generated results, and logs.

In [62]:
astra = AstraSim(server_endpoint = "172.17.0.2:8989", tag = "infragraph_clos_3tier_trial")

Successfully connected to gRPC server at 172.17.0.2:8989


##### Creating Infragraph for 3 tier clos fabric

In [63]:
server = Server()
switch = Switch(port_count=4)
clos_fat_tree = ClosFatTreeFabric(switch, server, 3,[])
astra.configuration.infragraph.infrastructure.deserialize(clos_fat_tree.serialize())
print(astra.configuration.infragraph.infrastructure)

description: Clos Fat Tree Fabric
devices:
- components:
  - choice: cpu
    count: 1
    description: Generic CPU
    name: cpu
  - choice: xpu
    count: 2
    description: Generic GPU/XPU
    name: xpu
  - choice: switch
    count: 1
    description: NVLink Switch
    name: nvlsw
  - choice: switch
    count: 1
    description: PCI Express Switch Gen 4
    name: pciesw
  - choice: nic
    count: 2
    description: Generic Nic
    name: nic
  - choice: custom
    count: 1
    custom:
      type: mgmt-nic
    description: Mgmt Nic
    name: mgmt
  description: A generic server with npu_factor * 4 xpu(s)
  edges:
  - ep1:
      component: mgmt
    ep2:
      component: cpu[0]
    link: pcie
    scheme: one2one
  - ep1:
      component: cpu
    ep2:
      component: cpu
    link: fabric
    scheme: many2many
  - ep1:
      component: xpu
    ep2:
      component: nvlsw
    link: nvlink
    scheme: many2many
  - ep1:
      component: cpu[0]
    ep2:
      component: pciesw[0]
    link: p

##### Display Fabric

In [64]:
service = InfraGraphService()
service.set_graph(clos_fat_tree)
g = service.get_networkx_graph()
print(networkx.write_network_text(g, vertical_chains=True))
total_npus = 16

╙── server.4.mgmt.0
    │
    server.4.cpu.0
    │
    server.4.pciesw.0
    ├── server.4.xpu.0
    │   │
    │   server.4.nvlsw.0
    │   │
    │   server.4.xpu.1 ─ server.4.pciesw.0
    ├── server.4.nic.0
    │   │
    │   tier_0.4.port.0
    │   │
    │   tier_0.4.asic.0
    │   ├── tier_0.4.port.1
    │   │   │
    │   │   server.4.nic.1 ─ server.4.pciesw.0
    │   ├── tier_0.4.port.2
    │   │   │
    │   │   tier_1.4.port.0
    │   │   │
    │   │   tier_1.4.asic.0
    │   │   ├── tier_1.4.port.1
    │   │   │   │
    │   │   │   tier_0.5.port.2
    │   │   │   │
    │   │   │   tier_0.5.asic.0
    │   │   │   ├── tier_0.5.port.0
    │   │   │   │   │
    │   │   │   │   server.5.nic.0
    │   │   │   │   │
    │   │   │   │   server.5.pciesw.0
    │   │   │   │   ├── server.5.cpu.0
    │   │   │   │   │   │
    │   │   │   │   │   server.5.mgmt.0
    │   │   │   │   ├── server.5.xpu.0
    │   │   │   │   │   │
    │   │   │   │   │   server.5.nvlsw.0
    │   │   │   │   │   │
  

##### Generates workload execution traces for each rank and configures the data size, which is mandatory for AstraSim workload configuration.

In [65]:
astra.configuration.common_config.workload = astra.generate_collective(collective=Collective.ALLREDUCE, coll_size= 1 *1024*1024, npu_range=[0, total_npus])

All contents of the folder /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/configuration/workload have been deleted.
Generated 16 et in /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/configuration/workload


##### Configure the system configurations

In [66]:
astra.configuration.common_config.system.scheduling_policy = astra.configuration.common_config.system.LIFO
astra.configuration.common_config.system.endpoint_delay = 10
astra.configuration.common_config.system.active_chunks_per_dimension = 1
astra.configuration.common_config.system.all_gather_implementation = [astra.configuration.common_config.system.RING]
astra.configuration.common_config.system.all_to_all_implementation = [astra.configuration.common_config.system.DIRECT]
astra.configuration.common_config.system.all_reduce_implementation = [astra.configuration.common_config.system.ONERING]
astra.configuration.common_config.system.collective_optimization = astra.configuration.common_config.system.LOCALBWAWARE
astra.configuration.common_config.system.local_mem_bw = 1600

##### Configure the remote memory configuration

In [67]:
astra.configuration.common_config.remote_memory.memory_type = astra.configuration.common_config.remote_memory.NO_MEMORY_EXPANSION
print(astra.configuration.common_config.remote_memory)

memory_type: NO_MEMORY_EXPANSION
remote_mem_bw: 0
remote_mem_latency: 0



##### Configure the network backend choice and the topology choice for that backend


In [68]:
astra.configuration.network_backend.choice = astra.configuration.network_backend.NS3
astra.configuration.network_backend.ns3.topology.choice = astra.configuration.network_backend.ns3.topology.INFRAGRAPH
astra.configuration.network_backend.ns3.network.packet_payload_size = int(8192)


##### Adding ns3 trace and logical dimension 

In [69]:
astra.configuration.network_backend.ns3.logical_topology.logical_dimensions = [total_npus]
astra.configuration.network_backend.ns3.trace.trace_ids = []
for i in range(0, total_npus):
    astra.configuration.network_backend.ns3.trace.trace_ids.append(i)

##### Adding ASTRA-sim specific annotation

In [70]:
host_device_spec = astra_sim_kit.AnnotationDeviceSpecifications()
host_device_spec.device_bandwidth_gbps = 100
host_device_spec.device_latency_ms = 0.05
host_device_spec.device_name = "server"
host_device_spec.device_type = "host"
astra.configuration.infragraph.annotations.device_specifications.append(host_device_spec)

switch_device_spec = astra_sim_kit.AnnotationDeviceSpecifications()
switch_device_spec.device_bandwidth_gbps = 100
switch_device_spec.device_latency_ms = 0.05
switch_device_spec.device_name = "switch"
switch_device_spec.device_type = "switch"
astra.configuration.infragraph.annotations.device_specifications.append(
    switch_device_spec
)

<astra_sim_sdk.astra_sim_sdk.AnnotationDeviceSpecificationsIter at 0x783a85a1a600>

##### Configure ASTRA-sim cmd parameters

In [71]:
astra.configuration.common_config.cmd_parameters.comm_scale = 1
astra.configuration.common_config.cmd_parameters.injection_scale = 1
astra.configuration.common_config.cmd_parameters.rendezvous_protocol = False

#### Start the simulation by providing the network backend name in uppercase letters.

In [72]:
astra.run_simulation(NetworkBackend.NS3)

Generating Configuration ZIP
output_path: /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/config.zip
folder_path: /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/configuration
  group message from schema - communicator group configuration empty'

message: Simulation started successfully

astra-sim server Status: running
Downloading Output files....
Transferring Files from ASTRA-sim server
Downloading file: fct.txt
Downloading file: flow.txt
Downloading file: pfc.txt
Downloading file: qlen.txt
Downloading file: simulation.log
Downloading file: trace_out.tr
All files downloaded Successfully
Translating Metrics...
Generated fct.csv at:  /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/output/fct.csv
Generated: flow_stats.csv at:  /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/output/flow_stats.csv
All metrics translated successfull

  df = pd.read_csv(
  df = pd.read_csv(


##### Download all the configurations as a zip

In [73]:
astra.download_configuration()

Downloaded all configuration in /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/server_configuration.zip


##### Read output files

In [74]:
import pandas as pd
import os
from common import FileFolderUtils
df = pd.read_csv(os.path.join(FileFolderUtils.get_instance().OUTPUT_DIR, "fct.csv"))
df.head()

Unnamed: 0,Source Hex ip,Destination Hex ip,Source Port,Destination Port,Data size (B),Start Time,FCT,Standalone FCT
0,0b000001,0b000101,10000,100,65536,10,25930,26583
1,0b000401,0b000501,10000,100,65536,10,25930,26583
2,0b000801,0b000901,10000,100,65536,10,25930,26583
3,0b000a01,0b000b01,10000,100,65536,10,25930,26583
4,0b000c01,0b000d01,10000,100,65536,10,25930,26583


##### Save infragraph as a yaml

In [75]:
import yaml
with open(os.path.join(FileFolderUtils.get_instance().OUTPUT_DIR,"../infrastructure","3tier.yaml"),"w") as f:
    data = clos_fat_tree.serialize("dict")
    yaml.dump(data, f, default_flow_style=False, indent=4)

print("saved yaml to:", os.path.join(FileFolderUtils.get_instance().OUTPUT_DIR,"..","3tier.yaml"))

saved yaml to: /workspaces/astra_sim_service/client-scripts/utils/../trial/infragraph_clos_3tier_trial/output/../3tier.yaml
