# Hello, FABRIC: Create your first FABRIC slice

## Configure the Environment

Set the environment variables that will be used by this notebook. If you are using the FABRIC JupyterHub, some of the environment will be automatically configured for you.  You will only need to set your bastion username, upload your bastion private key, and set the path to where you put your bastion private key. Your bastion username and private key should already be in your possession.  If you do not have a bastion username and private key, please contact the FABRIC admins using the [FABRIC User Forum](https://learn.fabric-testbed.net/forums/) 

If you are using the FABRIC API outside of the JupyterHub you will need to configure all of the environment variables. Defaults below will be correct in many situations but you will need to confirm your configuration.  If you have questions about this configuration, please contact the FABRIC admins using the [FABRIC User Forum](https://learn.fabric-testbed.net/forums/) 

More information about accessing your experiments through the FABRIC bastion hosts can be found [here](https://learn.fabric-testbed.net/knowledge-base/logging-into-fabric-vms/).

In [None]:
import os

# If you are using the FABRIC JupyterHub, the following three environment vars
# were automatically provided when you logged in.
#os.environ['FABRIC_CREDMGR_HOST']='cm.fabric-testbed.net'
#os.environ['FABRIC_ORCHESTRATOR_HOST']='orchestrator.fabric-testbed.net'
#os.environ['FABRIC_TOKEN_LOCATION']=os.environ['HOME']+'/work/fabric_token.json'

# Bastion host name
os.environ['FABRIC_BASTION_HOST'] = 'bastion-1.fabric-testbed.net'

# Set your Bastion username and private key.
# NOTE: <INSERT_YOUR_FABRIC_USERNAME> is a placeholder, not valid Python.
# Replace it with your bastion username as a quoted string before running
# this cell; until then the cell fails with a SyntaxError.
os.environ['FABRIC_BASTION_USERNAME']=<INSERT_YOUR_FABRIC_USERNAME>
os.environ['FABRIC_BASTION_KEY_LOCATION']=os.environ['HOME']+'/work/.ssh/id_rsa_fabric'

# Set the keypair FABRIC will install in your slice. 
# Both paths are built from $HOME, so HOME must be set in the environment.
os.environ['FABRIC_SLICE_PRIVATE_KEY_FILE']=os.environ['HOME']+'/.ssh/id_rsa'
os.environ['FABRIC_SLICE_PUBLIC_KEY_FILE']=os.environ['HOME']+'/.ssh/id_rsa.pub'

# If your slice private key uses a passphrase, set the passphrase.
# Uncomment the three lines below to be prompted for it without echoing.
#from getpass import getpass
#print('Please input private key passphrase. Press enter for no passphrase.')
#os.environ['FABRIC_SLICE_PRIVATE_KEY_PASSPHRASE']=getpass()

## Setup the Experiment

#### Import the FABRIC API

In [None]:
import json
import traceback
from fabrictestbed_extensions.fablib.fablib import fablib

#### Create the FABRIC Proxies

The FABRIC API is used via proxy objects that manage connections to the control framework.  

#### (Optional) Query Available Resources

This optional command queries the FABRIC services to find the available resources. It may be useful for finding a site with available capacity.

In [None]:
# Ask FABRIC which resources are currently available, print the result and
# draw it.  Any failure is reported together with its traceback instead of
# stopping the notebook.
try:
    available_resources = fablib.get_available_resources()
    print(f"Available Resources: {available_resources}")
    available_resources.draw()
except Exception as err:
    print(f"Error: {err}")
    traceback.print_exc()

## Create the Experiment Slice

Load the aggregate advertisements: one JSON file per site, read from the `aggregates` directory.

In [None]:
import glob
import json
import os

# Directory holding the aggregate advertisement files, one "<SITE>.json" per site.
#aggregate_json_dir=os.environ['HOME']+"/work/git/jupyter-examples/fabric_examples/testing_and_debugging/aggregates/"
aggregate_json_dir="./aggregates/"


def site_name_from_path(path):
    """Return the site name encoded in an aggregate file path.

    The site name is the file's base name up to its first dot, so
    ``./aggregates/MAX.json`` yields ``MAX``.  Working from the base name
    (rather than stripping the directory prefix as text) keeps the result
    correct when glob returns a different separator or the configured
    directory has no trailing slash.
    """
    return os.path.basename(path).split(".")[0]


# aggregates maps site name -> parsed JSON content of that site's file.
aggregates = {}
# Sorted so the files are read (and reported) in a deterministic order.
aggregate_files = sorted(glob.glob(os.path.join(aggregate_json_dir, "*.json")))
for file_name in aggregate_files:
    print(f"Reading {file_name}")
    site_name = site_name_from_path(file_name)
    with open(file_name, 'r', encoding='utf-8') as infile:
        site_info = json.load(infile)
    aggregates[site_name] = site_info

#### Configure the Experiment Parameters



In [None]:
# Base names from which the slice name and the per-component node names
# are built.
slice_name_base = 'MySlice_Components_Test'
node_name_base = 'node'

# Sites whose components are tested.  Each entry must match a key loaded
# into `aggregates`.  Alternative site lists are kept here for convenience.
#test_sites = [ 'UKY', 'RENC', 'LBNL' ]
#test_sites = [ 'TACC', 'MAX', 'UTAH', 'NCSA', 'MICH', 'WASH', 'DALL', 'SALT']
#test_sites = [ 'TACC', 'UTAH', 'NCSA', 'MICH', 'WASH', 'DALL', 'SALT' ]
test_sites = ['MAX']

# Image installed on every node.
#image = 'default_centos_8'
#image = 'default_rocky_8'
image = 'default_ubuntu_20'

# Capacities requested for every node.
# NOTE(review): ram and disk are presumably in GB -- confirm against fablib.
cores, ram, disk = 2, 8, 10

node_count = 5



In [None]:
# Dry run: for every test site, list each worker and the components found in
# its aggregate advertisement.  The first ConnectX-6 entry of every worker is
# left out of the listing.
for site in test_sites:
    print(f"{site}")
    site_info = aggregates[site]
    for node_info in site_info['Nodes']:
        print(f"{node_info['Name']}")
        skipped_first_cx6 = False
        for component_info in node_info['Components']:
            model = component_info['Model']
            if model == 'ConnectX-6' and not skipped_first_cx6:
                skipped_first_cx6 = True
                continue

            # Spaces are dropped and '/' becomes '-' in the printed model.
            printable_model = model.replace(' ','').replace('/','-')
            print(f"{component_info['Type']} : {printable_model}")


### Create Slice

<img src="./figs/SingleNode.png" width="20%"><br>

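Create one node for each component under test on every worker of the selected sites. Each node is pinned to its worker and gets that single component attached. The slice is submitted with `wait_progress=False`; the cells below poll the slice state and wait for SSH to become available.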


In [None]:
slice_name=f"{slice_name_base}"
print(f"Creating slice {slice_name}")

# Maps (Type, Model) as found in the aggregate advertisement to the component
# model name passed to node.add_component().  Components that are not listed
# here are skipped.  Known fablib model names:
#   NIC_Basic, NIC_ConnectX_6, NIC_ConnectX_5, NVME_P4510,
#   GPU_TeslaT4, GPU_RTX6000
fablib_component_models = {
    ("EthernetCardPF", "ConnectX-6"): 'NIC_ConnectX_6',
    ("EthernetCardPF", "ConnectX-5"): 'NIC_ConnectX_5',
    ("NVMe", "P4510"): 'NVME_P4510',
    ("GPU", "Tesla T4"): 'GPU_TeslaT4',
    ("GPU", "Quadro RTX 6000/8000"): 'GPU_RTX6000',
}

try:
    # NOTE: `slice` shadows the Python builtin; the name is kept because the
    # other cells of this notebook use it as well.
    slice = fablib.new_slice(slice_name)
    for site in test_sites:
        print(f"{site}")
        site_info = aggregates[site]
        for node_info in site_info['Nodes']:
            print(f"{node_info['Name']}")
            worker = node_info['Name']
            worker_short_name = worker.split(".")[0]
            print(f"slice name: {worker}")

            # Per-worker bookkeeping.  The first ConnectX-6 entry is always
            # skipped; after that only every second ConnectX-6 entry and every
            # second ConnectX-5 entry gets a node.
            # NOTE(review): presumably the first ConnectX-6 is not requestable
            # and each remaining card is advertised as two entries -- confirm.
            skipped_first_cx6 = False
            cx6_count = 0
            cx5_count = 0
            node_num = 0
            for component_info in node_info['Components']:
                component_type = component_info['Type']
                component_model = component_info['Model']

                if not skipped_first_cx6 and component_model == 'ConnectX-6':
                    skipped_first_cx6 = True
                    continue

                if cx6_count == 0 and component_model == 'ConnectX-6':
                    cx6_count += 1
                    continue

                if cx5_count == 0 and component_model == 'ConnectX-5':
                    cx5_count += 1
                    continue

                # Resolve the fablib model BEFORE creating the node, so an
                # unsupported component neither leaves an empty node in the
                # request nor silently reuses the previous component's model.
                fablib_component_model = fablib_component_models.get((component_type, component_model))
                if fablib_component_model is None:
                    print(f"Skipping unsupported component {component_type} : {component_model}")
                    continue

                node_num += 1

                node_name = f"{node_name_base}_{worker_short_name}_{node_num}_{component_model.replace(' ','').replace('/','-')}"
                # Add node, pinned to the worker that hosts the component
                node = slice.add_node(name=node_name, site=site)
                node.set_capacities(cores=cores, ram=ram, disk=disk)
                node.set_image(image)
                node.set_host(worker)

                # A NIC was used: re-arm the counter so the next entry of the
                # same model is skipped again.
                if fablib_component_model == 'NIC_ConnectX_6':
                    cx6_count = 0
                elif fablib_component_model == 'NIC_ConnectX_5':
                    cx5_count = 0

                node.add_component(model=fablib_component_model, name=f"{node_name}_{fablib_component_model}")

    #Submit Slice Request
    slice.submit(wait_progress=False)
except Exception as e:
    print(f"Slice Failed: {e}")
    traceback.print_exc()


In [None]:
import time
from fabrictestbed.slice_manager import SliceManager, Status, SliceState

# Every 30 seconds, list all slices with their name, state and id.  The loop
# has no exit condition of its own: interrupt the kernel to stop it.  The
# interrupt is handled here so the cell ends cleanly and reports "done!".
try:
    while True:
        try:
            print("Trying ... ",end="")
            slices = fablib.get_slices()
            #slices = fablib.get_slice_list(excludes=[SliceState.Dead,SliceState.Closing],verbose=False)
            for slice in slices:
                print(f"{slice.get_name()}, {slice.get_state()}, {slice.get_slice_id()}")
        except Exception as e:
            # A failed query is only reported; polling continues.
            print(f"Get Slices Fail: {e}")
        print("sleeping")
        time.sleep(30)
except KeyboardInterrupt:
    # Kernel interrupt is the intended way to leave the polling loop.
    pass
print("done!")


In [None]:
# Print how many slices the last poll returned, then the state and the
# description of each one.  `slices` is set by the polling cell; run that
# cell at least once first.
# To keep a snapshot across polls, copy it first, e.g. `closing_slices = slices`.
print(f"{len(slices)}")
for slice in slices:
    # get_state() is the accessor the polling cell uses on these objects.
    print(f"{slice.get_state()}")
    print(f"{slice}")

In [None]:
import time

slice_name=f"{slice_name_base}"

# Options for the SSH wait.
# NOTE(review): timeout and interval are presumably seconds -- confirm.
ssh_wait_options = dict(progress=True, timeout=600, interval=60)

try:
    print(f"Waiting for slice {slice_name} ")

    # Look the slice up by name
    slice = fablib.get_slice(slice_name)

    # Wait until ssh is active
    slice.wait_ssh(**ssh_wait_options)

    # Run the post boot config
    slice.post_boot_config(verbose=False)
except Exception as err:
    print(f"Slice Failed: {err}")
    traceback.print_exc()

### Print the Node's Attributes

Each node in the slice has a set of get functions that return the node's attributes.

In [None]:
slice_name=f"{slice_name_base}"
print(f"Getting slice {slice_name} ")
try:
    slice = fablib.get_slice(slice_name)

    for node in slice.get_nodes():
        print("Node:")
        # (label, getter) pairs; each getter is called only when its line is
        # printed, so the attributes are queried in the listed order.
        node_rows = (
            ("Name              ", node.get_name),
            ("Cores             ", node.get_cores),
            ("RAM               ", node.get_ram),
            ("Disk              ", node.get_disk),
            ("Image             ", node.get_image),
            ("Image Type        ", node.get_image_type),
            ("Host              ", node.get_host),
            ("Site              ", node.get_site),
            ("Management IP     ", node.get_management_ip),
            ("Reservation ID    ", node.get_reservation_id),
            ("Reservation State ", node.get_reservation_state),
            ("SSH Command       ", node.get_ssh_command),
            ("SSH Test Result   ", node.test_ssh),
        )
        for label, getter in node_rows:
            print(f"   {label}: {getter()}")

        # One group of lines per component attached to the node.
        for component in node.get_components():
            component_rows = (
                ("Name             ", component.get_name),
                ("Details          ", component.get_details),
                ("Disk (G)         ", component.get_disk),
                ("Units            ", component.get_unit),
                ("PCI Address      ", component.get_pci_addr),
                ("Model            ", component.get_model),
                ("Type             ", component.get_type),
            )
            for label, getter in component_rows:
                print(f"      {label}: {getter()}")

        print("   Interfaces        :  ")
        for interface in node.get_interfaces():
            print(f"       Name                : {interface.get_name()}")
            print(f"           Bandwidth           : {interface.get_bandwidth()}")
            print(f"           VLAN                : {interface.get_vlan()}")
            print(f"           OS Interface        : {interface.get_os_interface()}")

        print()
except Exception as err:
    print(f"Fail: {err}")

### Test SSH

In [None]:
slice_name=f"{slice_name_base}"
print(f"Getting slice {slice_name} ")
try:
    slice = fablib.get_slice(slice_name)
    # One summary line per node: its name, its host and the SSH test result.
    for node in slice.get_nodes():
        node_label = node.get_name()
        host_label = node.get_host()
        ssh_result = node.test_ssh()
        print(f"Node: {node_label}, Host: {host_label}, SSH Test: {ssh_result}")

except Exception as err:
    print(f"Fail: {err}")

## Accessing the VM


#### SSH with fablib

Most experiments will require automated configuration and execution. You can use the fablib library to execute arbitrary commands on your node. 

The following code uses fablib to run `lspci` on every node and checks that the node's attached component appears in the output. The library uses the bastion and VM keys defined at the top of this notebook to jump through the bastion host and execute the command.

In [None]:
slice_name=f"{slice_name_base}"
print(f"Getting slice {slice_name} ")

# Maps (component type, component model), as returned by the component's
# get_type()/get_model(), to the device description expected in the node's
# `lspci` output.
lspci_search_strings = {
    ("SmartNIC", "ConnectX-6"): "Mellanox Technologies MT28908 Family [ConnectX-6]",
    ("SmartNIC", "ConnectX-5"): "Mellanox Technologies MT27800 Family [ConnectX-5]",
    ("NVME", "P4510"): "Toshiba Corporation NVMe SSD Controller",
    ("GPU", "Tesla T4"): "NVIDIA Corporation TU104GL [Tesla T4]",
    ("GPU", "RTX6000"): "NVIDIA Corporation TU102GL [Quadro RTX 6000/8000]",
}

try:
    slice = fablib.get_slice(slice_name)
    for node in slice.get_nodes():
        # Only the first component of a node is checked.  A node without any
        # component is reported and skipped, so it cannot abort the checks
        # of the remaining nodes.
        components = node.get_components()
        if not components:
            print(f"Node: {node.get_name()}: no components : Fail")
            continue
        component = components[0]

        # Unknown type/model pairs fall back to "None", which is not expected
        # to occur in the lspci output, so they are reported as Fail.
        component_key = (str(component.get_type()), str(component.get_model()))
        search_string = lspci_search_strings.get(component_key, "None")

        stdout, stderr = node.execute('lspci')
        if search_string in stdout:
            print(f"Node: {node.get_name()}: {component.get_type()} - {component.get_model()} : Success")
        else:
            print(f"Node: {node.get_name()}: {component.get_type()} - {component.get_model()} : Fail")

except Exception as e:
    print(f"Fail: {e}")

## Delete Slice

Please delete your slice when you are done with your experiment.

In [None]:
slice_name=f"{slice_name_base}"
print(f"Getting slice {slice_name} ")
try:
    # Look the slice up by name, then delete it.  A failure of either step
    # is only reported.
    slice = fablib.get_slice(slice_name)
    slice.delete()
except Exception as err:
    print(f"Fail: {err}")