# Hello, FABRIC: Create your first FABRIC slice


## Setup the Experiment

#### Import the FABRIC API

In [1]:
import json
import traceback

from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager

import os

# Path to the local "fabric_rc" file. It is resolved relative to the current
# user's home directory so the notebook is not tied to one machine/account.
# If using the FABRIC Testbed JupyterHub, pass that environment's fabric_rc
# path here instead:
#     fablib = fablib_manager(fabric_rc="/path/to/fabric_rc")
fabric_rc_path = os.path.expanduser("~/.ssh/fabric_rc")
fablib = fablib_manager(fabric_rc=fabric_rc_path)

# Print the active configuration so it can be checked before any slice work.
fablib.show_config()

0,1
Credential Manager,cm.fabric-testbed.net
Orchestrator,orchestrator.fabric-testbed.net
Token File,/Users/xinxinmei/.ssh/fabric_token.json
Project ID,bbe0d94c-736b-477a-a2e6-fef9fe7ac9ca
Bastion Host,bastion.fabric-testbed.net
Bastion Username,xmei_0000124604
Bastion Private Key File,/Users/xinxinmei/.ssh/fabric-bastion-key
Slice Public Key File,/Users/xinxinmei/.ssh/slice_key.pub
Slice Private Key File,/Users/xinxinmei/.ssh/slice_key
Sites to avoid,


0,1
Credential Manager,cm.fabric-testbed.net
Orchestrator,orchestrator.fabric-testbed.net
Token File,/Users/xinxinmei/.ssh/fabric_token.json
Project ID,bbe0d94c-736b-477a-a2e6-fef9fe7ac9ca
Bastion Host,bastion.fabric-testbed.net
Bastion Username,xmei_0000124604
Bastion Private Key File,/Users/xinxinmei/.ssh/fabric-bastion-key
Slice Public Key File,/Users/xinxinmei/.ssh/slice_key.pub
Slice Private Key File,/Users/xinxinmei/.ssh/slice_key
Sites to avoid,


#### (Optional) Query Available Resources

This optional command queries the FABRIC services to find the available resources. It may be useful for finding a site with available capacity.

In [None]:
try:
    # Ask the FABRIC services what is currently available and show the result.
    available_resources = fablib.get_available_resources()
    print(f"Available Resources: {available_resources}")
except Exception as err:
    # The query is optional, so a failure is reported rather than raised.
    print(f"Error: {err}")

## Create the Experiment Slice

In [20]:
# slice.delete()

In [None]:
# Configuration variables
slice_name = 'K8s_on_FABRIC'
site = "UCSD"
image = 'default_ubuntu_20'
network_name = 'NET1'
num_workers = 2  # worker nodes created in addition to the single "cpnode"

# The first node is "cpnode"; the remaining ones are "wknode1", "wknode2", ...
# Each node gets exactly one NIC, named "NIC1", "NIC2", ... in the same order.
node_names = ["cpnode"] + [f"wknode{k}" for k in range(1, num_workers + 1)]
nic_names = [f"NIC{k}" for k in range(1, len(node_names) + 1)]

print(f"Site: {site}")
print(f"Nodes: {node_names}")

# Reset the ID before submitting: if the request below fails, later cells must
# not silently pick up a slice ID left in the kernel by an earlier run.
slice_id = None

try:
    # Create Slice
    slice = fablib.new_slice(slice_name)

    # Add one node per name and collect the first interface of its NIC.
    nodes = []
    ifaces = []
    for node_name, nic_name in zip(node_names, nic_names):
        node = slice.add_node(name=node_name, site=site, image=image)
        nodes.append(node)
        nic = node.add_component(model='NIC_Basic', name=nic_name)
        ifaces.append(nic.get_interfaces()[0])

    # Network: a single IPv4 L3 network joining all collected interfaces.
    net1 = slice.add_l3network(
        name=network_name,
        interfaces=ifaces,
        type='IPv4'
    )

    # Submit Slice Request
    slice_id = slice.submit()

except Exception as e:
    print(f"{e}")
    # Same diagnostics as the other cells in this notebook.
    traceback.print_exc()

Site: UCSD
Nodes: ['cpnode', 'wknode1']
Submit request error: return_status Status.FAILURE, slice_reservations: (500)
Reason: INTERNAL SERVER ERROR
HTTP response headers: HTTPHeaderDict({'Server': 'nginx/1.21.6', 'Date': 'Thu, 06 Feb 2025 04:12:08 GMT', 'Content-Type': 'text/html; charset=utf-8', 'Content-Length': '208', 'Connection': 'keep-alive', 'Access-Control-Allow-Credentials': 'true', 'Access-Control-Allow-Headers': 'DNT, User-Agent, X-Requested-With, If-Modified-Since, Cache-Control, Content-Type, Range, Authorization', 'Access-Control-Allow-Methods': 'GET, POST, PUT, PATCH, DELETE, OPTIONS', 'Access-Control-Allow-Origin': '*', 'Access-Control-Expose-Headers': 'Content-Length, Content-Range, X-Error', 'X-Error': 'Slice K8s_on_FABRIC already exists'})
HTTP response body: b'{\n    "errors": [\n        {\n            "details": "Slice K8s_on_FABRIC already exists",\n            "message": "Internal Server Error"\n        }\n    ],\n    "size": 1,\n    "status": 500,\n    "type": "

### Print the Node's Attributes

Each node in the slice has a set of get functions that return the node's attributes.

In [None]:
try:
    slice = fablib.get_slice(slice_id=slice_id)
    node_dict = {}
    address_dict = {}

    # Create node and address variables.
    # Each node is fetched by its name instead of by its position in
    # slice.get_nodes(), so "cpnode" can never end up bound to a worker node
    # if the slice returns its nodes in a different order than node_names.
    for node_name in node_names:
        node = slice.get_node(name=node_name)
        node_dict[node_name] = node
        # Later cells refer to the nodes through notebook-level names such as
        # `cpnode` and `cpnode_address`, so publish them via globals().
        globals()[node_name] = node
        address_dict[f"{node_name}_address"] = None
        globals()[f"{node_name}_address"] = None
        print(f"{node_name}: {node}")

    # Kept for later cells that may expect `nodes`; ordered like node_names.
    nodes = [node_dict[node_name] for node_name in node_names]

    # Get network info
    network = slice.get_network(name=network_name)
    network_available_ips = network.get_available_ips()
    network_subnet = network.get_subnet()  # same for every node; fetch once
    print(f"Network: {network}")

    # Configure IPs for all nodes
    for node_name in node_names:
        node = node_dict[node_name]
        node_iface = node.get_interface(network_name=network_name)

        # Assign and store IP address (take the next unused one from the list)
        node_address = network_available_ips.pop(0)
        node_iface.ip_addr_add(addr=node_address, subnet=network_subnet)
        address_dict[f"{node_name}_address"] = node_address
        globals()[f"{node_name}_address"] = node_address

        # Print node config
        print(f"\nConfiguration for {node_name}:")
        print(f"Address assigned: {node_address}")
        stdout, _ = node.execute(f'ip addr show {node_iface.get_os_interface()}')
        print(stdout)
        stdout, _ = node.execute('ip route list')
        print(stdout)

    # Print summary
    print("\nNode and Address Summary:")
    for node_name in node_names:
        print(f"{node_name}: {globals()[node_name]}")
        print(f"{node_name}_address: {globals()[f'{node_name}_address']}")

except Exception as e:
    print(f"Fail: {e}")
    traceback.print_exc()


## Start the control plane
We follow the instructions that we have here: https://github.com/apache/openwhisk-deploy-kube/blob/master/docs/k8s-diy-ubuntu.md

In [None]:
try:
    # Copy the setup script to the control-plane node under the same file name.
    file_attributes = cpnode.upload_file(
        local_file_path="config_control_plane.sh",
        remote_file_path="config_control_plane.sh",
    )

    # Make the script executable and run it, passing the network subnet and
    # the address assigned to cpnode as its two arguments.
    control_plane_cmd = (
        f"chmod +x config_control_plane.sh && ./config_control_plane.sh {network.get_subnet()} {cpnode_address}"
    )
    stdout, stderr = cpnode.execute(control_plane_cmd)

except Exception as err:
    print(f"Exception: {err}")
 

## Start the worker node
Put the join command in the `join_cmd` variable. The command should look like this: ` kubeadm join <control-plane-ip>:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>`

In [None]:
# Join command handed to every worker; replace it with the one for your own
# control plane before running this cell.
join_cmd = "sudo kubeadm join 10.146.4.2:6443 --token l255bp.wqi5i6br0jg7f4z2 --discovery-token-ca-cert-hash sha256:069646097e377bcd9e6d66ee7779a0d3e0930d95d1e6a79f73f22f70b1098b5e"

try:
    for node_name, node in node_dict.items():
        # Every entry except the control plane node is treated as a worker.
        if node_name != 'cpnode':
            print(f"\nConfiguring {node_name}...")

            # A failure on one worker is reported and the loop moves on to
            # the next worker instead of aborting the whole cell.
            try:
                print(f"Uploading and executing config_worker_node.sh on {node_name}...")
                file_attributes = node.upload_file(
                    local_file_path="config_worker_node.sh",
                    remote_file_path="config_worker_node.sh",
                )
                # The join command is appended as the script's arguments.
                exec_cmd = f"chmod +x config_worker_node.sh && ./config_worker_node.sh {join_cmd}"
                stdout, stderr = node.execute(exec_cmd)
                print(f"Config output for {node_name}:", stdout)
            except Exception as err:
                print(f"Failed to configure {node_name}: {err}")

except Exception as err:
    print(f"Main exception: {err}")
    traceback.print_exc()

## Monitor the kubernetes cluster

In [None]:
try:
    # Cluster-wide status queries, run in order on the control plane node.
    # Each entry: (progress message, kubectl command, heading for its stdout).
    status_queries = [
        ("Getting nodes...",
         "kubectl get nodes",
         "Nodes"),
        ("Getting all kubernetes resources...",
         "kubectl get all --all-namespaces",
         "All resources"),
        ("\nGetting detailed pods status across all namespaces...",
         "kubectl get pods -A -o wide",
         "Detailed pods status"),
        ("\nDescribing kube-system pods...",
         "kubectl describe pods -n kube-system",
         "Kube-system pods details"),
    ]
    for progress_msg, kubectl_cmd, heading in status_queries:
        print(progress_msg)
        stdout, stderr = cpnode.execute(kubectl_cmd)
        print(f"{heading}:\n{stdout}")
        if stderr:
            print(f"Stderr: {stderr}")

    # Tail the logs of the pods selected by each `component` label value.
    system_components = ['kube-apiserver', 'kube-controller-manager', 'kube-scheduler', 'etcd']

    for component in system_components:
        print(f"\nGetting logs for {component}...")
        try:
            # Step 1: resolve the name of the first pod carrying this label.
            cmd = f"kubectl get pods -n kube-system -l component={component} -o jsonpath='{{.items[0].metadata.name}}'"
            stdout, stderr = cpnode.execute(cmd)
            if not stdout:
                print(f"No pod found for component {component}")
                continue

            # Step 2: fetch the last 50 log lines of that pod.
            pod_name = stdout.strip()
            stdout, stderr = cpnode.execute(f"kubectl logs -n kube-system {pod_name} --tail=50")
            print(f"Logs from {component} ({pod_name}):\n{stdout}")
        except Exception as err:
            # One component failing must not stop the remaining ones.
            print(f"Error getting logs for {component}: {err}")

except Exception as err:
    print(f"Exception while monitoring kubernetes cluster: {err}")
    traceback.print_exc()



## Node has successfully joined the cluster.

# Deploying a hello world application.

## First, we pull a hello world image and create a "deployment".

In [None]:
try:
    # Create the "kubernetes-bootcamp" deployment (9 replicas) from the sample image.
    create_deployment_cmd = (
        "kubectl create deployment kubernetes-bootcamp "
        "--image=gcr.io/google-samples/kubernetes-bootcamp:v1 --replicas=9"
    )
    stdout, stderr = cpnode.execute(create_deployment_cmd)
    print(f"stdout: {stdout}")
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")
 

## Now let's do some status commands.

In [None]:
try:
    # List the pods; only the captured stderr is printed by this cell.
    get_pods_cmd = "kubectl get pods"
    _, stderr = cpnode.execute(get_pods_cmd)
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")

## Pick the correct pod name and run the command below.

In [None]:

try:
    # Describe a single pod; the pod name must be replaced with one from your
    # own deployment. Only the captured stderr is printed by this cell.
    describe_pod_cmd = "kubectl describe pod kubernetes-bootcamp-68cfbdbb99-d67sb"
    _, stderr = cpnode.execute(describe_pod_cmd)
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")

## The next thing we need to do is to create what is called a "service".

We are going to use it to expose the deployment to the outside, through a port, which is 8080. Like this:

Note that the service itself will still need to be exposed. There's another "expose" step that we need to make.

In [None]:
try:
    # Create a ClusterIP service on port 8080 for the bootcamp deployment.
    expose_cmd = 'kubectl expose deployment/kubernetes-bootcamp --type="ClusterIP" --port 8080'
    _, stderr = cpnode.execute(expose_cmd)
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")

## Let's check if the service was created.

In [None]:
try:
    # Query the service created for the deployment; only stderr is printed here.
    get_service_cmd = "kubectl get service kubernetes-bootcamp"
    _, stderr = cpnode.execute(get_service_cmd)
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")

## Finally, we need to run a port forwarding command in order to expose the service to the outside.

Modify the `--address` flag: use the "CLUSTER-IP" value that is output from the command above.

In [None]:
try:
    # Start the port-forward as a background shell job with its output
    # discarded. The --address value is specific to one cluster and must be
    # replaced with your own.
    port_forward_cmd = "kubectl port-forward --address 10.111.180.18 service/kubernetes-bootcamp 8080:8080 > /dev/null 2>&1 &"
    _, stderr = cpnode.execute(port_forward_cmd)
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")

## Now our application should finally be visible. Let's test the deployment on the control plane machine itself.

In [None]:
try:
    # Request the application from the control plane node itself; the address
    # must match the one used for the port-forward.
    curl_cmd = "curl 10.111.180.18:8080"
    _, stderr = cpnode.execute(curl_cmd)
    print(f"stderr: {stderr}")
except Exception as err:
    print(f"Exception: {err}")

## Delete Slice

Please delete your slice when you are done with your experiment.

In [None]:
try:
    # Fetch the slice by its name, then delete it.
    slice = fablib.get_slice(slice_name)
    slice.delete()
except Exception as err:
    print(f"Fail: {err}")