In [1]:
from kubernetes import client, config
from gymnasium.spaces import Discrete

In [2]:
# Discrete space with n=11 values starting at -5; the samples printed below
# fall in the integer range -5..5.
space = Discrete(11, start=-5)
print(f"start: {space.start},  n: {space.n}")
# Draw 50 samples to eyeball the covered range. No seed is set, so the
# printed values differ from run to run.
for i in range(50):
    print(f"sample: {space.sample()}")

start: -5,  n: 11
sample: 2
sample: 0
sample: -4
sample: 0
sample: 1
sample: 3
sample: 3
sample: -2
sample: -4
sample: 2
sample: 5
sample: -4
sample: -4
sample: 5
sample: 2
sample: 1
sample: 0
sample: -4
sample: 3
sample: -3
sample: 2
sample: 5
sample: 0
sample: -2
sample: -4
sample: 3
sample: 0
sample: 5
sample: 5
sample: 5
sample: -5
sample: 1
sample: -5
sample: 5
sample: 3
sample: 0
sample: 1
sample: -4
sample: 3
sample: -5
sample: -2
sample: 1
sample: -4
sample: -2
sample: 3
sample: -5
sample: 1
sample: 2
sample: 0
sample: 4


In [3]:
# Load cluster connection settings from the kubeconfig file; this has to run
# before any API client is constructed.
config.load_kube_config()
# AppsV1Api client. NOTE(review): not referenced by any cell visible here —
# confirm it is still needed.
cluster = client.AppsV1Api()
# CustomObjectsApi client, used below to query the metrics.k8s.io API group.
api = client.CustomObjectsApi()

In [4]:
# Fetch the pod metrics (metrics.k8s.io/v1beta1) for the "default" namespace.
metric_data = api.list_namespaced_custom_object(
    group="metrics.k8s.io",
    version="v1beta1",
    namespace="default",
    plural="pods",
)

# Show the raw response dict as the cell output (one entry per pod under
# "items", each with per-container "usage" values).
metric_data



{'kind': 'PodMetricsList',
 'apiVersion': 'metrics.k8s.io/v1beta1',
 'metadata': {},
 'items': [{'metadata': {'name': 'nodejs-deployment-589bcd69d-2cnml',
    'namespace': 'default',
    'creationTimestamp': '2025-07-16T03:13:14Z',
    'labels': {'app': 'nodejs', 'pod-template-hash': '589bcd69d'}},
   'timestamp': '2025-07-16T03:13:06Z',
   'window': '15.006s',
   'containers': [{'name': 'app',
     'usage': {'cpu': '51646n', 'memory': '11556Ki'}}]},
  {'metadata': {'name': 'nodejs-deployment-589bcd69d-2w7c9',
    'namespace': 'default',
    'creationTimestamp': '2025-07-16T03:13:14Z',
    'labels': {'app': 'nodejs', 'pod-template-hash': '589bcd69d'}},
   'timestamp': '2025-07-16T03:13:06Z',
   'window': '15.007s',
   'containers': [{'name': 'app',
     'usage': {'cpu': '51709n', 'memory': '11528Ki'}}]},
  {'metadata': {'name': 'nodejs-deployment-589bcd69d-48w87',
    'namespace': 'default',
    'creationTimestamp': '2025-07-16T03:13:14Z',
    'labels': {'app': 'nodejs', 'pod-template-

In [5]:
# CoreV1Api client; the helper functions below use it to list cluster nodes.
core = client.CoreV1Api()

In [6]:
def parse_cpu_value(cpu_str):
    """Parse a Kubernetes CPU quantity into cores (float).

    Supported forms are plain cores ("2", "0.5"), millicores ("250m"),
    microcores ("1500u") and nanocores ("51646n"). Surrounding whitespace is
    ignored, and plain int/float inputs are interpreted as cores.

    Args:
        cpu_str: CPU quantity, normally a string taken from the Kubernetes API.

    Returns:
        The quantity in cores, or 0.0 (after printing a message) when the
        value is missing or cannot be parsed.
    """
    # Divisor that converts each suffixed unit into whole cores.
    divisors = {"m": 1000, "u": 1000000, "n": 1000000000}
    try:
        if isinstance(cpu_str, (int, float)):
            return float(cpu_str)

        text = cpu_str.strip()
        suffix = text[-1:]
        if suffix in divisors:
            return float(text[:-1]) / divisors[suffix]
        return float(text)
    except (ValueError, IndexError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover None and other non-string inputs,
        # which previously escaped this best-effort parser as a crash.
        print(f"Could not parse CPU value '{cpu_str}': {e}")
        return 0.0

def parse_memory_value(memory_str):
    """Parse a Kubernetes memory quantity into mebibytes (float).

    The result is expressed in units of 1024 * 1024 bytes, which the rest of
    the notebook labels "MB". Supported forms:

    * binary suffixes: Ki, Mi, Gi, Ti, Pi, Ei (powers of 1024)
    * decimal suffixes: k, M, G, T, P, E (powers of 1000)
    * a plain number, interpreted as bytes

    Surrounding whitespace is ignored, and plain int/float inputs are
    interpreted as bytes.

    Args:
        memory_str: Memory quantity, normally a string from the Kubernetes API.

    Returns:
        The quantity in mebibytes, or 0.0 (after printing a message) when the
        value is missing or cannot be parsed.
    """
    bytes_per_mib = 1024 * 1024
    # Bytes represented by one unit of each suffix.
    binary_units = {
        "Ki": 1024,
        "Mi": 1024 ** 2,
        "Gi": 1024 ** 3,
        "Ti": 1024 ** 4,
        "Pi": 1024 ** 5,
        "Ei": 1024 ** 6,
    }
    decimal_units = {
        "k": 1000,
        "M": 1000 ** 2,
        "G": 1000 ** 3,
        "T": 1000 ** 4,
        "P": 1000 ** 5,
        "E": 1000 ** 6,
    }
    try:
        if isinstance(memory_str, (int, float)):
            return float(memory_str) / bytes_per_mib

        text = memory_str.strip()
        # Two-character binary suffixes are tried first so that "Mi" is never
        # mistaken for a trailing decimal unit.
        for units, width in ((binary_units, 2), (decimal_units, 1)):
            suffix = text[-width:]
            if suffix in units:
                return float(text[:-width]) * units[suffix] / bytes_per_mib
        return float(text) / bytes_per_mib
    except (ValueError, IndexError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover None and other non-string inputs,
        # which previously escaped this best-effort parser as a crash.
        print(f"Could not parse memory value '{memory_str}': {e}")
        return 0.0

In [7]:
def get_node_capacity():
    """Sum allocatable CPU and memory over the schedulable nodes.

    A node is skipped when ``spec.unschedulable`` is truthy, or when it
    carries a ``NoSchedule`` taint whose key is
    ``node-role.kubernetes.io/control-plane``. One line is printed per node
    describing either why it was skipped or what it contributes.

    Returns:
        A ``(total_cpu, total_memory)`` tuple: CPU in cores (via
        ``parse_cpu_value``) and memory in the unit produced by
        ``parse_memory_value``. If anything raises, a message is printed and
        the hard-coded fallback ``(4.0, 8192.0)`` is returned instead.
    """
    try:
        nodes = core.list_node()
        total_cpu = 0
        total_memory = 0

        for node in nodes.items:
            node_name = node.metadata.name

            if getattr(node.spec, "unschedulable", False):
                print(f"Node {node_name}: SKIPPED (unschedulable)")
                continue

            # ``taints`` may be missing or None; treat both as "no taints".
            taints = getattr(node.spec, "taints", []) or []
            is_control_plane = any(
                taint.effect == "NoSchedule"
                and taint.key == "node-role.kubernetes.io/control-plane"
                for taint in taints
            )
            if is_control_plane:
                print(f"Node {node_name}: SKIPPED (control plane taint)")
                continue

            # A node that has not reported allocatable resources yet has
            # ``allocatable`` set to None. Count it as zero rather than letting
            # the resulting AttributeError discard every other node's totals
            # in favour of the fallback below.
            allocatable = node.status.allocatable or {}
            cpu_cores = parse_cpu_value(allocatable.get("cpu", "0"))
            memory_mb = parse_memory_value(allocatable.get("memory", "0"))

            total_cpu += cpu_cores
            total_memory += memory_mb

            print(
                f"Node {node_name}: CPU={cpu_cores:.2f} cores, "
                f"Memory={memory_mb:.2f} MB"
            )

        return total_cpu, total_memory

    except Exception as e:
        # Deliberate best-effort: keep the notebook running with defaults.
        print(f"Could not get node capacity: {e}")
        return 4.0, 8192.0  # Default fallback


In [8]:
def get_node_resource_usage(node_cpu_total, node_memory_total):
    """Report how much CPU and memory is still available on schedulable nodes.

    Node usage is read from the ``metrics.k8s.io/v1beta1`` ``nodes`` resource
    and summed over schedulable nodes only. A node counts as schedulable when
    ``spec.unschedulable`` is falsy and it has no ``NoSchedule`` taint with
    the key ``node-role.kubernetes.io/control-plane``. Progress lines are
    printed for each node and for the totals.

    Args:
        node_cpu_total: Total CPU capacity in cores (for example the first
            value returned by ``get_node_capacity``).
        node_memory_total: Total memory capacity, in the same unit that
            ``parse_memory_value`` produces.

    Returns:
        A ``(cpu_available_percent, memory_available_percent)`` tuple, each
        clamped so it never drops below 0. A capacity of zero or less yields
        0 for that resource. If anything raises, a message is printed and the
        hard-coded fallback ``(50.0, 50.0)`` is returned instead.
    """

    def available_percent(used, total):
        # With no capacity nothing can be available. Handling this explicitly
        # avoids a ZeroDivisionError that the broad handler below would
        # otherwise turn into a misleading 50% fallback.
        if total <= 0:
            return 0
        return max(0, 100 - (used / total * 100))

    try:
        # Get node metrics from metrics server
        node_metrics = api.list_cluster_custom_object(
            group="metrics.k8s.io", version="v1beta1", plural="nodes"
        )

        # Collect the names of schedulable nodes
        schedulable_node_names = set()
        for node in core.list_node().items:
            if getattr(node.spec, "unschedulable", False):
                continue

            # ``taints`` may be missing or None; treat both as "no taints".
            taints = getattr(node.spec, "taints", []) or []
            has_no_schedule_taint = any(
                taint.effect == "NoSchedule"
                and taint.key == "node-role.kubernetes.io/control-plane"
                for taint in taints
            )
            if not has_no_schedule_taint:
                schedulable_node_names.add(node.metadata.name)

        total_cpu_used = 0
        total_memory_used = 0

        if node_metrics and "items" in node_metrics:
            for node in node_metrics["items"]:
                node_name = node["metadata"]["name"]

                # Only include schedulable nodes
                if node_name not in schedulable_node_names:
                    print(f"Node {node_name}: SKIPPED from metrics (not schedulable)")
                    continue

                cpu_cores = parse_cpu_value(node["usage"]["cpu"])
                memory_mb = parse_memory_value(node["usage"]["memory"])

                total_cpu_used += cpu_cores
                total_memory_used += memory_mb

                print(
                    f"Node {node_name}: CPU usage={cpu_cores:.3f} cores, Memory usage={memory_mb:.2f} MB"
                )

        # Calculate available percentages
        cpu_available_percent = available_percent(total_cpu_used, node_cpu_total)
        memory_available_percent = available_percent(
            total_memory_used, node_memory_total
        )

        print(
            f"Total usage: CPU={total_cpu_used:.3f}/{node_cpu_total:.2f} cores, Memory={total_memory_used:.2f}/{node_memory_total:.2f} MB"
        )
        print(
            f"Available: CPU={cpu_available_percent:.1f}%, Memory={memory_available_percent:.1f}%"
        )

        return cpu_available_percent, memory_available_percent

    except Exception as e:
        # Deliberate best-effort: keep the notebook running with defaults.
        print(f"Could not get node resource usage: {e}")
        return 50.0, 50.0  # Default fallback values


In [9]:
node_cpu_total, node_memory_total = get_node_capacity()


Node control: SKIPPED (control plane taint)
Node worker-1: CPU=6.00 cores, Memory=5824.61 MB




In [10]:
cpu_avail, memory_avail = get_node_resource_usage(node_cpu_total, node_memory_total)
print(f"CPU Available: {cpu_avail:.2f}%, Memory Available: {memory_avail:.2f}%")

Node control: SKIPPED from metrics (not schedulable)
Node worker-1: CPU usage=1.065 cores, Memory usage=1589.00 MB
Total usage: CPU=1.065/6.00 cores, Memory=1589.00/5824.61 MB
Available: CPU=82.2%, Memory=72.7%
CPU Available: 82.25%, Memory Available: 72.72%


