In [1]:
import logging, uuid, time, csv, datetime

from kubernetes import client, config, watch

logging.basicConfig(level=logging.INFO)
config.load_kube_config("/home/goughes/k8s/configs/erikdev-admin.yaml")
core_api = client.CoreV1Api()
batch_api = client.BatchV1Api()

In [2]:
class Kubernetes:
    def __init__(self):

        # Init Kubernetes
        self.core_api = client.CoreV1Api()
        self.batch_api = client.BatchV1Api()

    def get_all_namespaces(self):
        namespaces = self.core_api.list_namespace()
        all_namespaces = []
        for ns in namespaces.items:
            all_namespaces.append(ns.metadata.name)
        return all_namespaces
    
    @staticmethod
    def delete_pod(name, namespace):
        print("Deleting pod " + name)
        api_response = core_api.delete_namespaced_pod(name, 
                                                      namespace,
                                                      grace_period_seconds=0,
                                                      propagation_policy='Background')
        logging.debug(api_response)

    
    @staticmethod
    def delete_all_jobs(namespace):
        jobs = batch_api.list_namespaced_job(namespace, pretty=True, timeout_seconds=60)
        deleteoptions = client.V1DeleteOptions()
        for job in jobs.items:
            print("Deleting job " + job.metadata.name)
            jobname = job.metadata.name
            api_response = batch_api.delete_namespaced_job(jobname,
                                                           namespace,
                                                           grace_period_seconds=0, 
                                                           propagation_policy='Background')
            logging.debug(api_response)
   

In [3]:
k8s = Kubernetes()

all_pods = core_api.list_pod_for_all_namespaces()
#all_jobs = batch_api.list_job_for_all_namespaces()
# filter for pods with "tenant" in the name
all_tenant_pods = [ pod for pod in all_pods.items if "tenant" in pod.metadata.name]
#all_tenant_jobs = [ job for job in all_jobs.items if "tenant" in job.metadata.name]

tenants = ["tenant1", "tenant2", "tenant3", "tenant4",]
counts = {'tenant1_pending': 0,
          'tenant2_pending': 0,
          'tenant3_pending': 0,
          'tenant4_pending': 0,
          'tenant1_running': 0,
          'tenant2_running': 0,
          'tenant3_running': 0,
          'tenant4_running': 0,
          'tenant1_completed': 0,
          'tenant2_completed': 0,
          'tenant3_completed': 0,
          'tenant4_completed': 0,
         }

# collect job statistics for all tenants
for pod in all_tenant_pods:
    #break
    tenant = pod.metadata.name.split("-")[0]
    cores = int(pod.spec.containers[0].resources.limits['cpu'])
    if pod.status.phase == "Pending":
        counts[tenant+'_pending'] += cores
    elif pod.status.phase == "Running":
        counts[tenant+'_running'] += cores 
    elif pod.status.phase == "Succeeded":
        counts[tenant+'_completed'] += cores 
print(counts)

total_running = 0
total_pending = 0
for tenant in tenants:
    if tenant == "tenant4":
        continue
    total_running += counts[tenant+"_running"]
    total_pending += counts[tenant+"_pending"]
print("pending", total_pending, "running", total_running)

# get all running tenant4 pods
if total_pending > 0 and counts['tenant4_running'] > 0:
    tenant4_pods = [ pod for pod in all_tenant_pods if "tenant4" in pod.metadata.name]
    tenant4_running_pods = [ pod for pod in tenant4_pods if pod.status.phase == "Running"]
#print(tenant4_running_pods[0])

    # sort by time running
    tenant4_running_pods.sort(key=lambda e: e.metadata.creation_timestamp, reverse=True)
    #print(tenant4_running_pods[0].metadata.creation_timestamp)
    #print(tenant4_running_pods[-1].metadata.creation_timestamp)
    # definitely kill anything over a certain threshold

    threshold = 60
    for pod in tenant4_running_pods:
        current_time = datetime.datetime.now(tz=datetime.timezone.utc)
        print(current_time)
        print(pod.metadata.creation_timestamp)
        diff = current_time - pod.metadata.creation_timestamp
        print(diff.total_seconds())
        if diff.total_seconds() > threshold:
            k8s.delete_pod(pod.metadata.name, "tenant4")
            print("Kill", pod.metadata.name)
    # if nothing over the threshold, kill the most recent pod?
    #print(tenant4_running_pods[0])

{'tenant1_pending': 676, 'tenant2_pending': 620, 'tenant3_pending': 100, 'tenant4_pending': 216, 'tenant1_running': 92, 'tenant2_running': 92, 'tenant3_running': 24, 'tenant4_running': 168, 'tenant1_completed': 0, 'tenant2_completed': 56, 'tenant3_completed': 56, 'tenant4_completed': 0}
pending 1396 running 208
2024-02-15 21:59:42.954475+00:00
2024-02-15 21:57:10+00:00
152.954475
Deleting pod tenant4-burst-aa6602f9-814f-48db-b919-7f1320d9ff6c-frmrh
Kill tenant4-burst-aa6602f9-814f-48db-b919-7f1320d9ff6c-frmrh
2024-02-15 21:59:42.962026+00:00
2024-02-15 21:57:10+00:00
152.962026
Deleting pod tenant4-burst-f8630308-f13d-49af-b04f-5c2defe2b11f-tt2cx
Kill tenant4-burst-f8630308-f13d-49af-b04f-5c2defe2b11f-tt2cx
2024-02-15 21:59:42.967825+00:00
2024-02-15 21:57:09+00:00
153.967825
Deleting pod tenant4-burst-182c1c53-b213-459c-b908-cc140ea3f937-nnvzk
Kill tenant4-burst-182c1c53-b213-459c-b908-cc140ea3f937-nnvzk
2024-02-15 21:59:42.974676+00:00
2024-02-15 21:57:09+00:00
153.974676
Deleting po