In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
from typing import Dict, List, Tuple

# Node Specifications (Using node-32 specs for all nodes)
NODE_SPEC = {
    "cpu_cores": 32,  # 16 cores * 2
    "memory_gb": 256,
    "gpu_count": 8,  # NVIDIA Tesla V100 (32GB each)
    "gpu_memory_gb": 32 * 8,  # Total GPU memory
    "power_active": 4400,  # Watts (2200 * 2)
    "power_idle": 3080,    # 70% of active
    "power_standby": 440   # 10% of active
}

# Constants from the paper
JOB_TYPES = ["AR", "IM", "BE"]  # Advance Reservation, Immediate, Best Effort
JOB_DISTRIBUTION = {"AR": 0.2, "IM": 0.3, "BE": 0.5}  # Percentage of each job type
IDLE_THRESHOLD_MINUTES = 30  # I_th
MIN_IDLE_HOSTS = 1

# Load workload data (start and end resources are identical in your data)
start_resources = pd.DataFrame({
    "hour": ["2024-11-04 14:00:00", "2024-11-04 15:00:00", 
             "2024-11-04 16:00:00", "2024-11-04 17:00:00"],
    "cpu_load": [338.745, 347.4925, 386.78, 415.544166666667],
    "cpu_alloc": [363.0, 363.0, 401.5, 435.666666666667],
    "cpu_idle": [101.0, 101.0, 62.5, 28.3333333333333],
    "cpu_total": [464.0, 464.0, 464.0, 464.0],
    "gpu_power_usage": [701.2725, 727.0759375, 740.7378125, 700.7115625],
    "gpu_mem_used": [22989.375, 24519.2083333333, 24766.25, 24732.7291666667],
    "gpu_gpu_usage": [65.5625, 91.75, 92.9895833333333, 61.0104166666667],
    "gpu_fan": [89.0, 89.0, 89.0, 89.0]
})

end_resources = start_resources.copy()  # End resources are 1 hour later, identical in your data

# Function to classify jobs based on resource usage
def classify_jobs(cpu_load: float, job_dist: Dict) -> List[Tuple[str, float]]:
    total_load = cpu_load
    jobs = []
    for job_type, perc in job_dist.items():
        load = total_load * perc
        jobs.append((job_type, load))
        
    
    return jobs

# Node class to manage state and resources
class Node:
    def __init__(self, name: str):
        self.name = name
        self.state = "Idle"  # Start in Idle state
        self.cpu_used = 0
        self.gpu_power_used = 0
        self.gpu_mem_used = 0
        self.vm_queue = []  # List of jobs (VM requests) in queue
        self.last_active_time = None
        self.available_time = 0  # Earliest available time for VMs

    def update_state(self, time_minutes: float, job_load: float = 0):
        if job_load > 0 and self.state in ["Idle", "Standby"]:
            self.state = "Active"
            self.last_active_time = time_minutes
        elif job_load == 0 and self.state == "Active":
            if time_minutes - self.last_active_time > IDLE_THRESHOLD_MINUTES:
                self.state = "Standby"
            else:
                self.state = "Idle"
        self.cpu_used = job_load

    def get_power_consumption(self) -> float:
        if self.state == "Active":
            return NODE_SPEC["power_active"]
        elif self.state == "Idle":
            return NODE_SPEC["power_idle"]
        else:  # Standby
            return NODE_SPEC["power_standby"]

# EAVMAT Scheduling Algorithm
def eavmat_scheduling(nodes: List[Node], workload: pd.DataFrame) -> Dict:
    results = {"energy_consumption": 0, "job_success_rate": 0, "active_hosts": 0}
    total_jobs = 0
    successful_jobs = 0

    for idx, row in workload.iterrows():
        time = pd.to_datetime(row["hour"])
        cpu_load = row["cpu_load"]
        gpu_power = row["gpu_power_usage"]
        gpu_mem = row["gpu_mem_used"]

        # Classify jobs into AR, IM, BE
        jobs = classify_jobs(cpu_load, JOB_DISTRIBUTION)
        total_jobs += len(jobs)

        # Sort nodes by available time (simplistic approach for demonstration)
        available_nodes = sorted(nodes, key=lambda x: x.available_time)
        active_nodes = [n for n in nodes if n.state == "Active"]
        idle_nodes = [n for n in nodes if n.state == "Idle"]

        for job_type, load in jobs:
            scheduled = False
            # Try to schedule on active nodes first
            for node in active_nodes:
                if node.cpu_used + load <= NODE_SPEC["cpu_cores"] * 100:  # Assume 100 units/core
                    node.cpu_used += load
                    node.update_state((time - pd.Timestamp("2024-11-04 14:00:00")).total_seconds() / 60, load)
                    node.available_time = (time + timedelta(hours=1) - pd.Timestamp("2024-11-04 14:00:00")).total_seconds() / 60
                    scheduled = True
                    successful_jobs += 1
                    break

            if not scheduled and idle_nodes:
                # Use idle node, transition to Active
                node = idle_nodes[0]
                if node.cpu_used + load <= NODE_SPEC["cpu_cores"] * 100:
                    node.cpu_used += load
                    node.update_state((time - pd.Timestamp("2024-11-04 14:00:00")).total_seconds() / 60, load)
                    node.available_time = (time + timedelta(hours=1) - pd.Timestamp("2024-11-04 14:00:00")).total_seconds() / 60
                    node.state = "Active"
                    scheduled = True
                    successful_jobs += 1

            if not scheduled and job_type in ["AR", "IM"]:
                # Preempt BE job if possible (simplified: assume BE jobs can be preempted)
                for node in active_nodes:
                    if node.vm_queue and any(j[0] == "BE" for j in node.vm_queue):
                        # Preempt BE job (remove it, schedule new job)
                        node.vm_queue = [j for j in node.vm_queue if j[0] != "BE"]
                        node.cpu_used += load
                        node.update_state((time - pd.Timestamp("2024-11-04 14:00:00")).total_seconds() / 60, load)
                        node.available_time = (time + timedelta(hours=1) - pd.Timestamp("2024-11-04 14:00:00")).total_seconds() / 60
                        scheduled = True
                        successful_jobs += 1
                        break

            if not scheduled:
                # Job rejected (e.g., for AR/IM if no resources available)
                continue

    # Calculate energy consumption over 4 hours (240 minutes)
    total_time_minutes = 240
    for node in nodes:
        state_time = total_time_minutes  # Simplified: assume node stays in current state for entire period
        results["energy_consumption"] += node.get_power_consumption() * state_time

    results["job_success_rate"] = (successful_jobs / total_jobs) * 100 if total_jobs > 0 else 0
    results["active_hosts"] = len([n for n in nodes if n.state == "Active"])

    return results

# Main execution
if __name__ == "__main__":
    # Initialize nodes (node-11, node-13, node-14)
    nodes = [Node(f"node-{i}") for i in [11, 13, 14]]

    # Run EAVMAT scheduling
    results = eavmat_scheduling(nodes, start_resources)

    # Print results
    print("Scheduling Results:")
    print(f"Total Energy Consumption (Watt-minutes): {results['energy_consumption']}")
    print(f"Job Success Rate (%): {results['job_success_rate']:.2f}")
    print(f"Number of Active Hosts: {results['active_hosts']}")

    # Optional: Print node states
    for node in nodes:
        print(f"Node {node.name} - State: {node.state}, CPU Used: {node.cpu_used}, Power: {node.get_power_consumption()} Watts")

Scheduling Results:
Total Energy Consumption (Watt-minutes): 2534400
Job Success Rate (%): 100.00
Number of Active Hosts: 1
Node node-11 - State: Active, CPU Used: 207.7720833333335, Power: 4400 Watts
Node node-13 - State: Idle, CPU Used: 0, Power: 3080 Watts
Node node-14 - State: Idle, CPU Used: 0, Power: 3080 Watts


In [7]:
type(workload["cpu_load"])

pandas.core.series.Series

## 加上更多資料

In [2]:
NODE_SPEC = {
    "node-32": {
        "cpu_cores": 32,  # 16 * 2
        "memory_gb": 256,
        "gpu_count": 8,  # NVIDIA Tesla V100 (32G) * 8
        "gpu_memory_gb": 32 * 8,  # Total GPU memory
        "power_active": 4400,  # 2200 * 2
        "power_idle": 3080,    # 70% of active
        "power_standby": 440   # 10% of active
    },
    "node-11": {
        "cpu_cores": 44,  # 22 * 2
        "memory_gb": 512,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # 1100 * 2
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-12": {
        "cpu_cores": 44,  # 22 * 2
        "memory_gb": 512,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # 1100 * 2
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-13": {
        "cpu_cores": 44,  # 22 * 2
        "memory_gb": 512,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # 1100 * 2
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-14": {
        "cpu_cores": 96,  # 24 * 4
        "memory_gb": 512,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # 1100 * 2
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "esxi-31": {
        "cpu_cores": 28,  # 14 * 2
        "memory_gb": 256,
        "gpu_count": 2,   # NVIDIA Tesla V100 (16G) * 2
        "gpu_memory_gb": 16 * 2,  # Total GPU memory
        "power_active": 3200,  # 1600 * 2
        "power_idle": 2240,    # 70% of active
        "power_standby": 320   # 10% of active
    },
    "node-15": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-31
        "gpu_count": 1,   # NVIDIA Tesla V100 (16G) * 1
        "gpu_memory_gb": 16,
        "power_active": 1600,  # Assumed from esxi-31
        "power_idle": 1120,    # 70% of active
        "power_standby": 160   # 10% of active
    },
    "node-16": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-31
        "gpu_count": 1,   # NVIDIA Tesla V100 (16G) * 1
        "gpu_memory_gb": 16,
        "power_active": 1600,  # Assumed from esxi-31
        "power_idle": 1120,    # 70% of active
        "power_standby": 160   # 10% of active
    },
    "node-17": {
        "cpu_cores": 36,  # 18 * 2
        "memory_gb": 256,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 3200,  # 1600 * 2
        "power_idle": 2240,    # 70% of active
        "power_standby": 320   # 10% of active
    },
    "node-18": {
        "cpu_cores": 36,  # 18 * 2
        "memory_gb": 256,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 3200,  # 1600 * 2
        "power_idle": 2240,    # 70% of active
        "power_standby": 320   # 10% of active
    },
    "node-19": {
        "cpu_cores": 80,  # 20 * 4
        "memory_gb": 128,
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 6400,  # 1600 * 4
        "power_idle": 4480,    # 70% of active
        "power_standby": 640   # 10% of active
    },
    "esxi-33": {
        "cpu_cores": 36,  # 18 * 2
        "memory_gb": 256,
        "gpu_count": 8,   # NVIDIA GeForce RTX2080Ti * 8
        "gpu_memory_gb": 11 * 8,  # Approx. 11GB per RTX2080Ti
        "power_active": 4400,  # 2200 * 2
        "power_idle": 3080,    # 70% of active
        "power_standby": 440   # 10% of active
    },
    "node-151": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-152": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-153": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-154": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-155": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-156": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-157": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-158": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-33
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-33
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "esxi-34": {
        "cpu_cores": 36,  # 18 * 2
        "memory_gb": 256,
        "gpu_count": 8,   # NVIDIA GeForce RTX2080Ti * 8
        "gpu_memory_gb": 11 * 8,  # Approx. 11GB per RTX2080Ti
        "power_active": 4400,  # 2200 * 2
        "power_idle": 3080,    # 70% of active
        "power_standby": 440   # 10% of active
    },
    "node-161": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-162": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-163": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-164": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-165": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-166": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-167": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 0,   # No GPU
        "gpu_memory_gb": 0,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "node-168": {
        "cpu_cores": 4,
        "memory_gb": 256,  # Assumed from esxi-34
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 2200,  # Assumed from esxi-34
        "power_idle": 1540,    # 70% of active
        "power_standby": 220   # 10% of active
    },
    "esxi-35": {
        "cpu_cores": 32,  # 16 * 2
        "memory_gb": 128,
        "gpu_count": 3,   # NVIDIA GeForce RTX2080Ti * 3
        "gpu_memory_gb": 11 * 3,  # Approx. 11GB per RTX2080Ti
        "power_active": 3200,  # 1600 * 2
        "power_idle": 2240,    # 70% of active
        "power_standby": 320   # 10% of active
    },
    "node-141": {
        "cpu_cores": 4,
        "memory_gb": 128,  # Assumed from esxi-35
        "gpu_count": 1,   # NVIDIA GeForce RTX2080Ti * 1
        "gpu_memory_gb": 11,
        "power_active": 1600,  # Assumed from esxi-35
        "power_idle": 1120,    # 70% of active
        "power_standby": 160   # 10% of active
    },
    "esxi-36": {
        "cpu_cores": 32,  # 16 * 2
        "memory_gb": 128,
        "gpu_count": 3,   # NVIDIA GeForce RTX3080 * 3
        "gpu_memory_gb": 12 * 3,  # Approx. 12GB per RTX3080
        "power_active": 2400,  # 2400 * 1
        "power_idle": 1680,    # 70% of active
        "power_standby": 240   # 10% of active
    },
    "node-171": {
        "cpu_cores": 4,
        "memory_gb": 128,  # Assumed from esxi-36
        "gpu_count": 1,   # NVIDIA GeForce RTX3080 * 1
        "gpu_memory_gb": 12,
        "power_active": 2400,  # Assumed from esxi-36
        "power_idle": 1680,    # 70% of active
        "power_standby": 240   # 10% of active
    },
    "node-172": {
        "cpu_cores": 4,
        "memory_gb": 128,  # Assumed from esxi-36
        "gpu_count": 2,   # NVIDIA GeForce RTX3080 * 2
        "gpu_memory_gb": 12 * 2,
        "power_active": 2400,  # Assumed from esxi-36
        "power_idle": 1680,    # 70% of active
        "power_standby": 240   # 10% of active
    },
    "esxi-37": {
        "cpu_cores": 32,  # 16 * 2
        "memory_gb": 128,
        "gpu_count": 3,   # NVIDIA GeForce RTX3080 * 3
        "gpu_memory_gb": 12 * 3,  # Approx. 12GB per RTX3080
        "power_active": 2400,  # 2400 * 1
        "power_idle": 1680,    # 70% of active
        "power_standby": 240   # 10% of active
    }
}

In [3]:
import pandas as pd
import numpy as np
from datetime import timedelta
from typing import Dict, List, Tuple

# Node Specifications (Updated with all nodes)


# Constants from the paper
JOB_TYPES = ["AR", "IM", "BE"]  # Advance Reservation, Immediate, Best Effort
JOB_DISTRIBUTION = {"AR": 0.2, "IM": 0.3, "BE": 0.5}  # Percentage of each job type
IDLE_THRESHOLD_MINUTES = 30  # I_th
MIN_IDLE_HOSTS = 1

# Load new workload data (15:00 to 16:00 on 2024-11-04)
workload_data = [
    {"hour": "2024-11-04 15:00:00", "nodename": "node-167", "cpu_load": 4.01083333333333, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-168", "cpu_load": 1.01833333333333, "cpu_alloc": 2.0, "cpu_idle": 2.0, "cpu_total": 4.0, "gpu_power_usage": 62.7775, "gpu_mem_used": 296.666666666667, "gpu_gpu_usage": 2.91666666666667, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-19", "cpu_load": 5.01083333333333, "cpu_alloc": 5.0, "cpu_idle": 75.0, "cpu_total": 80.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-171", "cpu_load": 0.00166666666666667, "cpu_alloc": 0.0, "cpu_idle": 4.0, "cpu_total": 4.0, "gpu_power_usage": 59.8541666666667, "gpu_mem_used": 1.0, "gpu_gpu_usage": 0.0, "gpu_fan": 30.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-18", "cpu_load": 28.0041666666667, "cpu_alloc": 36.0, "cpu_idle": 0.0, "cpu_total": 36.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-166", "cpu_load": 1.0225, "cpu_alloc": 2.0, "cpu_idle": 2.0, "cpu_total": 4.0, "gpu_power_usage": 75.3408333333333, "gpu_mem_used": 298.166666666667, "gpu_gpu_usage": 1.75, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-17", "cpu_load": 36.0, "cpu_alloc": 36.0, "cpu_idle": 0.0, "cpu_total": 36.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-165", "cpu_load": 4.00333333333333, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-16", "cpu_load": 0.000833333333333333, "cpu_alloc": 0.0, "cpu_idle": 4.0, "cpu_total": 4.0, "gpu_power_usage": 34.5375, "gpu_mem_used": 1.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-163", "cpu_load": 4.0125, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-162", "cpu_load": 4.13833333333333, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 93.5875, "gpu_mem_used": 7397.0, "gpu_gpu_usage": 33.8333333333333, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-161", "cpu_load": 4.0225, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 75.4791666666667, "gpu_mem_used": 279.0, "gpu_gpu_usage": 24.0833333333333, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-15", "cpu_load": 0.025, "cpu_alloc": 1.0, "cpu_idle": 3.0, "cpu_total": 4.0, "gpu_power_usage": 36.1758333333333, "gpu_mem_used": 6863.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-141", "cpu_load": 1.01333333333333, "cpu_alloc": 2.0, "cpu_idle": 2.0, "cpu_total": 4.0, "gpu_power_usage": 72.4166666666667, "gpu_mem_used": 296.333333333333, "gpu_gpu_usage": 1.16666666666667, "gpu_fan": 29.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-14", "cpu_load": 95.1041666666667, "cpu_alloc": 95.0, "cpu_idle": 1.0, "cpu_total": 96.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-13", "cpu_load": 44.1266666666667, "cpu_alloc": 44.0, "cpu_idle": 0.0, "cpu_total": 44.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-32", "cpu_load": 26.6775, "cpu_alloc": 30.0, "cpu_idle": 2.0, "cpu_total": 32.0, "gpu_power_usage": 102.4059375, "gpu_mem_used": 8790.20833333333, "gpu_gpu_usage": 26.4166666666667, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-11", "cpu_load": 44.1275, "cpu_alloc": 44.0, "cpu_idle": 0.0, "cpu_total": 44.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-164", "cpu_load": 1.01833333333333, "cpu_alloc": 2.0, "cpu_idle": 2.0, "cpu_total": 4.0, "gpu_power_usage": 59.665, "gpu_mem_used": 295.833333333333, "gpu_gpu_usage": 1.58333333333333, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-12", "cpu_load": 44.15, "cpu_alloc": 44.0, "cpu_idle": 0.0, "cpu_total": 44.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 15:00:00", "nodename": "node-172", "cpu_load": 0.00416666666666667, "cpu_alloc": 0.0, "cpu_idle": 4.0, "cpu_total": 4.0, "gpu_power_usage": 54.8358333333333, "gpu_mem_used": 1.0, "gpu_gpu_usage": 0.0, "gpu_fan": 30.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-161", "cpu_load": 4.00583333333333, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 74.98, "gpu_mem_used": 279.0, "gpu_gpu_usage": 24.9166666666667, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-32", "cpu_load": 27.5108333333333, "cpu_alloc": 30.0, "cpu_idle": 2.0, "cpu_total": 32.0, "gpu_power_usage": 91.5228125, "gpu_mem_used": 8959.25, "gpu_gpu_usage": 20.7395833333333, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-19", "cpu_load": 43.175, "cpu_alloc": 43.5, "cpu_idle": 36.5, "cpu_total": 80.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-18", "cpu_load": 28.0175, "cpu_alloc": 36.0, "cpu_idle": 0.0, "cpu_total": 36.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-172", "cpu_load": 0.000833333333333333, "cpu_alloc": 0.0, "cpu_idle": 4.0, "cpu_total": 4.0, "gpu_power_usage": 54.9441666666667, "gpu_mem_used": 1.0, "gpu_gpu_usage": 0.0, "gpu_fan": 30.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-171", "cpu_load": 0.00666666666666667, "cpu_alloc": 0.0, "cpu_idle": 4.0, "cpu_total": 4.0, "gpu_power_usage": 60.1966666666667, "gpu_mem_used": 1.0, "gpu_gpu_usage": 0.0, "gpu_fan": 30.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-17", "cpu_load": 36.0016666666667, "cpu_alloc": 36.0, "cpu_idle": 0.0, "cpu_total": 36.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-168", "cpu_load": 1.0125, "cpu_alloc": 2.0, "cpu_idle": 2.0, "cpu_total": 4.0, "gpu_power_usage": 58.6608333333333, "gpu_mem_used": 297.666666666667, "gpu_gpu_usage": 0.916666666666667, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-166", "cpu_load": 1.01916666666667, "cpu_alloc": 2.0, "cpu_idle": 2.0, "cpu_total": 4.0, "gpu_power_usage": 66.7841666666667, "gpu_mem_used": 295.0, "gpu_gpu_usage": 1.58333333333333, "gpu_fan": 0.0},
    {"hour": "2024-11-04 16:00:00", "nodename": "node-165", "cpu_load": 4.00083333333333, "cpu_alloc": 4.0, "cpu_idle": 0.0, "cpu_total": 4.0, "gpu_power_usage": 0.0, "gpu_mem_used": 0.0, "gpu_gpu_usage": 0.0, "gpu_fan": 0.0}
]

workload = pd.DataFrame(workload_data)

# Function to classify jobs based on resource usage
def classify_jobs(cpu_load: float, job_dist: Dict) -> List[Tuple[str, float]]:
    
    total_load = cpu_load    # type(cpu_load) = pandas.core.series.Series
    jobs = []
    for job_type, perc in job_dist.items(): # JOB_DISTRIBUTION = {"AR": 0.2, "IM": 0.3, "BE": 0.5}  # Percentage of each job type
        load = total_load * perc # 用電量沒那麼高
        jobs.append((job_type, load))
        
    print("jobsjobs", jobs)
    return jobs

# Node class to manage state and resources
class Node:
    def __init__(self, name: str):
        self.name = name
        self.state = "Idle"  # Start in Idle state
        self.cpu_used = 0
        self.gpu_power_used = 0
        self.gpu_mem_used = 0
        self.vm_queue = []  # List of jobs (VM requests) in queue
        self.last_active_time = None
        self.available_time = 0  # Earliest available time for VMs

    def update_state(self, time_minutes: float, job_load: float = 0):
        if job_load > 0 and self.state in ["Idle", "Standby"]:
            self.state = "Active"
            self.last_active_time = time_minutes
        elif job_load == 0 and self.state == "Active":
            if time_minutes - self.last_active_time > IDLE_THRESHOLD_MINUTES:
                self.state = "Standby"
            else:
                self.state = "Idle"
        self.cpu_used = job_load
        # Update GPU usage if applicable (simplified for now)
        if NODE_SPEC[self.name]["gpu_count"] > 0:
            self.gpu_power_used = job_load * 10  # Placeholder: adjust based on actual GPU load

    def get_power_consumption(self) -> float:
        if self.state == "Active":
            return NODE_SPEC[self.name]["power_active"]
        elif self.state == "Idle":
            return NODE_SPEC[self.name]["power_idle"]
        else:  # Standby
            return NODE_SPEC[self.name]["power_standby"]

    def can_handle_job(self, cpu_load: float, gpu_power: float, gpu_mem: float) -> bool:
        cpu_capacity = NODE_SPEC[self.name]["cpu_cores"] * 100  # Assume 100 units/core
        gpu_capacity = NODE_SPEC[self.name]["gpu_count"] * 100  # Placeholder GPU capacity
        return (self.cpu_used + cpu_load <= cpu_capacity and 
                (NODE_SPEC[self.name]["gpu_count"] == 0 or 
                 (self.gpu_power_used + gpu_power <= gpu_capacity and 
                  self.gpu_mem_used + gpu_mem <= NODE_SPEC[self.name]["gpu_memory_gb"] * 1024)))  # Convert GB to MB

# EAVMAT Scheduling Algorithm
def eavmat_scheduling(nodes: List[Node], workload: pd.DataFrame) -> Dict:
    results = {"energy_consumption": 0, "job_success_rate": 0, "active_hosts": 0}
    total_jobs = 0
    successful_jobs = 0

    for idx, row in workload.iterrows():
        time = pd.to_datetime(row["hour"])
        cpu_load = row["cpu_load"]
        gpu_power = row["gpu_power_usage"]
        gpu_mem = row["gpu_mem_used"]
        nodename = row["nodename"]

        # Classify jobs into AR, IM, BE for this node
        jobs = classify_jobs(cpu_load, JOB_DISTRIBUTION)
        total_jobs += len(jobs)

        # Get the specific node
        node = next(n for n in nodes if n.name == nodename)

        for job_type, load in jobs:
            scheduled = False
            # Try to schedule on the specific node first (since workload is per node)
            if node.can_handle_job(load, gpu_power, gpu_mem):
                node.cpu_used += load
                if NODE_SPEC[nodename]["gpu_count"] > 0:
                    node.gpu_power_used += gpu_power
                    node.gpu_mem_used += gpu_mem
                node.update_state((time - pd.Timestamp("2024-11-04 15:00:00")).total_seconds() / 60, load)
                node.available_time = (time + timedelta(hours=1) - pd.Timestamp("2024-11-04 15:00:00")).total_seconds() / 60
                scheduled = True
                successful_jobs += 1
            else:
                # If this node can't handle, try other active/idle nodes (simplified)
                other_nodes = [n for n in nodes if n.name != nodename and n.state in ["Active", "Idle"]]
                for other_node in sorted(other_nodes, key=lambda x: x.available_time):
                    if other_node.can_handle_job(load, gpu_power, gpu_mem):
                        other_node.cpu_used += load
                        if NODE_SPEC[other_node.name]["gpu_count"] > 0:
                            other_node.gpu_power_used += gpu_power
                            other_node.gpu_mem_used += gpu_mem
                        other_node.update_state((time - pd.Timestamp("2024-11-04 15:00:00")).total_seconds() / 60, load)
                        other_node.available_time = (time + timedelta(hours=1) - pd.Timestamp("2024-11-04 15:00:00")).total_seconds() / 60
                        other_node.state = "Active"
                        scheduled = True
                        successful_jobs += 1
                        break

            if not scheduled and job_type in ["AR", "IM"]:
                # Preempt BE job if possible (simplified: assume BE jobs can be preempted on this node or others)
                for target_node in [node] + other_nodes:
                    if target_node.vm_queue and any(j[0] == "BE" for j in target_node.vm_queue):
                        target_node.vm_queue = [j for j in target_node.vm_queue if j[0] != "BE"]
                        target_node.cpu_used += load
                        if NODE_SPEC[target_node.name]["gpu_count"] > 0:
                            target_node.gpu_power_used += gpu_power
                            target_node.gpu_mem_used += gpu_mem
                        target_node.update_state((time - pd.Timestamp("2024-11-04 15:00:00")).total_seconds() / 60, load)
                        target_node.available_time = (time + timedelta(hours=1) - pd.Timestamp("2024-11-04 15:00:00")).total_seconds() / 60
                        scheduled = True
                        successful_jobs += 1
                        break

            if not scheduled:
                # Job rejected (e.g., for AR/IM if no resources available)
                continue

    # Calculate energy consumption over 1 hour (60 minutes)
    total_time_minutes = 60
    for node in nodes:
        state_time = total_time_minutes  # Simplified: assume node stays in current state for entire period
        results["energy_consumption"] += node.get_power_consumption() * state_time

    results["job_success_rate"] = (successful_jobs / total_jobs) * 100 if total_jobs > 0 else 0
    results["active_hosts"] = len([n for n in nodes if n.state == "Active"])

    return results

# Main execution
if __name__ == "__main__":
    # Initialize all nodes
    nodes = [Node(name) for name in NODE_SPEC.keys()]

    # Run EAVMAT scheduling
    results = eavmat_scheduling(nodes, workload)

    # Print results
    print("Scheduling Results:")
    print(f"Total Energy Consumption (Watt-minutes): {results['energy_consumption']}")
    print(f"Job Success Rate (%): {results['job_success_rate']:.2f}")
    print(f"Number of Active Hosts: {results['active_hosts']}")

    # Optional: Print node states
    for node in nodes:
        print(f"Node {node.name} - State: {node.state}, CPU Used: {node.cpu_used}, GPU Power Used: {node.gpu_power_used}, Power: {node.get_power_consumption()} Watts")

jobsjobs [('AR', 0.802166666666666), ('IM', 1.2032499999999988), ('BE', 2.005416666666665)]
jobsjobs [('AR', 0.203666666666666), ('IM', 0.305499999999999), ('BE', 0.509166666666665)]
jobsjobs [('AR', 1.002166666666666), ('IM', 1.5032499999999989), ('BE', 2.505416666666665)]
jobsjobs [('AR', 0.000333333333333334), ('IM', 0.000500000000000001), ('BE', 0.000833333333333335)]
jobsjobs [('AR', 5.60083333333334), ('IM', 8.40125000000001), ('BE', 14.00208333333335)]
jobsjobs [('AR', 0.20450000000000002), ('IM', 0.30674999999999997), ('BE', 0.51125)]
jobsjobs [('AR', 7.2), ('IM', 10.799999999999999), ('BE', 18.0)]
jobsjobs [('AR', 0.8006666666666661), ('IM', 1.200999999999999), ('BE', 2.001666666666665)]
jobsjobs [('AR', 0.0001666666666666666), ('IM', 0.0002499999999999999), ('BE', 0.0004166666666666665)]
jobsjobs [('AR', 0.8025000000000001), ('IM', 1.20375), ('BE', 2.00625)]
jobsjobs [('AR', 0.8276666666666661), ('IM', 1.241499999999999), ('BE', 2.069166666666665)]
jobsjobs [('AR', 0.8045), (

In [4]:

# Main execution
if __name__ == "__main__":
    # Initialize all nodes
    nodes = [Node(name) for name in NODE_SPEC.keys()]

    # Run EAVMAT scheduling
    results = eavmat_scheduling(nodes, workload)

    # Print results
    print("Scheduling Results:")
    print(f"Total Energy Consumption (Watt-minutes): {results['energy_consumption']}")
    print(f"Job Success Rate (%): {results['job_success_rate']:.2f}")
    print(f"Number of Active Hosts: {results['active_hosts']}")

    # Optional: Print node states
    for node in nodes:
        print(f"Node {node.name} - State: {node.state}, CPU Used: {node.cpu_used}, GPU Power Used: {node.gpu_power_used}, Power: {node.get_power_consumption()} Watts")

jobsjobs [('AR', 0.802166666666666), ('IM', 1.2032499999999988), ('BE', 2.005416666666665)]
jobsjobs [('AR', 0.203666666666666), ('IM', 0.305499999999999), ('BE', 0.509166666666665)]
jobsjobs [('AR', 1.002166666666666), ('IM', 1.5032499999999989), ('BE', 2.505416666666665)]
jobsjobs [('AR', 0.000333333333333334), ('IM', 0.000500000000000001), ('BE', 0.000833333333333335)]
jobsjobs [('AR', 5.60083333333334), ('IM', 8.40125000000001), ('BE', 14.00208333333335)]
jobsjobs [('AR', 0.20450000000000002), ('IM', 0.30674999999999997), ('BE', 0.51125)]
jobsjobs [('AR', 7.2), ('IM', 10.799999999999999), ('BE', 18.0)]
jobsjobs [('AR', 0.8006666666666661), ('IM', 1.200999999999999), ('BE', 2.001666666666665)]
jobsjobs [('AR', 0.0001666666666666666), ('IM', 0.0002499999999999999), ('BE', 0.0004166666666666665)]
jobsjobs [('AR', 0.8025000000000001), ('IM', 1.20375), ('BE', 2.00625)]
jobsjobs [('AR', 0.8276666666666661), ('IM', 1.241499999999999), ('BE', 2.069166666666665)]
jobsjobs [('AR', 0.8045), (