In [3]:
import numpy as np
import pandas as pd

# Load the VM trace from CSV
vm_data = pd.read_csv("sample_vm_data.csv")

# Keep only the first 8 characters of every vm_id
vm_data["vm_id"] = vm_data["vm_id"].str[:8]

# Normalize core_usage values (0-1): each *_core_usage column is divided
# by the vCPU count of the same row
for col_prefix in ("min", "avg", "max_avg", "max"):
    colname = f"{col_prefix}_core_usage"
    vm_data[colname] = vm_data[colname] / vm_data["vCPU"]

vm_data.head()

Unnamed: 0,vm_id,timestamp,min_cpu,avg_cpu,max_avg_cpu,max_cpu,vCPU,vm_category,min_core_usage,avg_core_usage,max_avg_core_usage,max_core_usage
0,9ZLTb00Z,0,0.672509,1.239094,2.432777,8.072619,2,Delay-insensitive,0.006725,0.012391,0.024328,0.080726
1,9ZLTb00Z,1,0.673908,1.898868,5.303702,28.246261,2,Delay-insensitive,0.006739,0.018989,0.053037,0.282463
2,9ZLTb00Z,2,0.666265,1.237352,2.334294,8.346915,2,Delay-insensitive,0.006663,0.012374,0.023343,0.083469
3,9ZLTb00Z,3,0.667036,1.286585,2.381045,8.184768,2,Delay-insensitive,0.00667,0.012866,0.02381,0.081848
4,9ZLTb00Z,4,0.672907,1.151142,1.476706,5.630045,2,Delay-insensitive,0.006729,0.011511,0.014767,0.0563


In [4]:
# Split rows into on-demand VMs ("interactive" category, case-insensitive) and the rest
is_on_demand = vm_data["vm_category"].str.lower() == "interactive"
on_demand_df = vm_data[is_on_demand].reset_index(drop=True)
batch_spot_df = vm_data[~is_on_demand].reset_index(drop=True)

# Split the rest into spot VMs and batch jobs:
# half of the distinct vm_ids (rounded down) are drawn at random, seed 42, as spot VMs
batch_spot_vm_id = batch_spot_df["vm_id"].unique()
rng = np.random.default_rng(42)
n_spot = len(batch_spot_vm_id) // 2
spot_vm_id = rng.choice(batch_spot_vm_id, size=n_spot, replace=False)

in_spot = batch_spot_df["vm_id"].isin(spot_vm_id)
spot_vm_df = batch_spot_df[in_spot].reset_index(drop=True)
batch_job_df = batch_spot_df[~in_spot].reset_index(drop=True)

# Replace vm_id by a 0-based integer index (order of first appearance) within each group
on_demand_vm_map = {
    vm: idx
    for vm, idx in zip(on_demand_df["vm_id"].unique(), range(on_demand_df["vm_id"].nunique()))
}
on_demand_df["vm_id"] = on_demand_df["vm_id"].map(on_demand_vm_map)

spot_vm_map = {
    vm: idx
    for vm, idx in zip(spot_vm_df["vm_id"].unique(), range(spot_vm_df["vm_id"].nunique()))
}
spot_vm_df["vm_id"] = spot_vm_df["vm_id"].map(spot_vm_map)

batch_job_vm_map = {
    vm: idx
    for vm, idx in zip(batch_job_df["vm_id"].unique(), range(batch_job_df["vm_id"].nunique()))
}
batch_job_df["vm_id"] = batch_job_df["vm_id"].map(batch_job_vm_map)

# Group sizes
print("# of on-demand VMs:", on_demand_df["vm_id"].nunique())
print("# of spot VMs:", spot_vm_df["vm_id"].nunique())
print("# of batch jobs (VM):", batch_job_df["vm_id"].nunique())

# on-demand VMs
on_demand_df.head()

# of on-demand VMs: 4
# of spot VMs: 2
# of batch jobs (VM): 3


Unnamed: 0,vm_id,timestamp,min_cpu,avg_cpu,max_avg_cpu,max_cpu,vCPU,vm_category,min_core_usage,avg_core_usage,max_avg_core_usage,max_core_usage
0,0,0,17.796666,20.80112,20.953196,22.686825,2,Interactive,0.177967,0.208011,0.209532,0.226868
1,0,1,13.438928,20.747045,20.942667,23.905032,2,Interactive,0.134389,0.20747,0.209427,0.23905
2,0,2,16.233479,20.797607,20.962849,22.147106,2,Interactive,0.162335,0.207976,0.209628,0.221471
3,0,3,16.473461,20.801938,20.956916,23.239626,2,Interactive,0.164735,0.208019,0.209569,0.232396
4,0,4,16.422663,20.84223,21.185867,23.196715,2,Interactive,0.164227,0.208422,0.211859,0.231967


In [11]:
import math
beta = 0.25

# Scratch table, one row per arrival period a_j in 0..23:
#   a_j | (23 - a_j) / (1 + beta) | its floor (max_workload) |
#   a_j + max_workload + ceil(max_workload * beta)
for a_j in range(24):
    remaining = 23 - a_j
    fractional_limit = remaining / (1+beta)
    max_workload = math.floor(fractional_limit)
    finish = a_j + max_workload + math.ceil(max_workload*beta)
    print(a_j, fractional_limit, max_workload, finish)

0 18.4 18 23
1 17.6 17 23
2 16.8 16 22
3 16.0 16 23
4 15.2 15 23
5 14.4 14 23
6 13.6 13 23
7 12.8 12 22
8 12.0 12 23
9 11.2 11 23
10 10.4 10 23
11 9.6 9 23
12 8.8 8 22
13 8.0 8 23
14 7.2 7 23
15 6.4 6 23
16 5.6 5 23
17 4.8 4 22
18 4.0 4 23
19 3.2 3 23
20 2.4 2 23
21 1.6 1 23
22 0.8 0 22
23 0.0 0 23


In [14]:
# Scratch check: arrival 2 + workload 16 + buffer 16*beta, i.e. the last column of the
# table printed by the earlier scratch cell for a_j = 2, but without math.ceil on the
# buffer term. `beta` must already be defined by that earlier cell.
2 + 16 + 16*beta

22.0

In [18]:
# data preparation

import math

usage_target = "max_avg_core_usage"
spot_buffer_ratio = 0.25  # Beta

# on-demand VM data: usage keyed by timestamp, plus first (a_j) and last (d_j) timestamp
on_demand_vm_data = {}
for vm_id, g in on_demand_df.groupby("vm_id"):
    timestamp = g["timestamp"].values
    usage = g[usage_target].values
    record = {}
    record["required_vCPU"] = g["vCPU"].iloc[0]
    record["usage_data"] = dict(zip(timestamp, usage))
    record["a_j"] = timestamp.min()
    record["d_j"] = timestamp.max()
    on_demand_vm_data[vm_id] = record

# spot VM data: workloads keyed by their row position within the group;
# d_k = a_k + (#workloads) + ceil(#workloads * spot_buffer_ratio)
spot_vm_data = {}
for vm_id, g in spot_vm_df.groupby("vm_id"):
    timestamp = g["timestamp"].values
    usage = g[usage_target].values
    n_workloads = len(usage)
    first_ts = timestamp.min()
    record = {}
    record["required_vCPU"] = g["vCPU"].iloc[0]
    record["workloads"] = dict(enumerate(usage))
    record["a_k"] = first_ts
    record["d_k"] = first_ts + n_workloads + math.ceil(n_workloads * spot_buffer_ratio)
    spot_vm_data[vm_id] = record

# batch job data: workloads keyed by row position; no time window is stored
batch_job_data = {}
for vm_id, g in batch_job_df.groupby("vm_id"):
    timestamp = g["timestamp"].values
    usage = g[usage_target].values
    record = {}
    record["required_vCPU"] = g["vCPU"].iloc[0]
    record["workloads"] = dict(enumerate(usage))
    batch_job_data[vm_id] = record

In [None]:
from itertools import product

# ----------------------------
# Parameters
# ----------------------------
pCPU = vm_data["vCPU"].max()    # number of pCPUs per server (taken as the largest vCPU value in the data)
E_idle, E_cpu, E_mig = 100, 300, 50
M_on, M_off = 6, 6

# server index: enough servers to cover the peak of (sum of vCPU per timestamp) / pCPU
required_pCPU_per_time = vm_data.groupby("timestamp")["vCPU"].sum()
max_required_server = math.ceil((required_pCPU_per_time / pCPU).max())
I = list(range(max_required_server))

# VM index
J = sorted(on_demand_vm_data.keys())
K = sorted(spot_vm_data.keys())
L = sorted(batch_job_data.keys())

# Workload index (per spot VM / per batch job)
W_sp = {k: list(spot_vm_data[k]["workloads"].keys()) for k in K}
W_bj = {l: list(batch_job_data[l]["workloads"].keys()) for l in L}

# Planning horizon
T = list(range(24))

# VM Activation periods (inclusive on both ends)
T_od = dict()
for j in J:
    a_j = on_demand_vm_data[j]["a_j"]
    d_j = on_demand_vm_data[j]["d_j"]
    T_od[j] = list(range(a_j, d_j+1))

T_sp = dict()
for k in K:
    a_k = spot_vm_data[k]["a_k"]
    # d_k includes the buffer and can run past the planning horizon; clip it so that
    # no spot period is created for which no server/capacity constraint exists.
    d_k = min(spot_vm_data[k]["d_k"], T[-1])
    T_sp[k] = list(range(a_k, d_k+1))

# vCPU utilization
# NOTE: iterating a dict directly yields keys only, so the (key, value) unpacking
# below must go through .items().
c_od = {
    (j, t): c_t
    for j in J
    for t, c_t in on_demand_vm_data[j]["usage_data"].items()
}

c_sp = {
    (k, n): c_t
    for k in K
    for n, c_t in spot_vm_data[k]["workloads"].items()
}

c_bj = {
    (l, n): c_t
    for l in L
    for n, c_t in batch_job_data[l]["workloads"].items()
}

: 

In [None]:
import gurobipy as gp
from gurobipy import GRB

# ----------------------------
# Init Gurobi Model
# ----------------------------
model = gp.Model("Cloud Operation")

# Spot periods restricted to the planning horizon T. Capacity constraints only exist
# for t in T, so a spot variable outside T would be unconstrained.
horizon = set(T)
spot_window = {k: [t for t in T_sp[k] if t in horizon] for k in K}


# ----------------------------
# Define Decision Variables
# ----------------------------
# Server Provisioning
# u / u_on / u_off are linked below by u[i,t+1] = u[i,t] + u_on[i,t] - u_off[i,t];
# u_flag[i] is only used by the server-count objective.
u = model.addVars(I, T, vtype=GRB.BINARY, name="u")
u_on = model.addVars(I, T, vtype=GRB.BINARY, name="u_on")
u_off = model.addVars(I, T, vtype=GRB.BINARY, name="u_off")
u_flag = model.addVars(I, vtype=GRB.BINARY, name="u_flag")

# VM Placement
# x[i,j,t]: on-demand VM j on server i at t (only for t in T_od[j])
x_indices = [(i, j, t) for i in I for j in J for t in T_od[j]]
x = model.addVars(x_indices, vtype=GRB.BINARY, name="x")

# m[j,t]: migration indicator of on-demand VM j between t-1 and t
m_indices = [(j, t) for j in J for t in T_od[j][1:]]
m = model.addVars(m_indices, vtype=GRB.BINARY, name="m")

# y[i,k,n,t]: workload n of spot VM k on server i at t (only inside spot_window[k])
y_indices = [(i,k,n,t) for i in I for k in K for n in W_sp[k] for t in spot_window[k]]
y = model.addVars(y_indices, vtype=GRB.BINARY, name="y")

# z[i,l,n,t]: workload n of batch job l on server i at t
z_indices = [(i,l,n,t) for i in I for l in L for n in W_bj[l] for t in T]
z = model.addVars(z_indices, vtype=GRB.BINARY, name="z")

# Server Load
load_indices = [(i, t) for i in I for t in T]
load = model.addVars(load_indices, lb=0, vtype=GRB.CONTINUOUS, name="load")


# ----------------------------
# Add Constraints
# ----------------------------
# Server Provisioning
for i in I:
    # State transition
    for t in T[:-1]:
        model.addConstr(u[i,t] + u_on[i,t] - u_off[i,t] - u[i,t+1] == 0)

    # Minimum up / down time, truncated at the end of the horizon.
    # (loop variable is `s`, not `l`, to avoid confusion with the batch-job index list L)
    for t in T:
        model.addConstrs(u_on[i,t] - u[i,s] <= 0 for s in range(t+1, min(T[-1],t+M_on)+1))
        model.addConstrs(u_off[i,t] + u[i,s] <= 1 for s in range(t+1, min(T[-1],t+M_off)+1))

# On-demand VM placement & Migration
model.addConstrs(gp.quicksum(x[i,j,t] for i in I) == 1 for j in J for t in T_od[j])
model.addConstrs(m[j,t] >= x[i,j,t] - x[i,j,t-1] for i in I for j in J for t in T_od[j][1:])
model.addConstrs(m[j,t] >= x[i,j,t-1] - x[i,j,t] for i in I for j in J for t in T_od[j][1:])

# Spot VM workload Placement (Preemption)
# At most one workload of spot VM k runs in any period (over all servers).
model.addConstrs(gp.quicksum(y[i,k,n,t] for i in I for n in W_sp[k]) <= 1 for k in K for t in spot_window[k])
# Every workload runs exactly once inside the window. A single constraint on the total
# count (== len(W_sp[k])) would let the solver repeat one cheap workload and skip others.
model.addConstrs(gp.quicksum(y[i,k,n,t] for i in I for t in spot_window[k]) == 1 for k in K for n in W_sp[k])

# Batch Job workload Allocation (Splitting)
# Same reasoning: each workload of each batch job is placed exactly once.
model.addConstrs(gp.quicksum(z[i,l,n,t] for i in I for t in T) == 1 for l in L for n in W_bj[l])

# Server Capacity
for i, t in product(I, T):
    # x and y only exist on their activation periods, so filter before indexing
    on_demand_load = gp.quicksum(c_od[j,t] * x[i,j,t] for j in J if t in T_od[j])
    spot_load = gp.quicksum(c_sp[k,n] * y[i,k,n,t] for k in K if t in spot_window[k] for n in W_sp[k])
    batch_load = gp.quicksum(c_bj[l,n] * z[i,l,n,t] for l in L for n in W_bj[l])

    model.addConstr(load[i,t] == on_demand_load + spot_load + batch_load)
    model.addConstr(load[i,t] <= u[i,t])

# Special Constraint
# (disabled; these lines reference `q` and `active_hours`, which this cell does not define)
# model.addConstrs(x[i,j,t] == 0 for i in I[5:] for j in J for t in active_hours[j])
# model.addConstr(gp.quicksum(q[j,t] for j in J for t in active_hours[j][1:]) <= 20)
# model.addConstr(gp.quicksum(q[j,t] for j in J for t in active_hours[j][1:]) >= 10)


# ----------------------------
# Set Objective
# ----------------------------
objective = "server"

if objective == "energy":
    # Energy Consumption Minimization
    energy_consumption_idle = E_idle * gp.quicksum(u[i,t] for i in I for t in T)
    # `load` is the server-load variable; `L` is the list of batch-job ids in this formulation
    energy_consumption_cpu = E_cpu / pCPU * gp.quicksum(load[i,t] for i in I for t in T)
    energy_consumption_mig = E_mig * gp.quicksum(m[j,t] for j in J for t in T_od[j][1:])

    # the objective belongs to the Model object (`model`); `m` holds the migration variables
    model.setObjective(energy_consumption_idle + energy_consumption_cpu + energy_consumption_mig, GRB.MINIMIZE)

else:
    # Server Count Minimization
    # u_flag[i] is forced to 1 as soon as server i is on in any period
    model.addConstrs(u_flag[i] >= u[i,t] for i in I for t in T)
    model.setObjective(gp.quicksum(u_flag[i] for i in I), GRB.MINIMIZE)

In [2]:
from itertools import product

# Quick demo: product() yields the Cartesian product of its inputs as tuples,
# with the rightmost input varying fastest.
list(product([1, 2], [3, 4]))

[(1, 3), (1, 4), (2, 3), (2, 4)]

In [None]:
import math
import gurobipy as gp
from gurobipy import GRB

# ----------------------------
# Parameters
# ----------------------------
PCPU = 24       # number of pCPUs per server
D = 4           # deadline for batch + spot workload (in time periods)
E_idle, E_cpu, E_mig = 100, 300, 50
M_on, M_off = 6, 6

# Name of the time-period column. The frames loaded above expose "timestamp"
# (there is no "hour" column), so every time-based lookup goes through this name.
TIME_COL = "timestamp"

# ----------------------------
# Data Driven Parameters
# ----------------------------
# server index
# NOTE(review): usage is divided by PCPU here, while the model cell bounds each
# server's load by u[i,t] (at most 1) - confirm the units agree.
hourly_required_pCPU = vm_data.groupby(TIME_COL)["avg_core_usage"].sum()
max_required_server = math.ceil((hourly_required_pCPU / PCPU).max())
I = list(range(max_required_server))

# VM index
J = sorted(set(on_demand_df["vm_id"]))

# Time horizon: T_data covers the periods present in the data,
# T extends it by D periods so deferred workload can still be scheduled
last_period = int(vm_data[TIME_COL].max())
T_data = list(range(last_period + 1))
T = list(range(last_period + D + 1))

# W_t: batch + spot workload requested at time t
# (the data records execution time, but it is treated as the request time)
# Periods without any batch/spot row get 0 instead of raising KeyError.
batch_spot_workload = (
    batch_spot_df.groupby(TIME_COL)["avg_core_usage"].sum()
    .reindex(T_data, fill_value=0.0)
)
W = {t: float(batch_spot_workload[t]) for t in T_data}

# a_j, d_j: first and last time period of on-demand VM j
vm_time_table = (
    on_demand_df
    .groupby("vm_id", as_index=False)
    .agg(a_j=(TIME_COL, "min"), d_j=(TIME_COL, "max"))
)
# Look rows up by vm_id explicitly rather than relying on the positional
# index happening to coincide with the vm_id values.
_vm_window = vm_time_table.set_index("vm_id")
active_hours = {}
for j in J:
    a, d = (int(v) for v in _vm_window.loc[j, ["a_j", "d_j"]])
    active_hours[j] = list(range(a, d+1))

# c_jt: pCPU usage of on-demand VM j at time t
_on_demand_df = on_demand_df.set_index(["vm_id", TIME_COL])
c = {(j, t): _on_demand_df.loc[(j, t), "avg_core_usage"].item()
     for j in J for t in active_hours[j]}

In [None]:
# ----------------------------
# Init Gurobi Model
# ----------------------------
m = gp.Model("Cloud Operation")

# ----------------------------
# Define Decision Variables
# ----------------------------
# u_it: Server on/off
u = m.addVars(I, T, vtype=GRB.BINARY, name="u")
u_on = m.addVars(I, T, vtype=GRB.BINARY, name="u_on")
u_off = m.addVars(I, T, vtype=GRB.BINARY, name="u_off")

# x_ijt: On-demand VM placement
x_indices = [(i, j, t) for i in I for j in J for t in active_hours[j]]
x = m.addVars(x_indices, vtype=GRB.BINARY, name="x")

# w_itl: Amount of batch + spot workload requested at time t, allocated to server i at time l
# (l ranges over t+1 .. t+D)
w_indices = [(i, t, l) for i in I for t in T_data for l in range(t+1, t+D+1)]
w = m.addVars(w_indices, lb=0.0, vtype=GRB.CONTINUOUS, name="w")

# L_it: Load of server i at time t
L = m.addVars(I, T, lb=0.0, vtype=GRB.CONTINUOUS, name="L")

# q_jt: On-demand VM j migrates at time t
q_indices = [(j, t) for j in J for t in active_hours[j][1:]]
q = m.addVars(q_indices, vtype=GRB.BINARY, name="q")

# ----------------------------
# Add Constraints
# ----------------------------
# Server Provisioning
for i in I:
    # State transition: u[i,t+1] = u[i,t] + u_on[i,t] - u_off[i,t]
    for t in T[:-1]:
        m.addConstr(u[i,t] + u_on[i,t] - u_off[i,t] - u[i,t+1] == 0)

    # Minimum up / down time. Near the end of the horizon the window is truncated
    # rather than dropped, so the last M_on / M_off periods are still constrained.
    for t in T:
        m.addConstrs(u_on[i,t] - u[i,s] <= 0 for s in range(t+1, min(T[-1], t+M_on)+1))
        m.addConstrs(u_off[i,t] + u[i,s] <= 1 for s in range(t+1, min(T[-1], t+M_off)+1))

# On-demand VM placement
m.addConstrs(gp.quicksum(x[i,j,t] for i in I) == 1 for j in J for t in active_hours[j])

# Batch + spot workload allocation with deadline
for t in T_data:
    m.addConstr(gp.quicksum(w[i,t,l] for i in I for l in range(t+1, t+D+1)) == W[t])

# Server Capacity constraint
last_request = T_data[-1]
for i in I:
    for t in T:
        on_demand_load = gp.quicksum(c[j,t]*x[i,j,t] for j in J if t in active_hours[j])
        # Workload executed at t was requested at r with r+1 <= t <= r+D, i.e.
        # r in [t-D, t-1]. The lower bound must be t-D (not t-D+1); otherwise
        # workload run exactly at its deadline never counts toward any server load.
        deferred_load = gp.quicksum(
            w[i,r,t] for r in range(max(t-D, 0), min(t, last_request+1))
        )
        m.addConstr(L[i,t] == on_demand_load + deferred_load)
        m.addConstr(L[i,t] <= u[i,t])

# VM Migration model
# q[j,t] must be 1 whenever VM j's server assignment differs between t-1 and t
for j in J:
    for t in active_hours[j][1:]:
        m.addConstrs(q[j,t] >= x[i,j,t] - x[i,j,t-1] for i in I)
        m.addConstrs(q[j,t] >= x[i,j,t-1] - x[i,j,t] for i in I)


Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2762406


Academic license 2762406 - for non-commercial use only - registered to da___@yonsei.ac.kr


: 