In [2]:
import pandas as pd

# Load the sample VM trace and keep only the first six characters of each VM id.
vm_data = pd.read_csv("sample_vm_data.csv").assign(
    vm_id=lambda frame: frame["vm_id"].str[:6]
)
vm_data.head()

Unnamed: 0,vm_id,hour,min_cpu,avg_cpu,max_cpu,vCPU,vm_category,min_core_usage,avg_core_usage,max_core_usage
0,+1CC6E,0,2.67684,4.676953,34.496406,2,Delay-insensitive,0.053537,0.093539,0.689928
1,+1CC6E,1,2.640696,4.641649,33.07198,2,Delay-insensitive,0.052814,0.092833,0.66144
2,+1CC6E,2,2.630494,4.638883,35.5531,2,Delay-insensitive,0.05261,0.092778,0.711062
3,+1CC6E,3,2.661216,4.652888,33.991978,2,Delay-insensitive,0.053224,0.093058,0.67984
4,+1CC6E,4,2.774083,5.593173,87.126481,2,Delay-insensitive,0.055482,0.111863,1.74253


In [3]:
# Split workloads: rows whose category is "interactive" (case-insensitive) are
# treated as on-demand; every other row goes to the batch/spot frame.
is_on_demand = vm_data["vm_category"].str.lower() == "interactive"
on_demand_df = vm_data[is_on_demand].reset_index(drop=True)
batch_spot_df = vm_data[~is_on_demand].reset_index(drop=True)

# On-demand VMs: replace each VM hash with a dense integer id, numbered in
# order of first appearance.
vm_hash = on_demand_df["vm_id"].unique()
vm_id_map = {hash_value: new_id for new_id, hash_value in enumerate(vm_hash)}
on_demand_df["vm_id"] = on_demand_df["vm_id"].map(vm_id_map)
on_demand_df

Unnamed: 0,vm_id,hour,min_cpu,avg_cpu,max_cpu,vCPU,vm_category,min_core_usage,avg_core_usage,max_core_usage
0,0,0,1.078837,1.690948,9.506349,2,Interactive,0.021577,0.033819,0.190127
1,0,1,0.994674,1.983506,17.251207,2,Interactive,0.019893,0.039670,0.345024
2,0,2,1.032647,1.698869,8.652320,2,Interactive,0.020653,0.033977,0.173046
3,0,3,1.097672,2.097131,29.076358,2,Interactive,0.021953,0.041943,0.581527
4,0,4,1.104277,1.631217,9.094230,2,Interactive,0.022086,0.032624,0.181885
...,...,...,...,...,...,...,...,...,...,...
6018,254,19,5.816900,9.492391,56.868283,2,Interactive,0.116338,0.189848,1.137366
6019,254,20,5.774605,9.548711,59.471706,2,Interactive,0.115492,0.190974,1.189434
6020,254,21,5.907553,9.556688,61.024761,2,Interactive,0.118151,0.191134,1.220495
6021,254,22,5.986690,10.712679,64.268974,2,Interactive,0.119734,0.214254,1.285379


In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) of the on-demand VM records.
on_demand_df.describe()

Unnamed: 0,vm_id,hour,min_cpu,avg_cpu,max_cpu,vCPU,min_core_usage,avg_core_usage,max_core_usage
count,6023.0,6023.0,6023.0,6023.0,6023.0,6023.0,6023.0,6023.0,6023.0
mean,126.771044,11.555205,5.715911,11.103834,31.135873,3.079528,0.17601,0.343018,0.948414
std,73.659473,6.930329,9.461078,14.95806,27.890429,2.772957,0.350937,0.613306,1.311694
min,0.0,0.0,0.001192,0.096501,0.568587,2.0,4.8e-05,0.005531,0.011372
25%,63.0,6.0,1.370886,3.086517,10.082912,2.0,0.036642,0.074205,0.21917
50%,127.0,12.0,2.965487,5.706586,18.869071,2.0,0.072005,0.12987,0.514282
75%,190.0,18.0,5.321746,11.78276,46.878723,4.0,0.121975,0.317901,1.288016
max,254.0,23.0,85.416649,98.07567,99.384651,24.0,4.867314,9.453995,18.839069


In [5]:
# Spot check: summed avg_core_usage of the batch/spot rows at hour 0, as a plain Python scalar.
batch_spot_df.groupby("hour")["avg_core_usage"].sum().loc[0].item()

292.27505513492105

In [5]:
# Per-hour sum of avg_core_usage over all batch/spot rows.
batch_spot_workload = batch_spot_df.groupby("hour")["avg_core_usage"].agg("sum")
batch_spot_workload

hour
0     292.275055
1     293.746548
2     298.939858
3     301.312576
4     296.759374
5     285.484313
6     271.851183
7     254.931981
8     256.342277
9     260.704574
10    268.696424
11    298.582663
12    296.215529
13    301.814167
14    302.751593
15    301.714000
16    294.007415
17    301.174695
18    294.610913
19    289.503528
20    291.087262
21    291.983679
22    287.980900
23    284.483908
Name: avg_core_usage, dtype: float64

In [None]:
import math
import gurobipy as gp
from gurobipy import GRB

# ----------------------------
# Parameters
# ----------------------------
PCPU = 24       # number of pCPUs per server
D = 4           # deadline (in time periods) for batch + spot workload
# NOTE(review): presumably cost coefficients for idle servers, CPU load and
# migrations; they are not used in the cells visible here - confirm.
E_idle, E_cpu, E_mig = 100, 300, 50
# Window lengths (in periods) used by the server switch-on / switch-off constraints.
M_on, M_off = 6, 6

# ----------------------------
# Data Driven Parameters
# ----------------------------
# Server index: enough servers to hold the largest hourly total of
# avg_core_usage, assuming PCPU cores per server.
hourly_required_pCPU = vm_data.groupby("hour")["avg_core_usage"].sum()
max_required_server = math.ceil((hourly_required_pCPU / PCPU).max())
I = list(range(max_required_server))

# VM index (integer ids of the on-demand VMs)
J = sorted(set(on_demand_df["vm_id"]))

# Time horizon: T_data covers the hours present in the data, T extends it by
# D extra periods so deferred batch/spot work can still be scheduled.
T_data = list(range(vm_data["hour"].max() + 1))
T = list(range(vm_data["hour"].max() + D + 1))

# W_t: batch + spot workload requested at time t
# (the data records when the work ran, but it is treated as the request time)
batch_spot_workload = batch_spot_df.groupby("hour")["avg_core_usage"].sum()
W = {t: batch_spot_workload[t].item() for t in T_data}

# a_j, d_j: first and last time period of on-demand VM j
vm_time_table = (
    on_demand_df
    .groupby("vm_id", as_index=False)
    .agg(a_j=("hour", "min"), d_j=("hour", "max"))
)
# Look the window up by vm_id rather than by row position, so the result does
# not depend on vm_id happening to coincide with the default RangeIndex.
_vm_window = vm_time_table.set_index("vm_id")
active_hours = {}
for j in J:
    a = int(_vm_window.at[j, "a_j"])
    d = int(_vm_window.at[j, "d_j"])
    active_hours[j] = list(range(a, d + 1))

# c_jt: actual pCPU usage of on-demand VM j at time t
_on_demand_df = on_demand_df.set_index(["vm_id", "hour"])
# Every VM needs exactly one sample for each hour between a_j and d_j; fail
# with a clear message instead of an opaque KeyError / .item() error.
if not _on_demand_df.index.is_unique:
    raise ValueError("on_demand_df has more than one row for some (vm_id, hour) pair")
_missing = [
    (j, t) for j in J for t in active_hours[j]
    if (j, t) not in _on_demand_df.index
]
if _missing:
    raise ValueError(
        f"on-demand VMs have no usage sample for {len(_missing)} (vm_id, hour) "
        f"pairs inside their active window, e.g. {_missing[:5]}"
    )
c = {(j, t): _on_demand_df.loc[(j, t), "avg_core_usage"].item()
     for j in J for t in active_hours[j]}

In [None]:
# ----------------------------
# Init Gurobi Model
# ----------------------------
m = gp.Model("Cloud Operation")

# ----------------------------
# Define Decision Variables
# ----------------------------
# u[i,t]: on/off state of server i at time t, plus the matching
# switch-on (u_on) and switch-off (u_off) indicators.
u = m.addVars(I, T, vtype=GRB.BINARY, name="u")
u_on = m.addVars(I, T, vtype=GRB.BINARY, name="u_on")
u_off = m.addVars(I, T, vtype=GRB.BINARY, name="u_off")

# x[i,j,t]: on-demand VM j is placed on server i at time t
# (defined only for the hours in which VM j is active).
x_indices = []
for i in I:
    for j in J:
        x_indices.extend((i, j, t) for t in active_hours[j])
x = m.addVars(x_indices, vtype=GRB.BINARY, name="x")

# w[i,t,l]: amount of batch + spot workload requested at time t that is
# run on server i at time l, where l ranges over t+1 .. t+D.
w_indices = []
for i in I:
    for t in T_data:
        w_indices.extend((i, t, t + delay) for delay in range(1, D + 1))
w = m.addVars(w_indices, lb=0.0, vtype=GRB.CONTINUOUS, name="w")

# L[i,t]: load of server i at time t
L = m.addVars(I, T, lb=0.0, vtype=GRB.CONTINUOUS, name="L")

# q[j,t]: on-demand VM j migrates at time t
# (defined from the second active hour of each VM onwards).
q_indices = []
for j in J:
    q_indices.extend((j, t) for t in active_hours[j][1:])
q = m.addVars(q_indices, vtype=GRB.BINARY, name="q")

# ----------------------------
# Add Constraints
# ----------------------------
# Server provisioning
for i in I:
    # State transition: the state at t+1 is the state at t plus a switch-on
    # minus a switch-off.
    m.addConstrs(
        (u[i, t] + u_on[i, t] - u_off[i, t] - u[i, t + 1]) == 0
        for t in T[:-1]
    )

    # A switch-on at t forces the server to be on in t+1 .. t+M_on.
    # Periods in the last M_on steps of the horizon get no such constraint.
    for t in T[:-M_on]:
        for l in range(t + 1, t + M_on + 1):
            m.addConstr(u_on[i, t] - u[i, l] <= 0)

    # A switch-off at t forces the server to be off in t+1 .. t+M_off.
    # Periods in the last M_off steps of the horizon get no such constraint.
    for t in T[:-M_off]:
        for l in range(t + 1, t + M_off + 1):
            m.addConstr(u_off[i, t] + u[i, l] <= 1)

# On-demand VM placement: in every active hour, each VM sits on exactly one server.
for j in J:
    for t in active_hours[j]:
        m.addConstr(gp.quicksum(x[i, j, t] for i in I) == 1)

# Batch + spot allocation with deadline: the workload requested at t is fully
# distributed over all servers and the periods t+1 .. t+D.
m.addConstrs(
    gp.quicksum(w[i, t, l] for i in I for l in range(t + 1, t + D + 1)) == W[t]
    for t in T_data
)

# Server capacity constraint
for i in I:
    for t in T:
        # Core usage of the on-demand VMs placed on server i at time t.
        on_demand_load = gp.quicksum(
            c[j, t] * x[i, j, t] for j in J if t in active_hours[j]
        )
        # Batch/spot work executed at t. w[i,r,t] exists for t in r+1 .. r+D,
        # so the request times feeding period t are r = t-D .. t-1 (clipped to
        # the hours that actually have requests). The lower bound must be t-D:
        # starting at t-D+1 would leave work scheduled exactly at its deadline
        # out of every capacity constraint.
        batch_spot_load = gp.quicksum(
            w[i, r, t] for r in range(max(t - D, 0), t) if r <= T_data[-1]
        )
        m.addConstr(L[i, t] == on_demand_load + batch_spot_load)
        # Load is measured in cores, so an active server offers PCPU cores and
        # an inactive server offers none.
        m.addConstr(L[i, t] <= PCPU * u[i, t])

# VM migration model
# Link q to the placement variables: if VM j is on server i at time t but was
# not on it at t-1, then q[j,t] must be 1. active_hours[j] is a contiguous
# range, so t-1 is always a valid placement index for t in active_hours[j][1:].
# NOTE(review): this is the standard linearization of "placement changed";
# confirm it matches the intended migration formulation.
for j in J:
    for t in active_hours[j][1:]:
        m.addConstrs(q[j, t] >= x[i, j, t] - x[i, j, t - 1] for i in I)


Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2762406


Academic license 2762406 - for non-commercial use only - registered to da___@yonsei.ac.kr


: 