In [98]:
from utils import load_problem_data, load_solution
from evaluation import evaluation_function, get_actual_demand
import pandas as pd
import numpy as np

# Load the four problem tables returned by the project helper `load_problem_data`;
# later cells read `demand` (and re-read some CSVs directly from ./data).
demand, datacenters, servers, selling_prices = load_problem_data()

In [107]:
# Seed NumPy's global RNG before building the demand table so the cell is repeatable.
# NOTE(review): presumably get_actual_demand draws from np.random — confirm in evaluation.py.
np.random.seed(3163)
actual_demand = get_actual_demand(demand)
# Bare last expression: shown as the cell's output.
actual_demand

latency_sensitivity,time_step,server_generation,high,low,medium
0,1,CPU.S1,2571,17471,2512
1,1,GPU.S1,43,13,7
2,2,CPU.S1,5860,36140,6397
3,2,GPU.S1,107,25,21
4,3,CPU.S1,7285,48584,7361
...,...,...,...,...,...
667,167,GPU.S3,1375,5274,8312
668,168,CPU.S3,88021,15649,0
669,168,CPU.S4,662906,911679,169388
670,168,GPU.S2,564,51,990


In [102]:
# Persist the demand table to the working directory, without the row index.
actual_demand.to_csv("actual_demand.csv", index=False)

In [108]:
import pandas as pd
import json

# Assuming `actual_demand` is the DataFrame that has been obtained using get_actual_demand(demand)

# Build one record per time step that maps each data center to the demand it
# is assigned, keyed by server generation:
#   - "high" demand is split between DC3 and DC4,
#   - "low" demand goes to DC1,
#   - "medium" demand goes to DC2.
# Only strictly positive demands are considered.

# Share of each row's high demand assigned to DC3; DC4 receives the remainder.
DC3_HIGH_DEMAND_SHARE = 0.4588

result = []

# groupby(sort=False) walks the time steps in order of first appearance and
# hands over each step's rows directly, instead of re-filtering the whole
# frame once per time step.
for time_step, time_step_data in actual_demand.groupby("time_step", sort=False):
    # Demand per data center for this time step: {server_generation: units}
    dc_demands = {"DC1": {}, "DC2": {}, "DC3": {}, "DC4": {}}

    for _, row in time_step_data.iterrows():
        server = row["server_generation"]

        # High demand: split between DC3 and DC4.
        if row["high"] > 0:
            # int() truncates, so DC3 can be recorded as 0 for very small
            # demands; DC4 takes whatever DC3 did not.
            high_demand_dc3 = int(row["high"] * DC3_HIGH_DEMAND_SHARE)
            high_demand_dc4 = int(row["high"] - high_demand_dc3)
            dc_demands["DC3"][server] = high_demand_dc3
            dc_demands["DC4"][server] = high_demand_dc4

        # Low demand: all of it goes to DC1.
        if row["low"] > 0:
            dc_demands["DC1"][server] = int(row["low"])

        # Medium demand: all of it goes to DC2.
        if row["medium"] > 0:
            dc_demands["DC2"][server] = int(row["medium"])

    # Key order in the record: time_step, DC1, DC2, DC3, DC4.
    result.append({"time_step": int(time_step), **dc_demands})

# Write the records as pretty-printed JSON.
with open("demand_by_time_step_and_data_center.json", "w") as json_file:
    json.dump(result, json_file, indent=4)

print("JSON file created successfully.")

JSON file created successfully.


In [None]:
import pandas as pd
import json

# Requires `actual_demand` (the DataFrame from get_actual_demand(demand)) in the kernel.

# Keep only the columns needed for the medium-latency view and show them.
medium_demands = actual_demand.loc[:, ["time_step", "server_generation", "medium"]]
display(medium_demands)

# Total medium demand per (time_step, server_generation), pivoted so that each
# server generation becomes a column; combinations with no row are filled with 0.
medium_totals = medium_demands.groupby(["time_step", "server_generation"])["medium"].sum()
demand_per_time_step = medium_totals.unstack(fill_value=0)

# One record per time step, listing only server generations with positive demand.
result = [
    {
        "time_step": int(step),
        "demand": {
            server: int(units) for server, units in per_server.items() if units > 0
        },
    }
    for step, per_server in demand_per_time_step.iterrows()
]

# Write the records as pretty-printed JSON.
with open("demand_by_time_step.json", "w") as json_file:
    json.dump(result, json_file, indent=4)

print("JSON file created successfully.")

latency_sensitivity,time_step,server_generation,medium
0,1,CPU.S1,9235
1,1,GPU.S1,0
2,2,CPU.S1,19722
3,2,GPU.S1,2
4,3,CPU.S1,27524
...,...,...,...
667,167,GPU.S3,8769
668,168,CPU.S3,279723
669,168,CPU.S4,282980
670,168,GPU.S2,1166


JSON file created successfully.


In [None]:
import pandas as pd
import json

# Requires `actual_demand` (the DataFrame from get_actual_demand(demand)) in the kernel.

# Output key -> source column, listed in the order the keys appear in each record.
latency_columns = {
    "high demand": "high",
    "medium demand": "medium",
    "low demand": "low",
}

result = []
time_steps = actual_demand["time_step"]

for step in time_steps.unique():
    # Rows belonging to the current time step
    rows_at_step = actual_demand.loc[time_steps == step]

    record = {"time_step": int(step)}
    for label, column in latency_columns.items():
        # {server_generation: demand}, keeping strictly positive demands only
        record[label] = {
            row["server_generation"]: int(row[column])
            for _, row in rows_at_step.iterrows()
            if row[column] > 0
        }
    result.append(record)

# Write the records as pretty-printed JSON.
with open("demand_by_time_step_and_latency_sensitivity.json", "w") as json_file:
    json.dump(result, json_file, indent=4)

print("JSON file created successfully.")

JSON file created successfully.


In [None]:
def filter_by_time_step_and_sensitivity(df, time_step, sensitivity):
    """
    Return the rows of `df` for one time step, narrowed to one latency level.

    Parameters:
        df (pd.DataFrame): Demand table with a "time_step" column, a
            "server_generation" column and one column per latency level.
        time_step (int): Value of "time_step" to keep.
        sensitivity (str): Latency level to keep: 'low', 'high' or 'medium'.

    Returns:
        pd.DataFrame: For a recognised sensitivity, the matching rows with only
        the "server_generation" and `sensitivity` columns. For any other value
        the matching rows are returned with all of their columns.
    """
    rows_at_step = df.loc[df["time_step"] == time_step]

    # Unrecognised level: hand back the time-step rows without narrowing columns.
    if sensitivity not in ("low", "high", "medium"):
        return rows_at_step

    return rows_at_step[["server_generation", sensitivity]]


# Example usage:
# `actual_demand` must already exist in the kernel; it is not defined in this cell.
chosen_time_step = 1
chosen_sensitivity = "medium"
actual_demand_filter_by_time_step_and_sensitivity = filter_by_time_step_and_sensitivity(
    actual_demand, chosen_time_step, chosen_sensitivity
)

# Bare last expression: the filtered DataFrame is shown as the cell's output.
actual_demand_filter_by_time_step_and_sensitivity

latency_sensitivity,server_generation,medium
0,CPU.S1,9235
1,GPU.S1,0


In [None]:
import pandas as pd

# Every server generation expected to appear in the demand table
all_servers = {"CPU.S1", "CPU.S2", "CPU.S3", "CPU.S4", "GPU.S1", "GPU.S2", "GPU.S3"}

# Distinct server generations that have a strictly positive "low" demand somewhere
has_low_demand = actual_demand["low"] > 0
low_latency_servers = actual_demand.loc[has_low_demand, "server_generation"].unique()
low_latency_servers_set = set(low_latency_servers)

# True when every expected generation shows up with some low demand
all_servers_used_in_low = all_servers <= low_latency_servers_set

print(
    "All servers are used in low latency sensitivity."
    if all_servers_used_in_low
    else "Not all servers are used in low latency sensitivity."
)

All servers are used in low latency sensitivity.


In [None]:
# Read the data-center table and take the slot capacity of the first row whose
# latency sensitivity is 'low' (labelled DC1 in the printout below).
data_center = pd.read_csv('./data/datacenters.csv')

is_low_latency = data_center['latency_sensitivity'] == 'low'
filtered_dc = data_center.loc[is_low_latency]
DC1_slot_capacity = filtered_dc["slots_capacity"].to_numpy()[0]
print("Slots capacity for DC1(Low): ", DC1_slot_capacity)

Slots capacity for DC1(Low):  25245


In [None]:
import numpy as np
import pandas as pd

# Assuming `actual_demand` is the DataFrame obtained using get_actual_demand(demand)
#
# Purpose: for every time step, estimate how many servers of each generation
# are needed to cover the selected demand column, and how many data-center
# slots those servers would occupy.

# Step 1: Select the demand to size for
# NOTE(review): this cell used to call its input "low" demand, but it has always
# read the "medium" column. Behaviour is unchanged and only the names were
# corrected — confirm which latency class was actually intended.
DEMAND_COLUMN = "medium"
medium_demands = actual_demand[["time_step", "server_generation", DEMAND_COLUMN]]
low_demands = medium_demands  # legacy alias for the previous (misleading) name

# Number of distinct server generations listed per time step.
# NOTE: this counts every generation that has a row in the step, including
# generations whose demand is 0.
server_usage = (
    medium_demands.groupby("time_step")["server_generation"]
    .nunique()
    .rename("server_used")
    .reset_index()
)

# Total demand per time step, one column per server generation (0 where absent)
demand_per_time_step = (
    medium_demands.groupby(["time_step", "server_generation"])[DEMAND_COLUMN]
    .sum()
    .unstack(fill_value=0)
)

# One row per time step: server_used followed by the per-generation demand
result = pd.merge(server_usage, demand_per_time_step, on="time_step")

# Step 2: Load Server Capacities
server_capacity = pd.read_csv("./data/servers.csv")

# server_generation -> capacity of a single server
server_capacity_dict = dict(
    zip(server_capacity["server_generation"], server_capacity["capacity"])
)

# Servers needed = demand / capacity, rounded up to whole servers.
# A generation listed in servers.csv but absent from the demand table needs
# no servers; previously this case raised a KeyError.
for server, capacity in server_capacity_dict.items():
    if server in result.columns:
        result[f"{server}_needed"] = np.ceil(result[server] / capacity)
    else:
        result[f"{server}_needed"] = 0.0

# Defensive: replace any remaining NaN with 0
result = result.fillna(0)

# Step 3: Define Slot Capacities and Calculate Slots Needed
# Slots occupied by one server of each type (hard-coded in this notebook).
cpu_slot_capacity = 2
gpu_slot_capacity = 4

cpu_columns = [
    col for col in result.columns if col.startswith("CPU") and col.endswith("needed")
]
gpu_columns = [
    col for col in result.columns if col.startswith("GPU") and col.endswith("needed")
]

# Server counts are already whole numbers, so no further rounding is required.
cpu_slots_needed = result[cpu_columns].sum(axis=1) * cpu_slot_capacity
gpu_slots_needed = result[gpu_columns].sum(axis=1) * gpu_slot_capacity

# Total slots needed per time step
result["slots_needed"] = cpu_slots_needed + gpu_slots_needed

# Review the final DataFrame
result

Unnamed: 0,time_step,server_used,CPU.S1,CPU.S2,CPU.S3,CPU.S4,GPU.S1,GPU.S2,GPU.S3,CPU.S1_needed,CPU.S2_needed,CPU.S3_needed,CPU.S4_needed,GPU.S1_needed,GPU.S2_needed,GPU.S3_needed,slots_needed
0,1,2,9235,0,0,0,0,0,0,154.0,0.0,0.0,0.0,0.0,0.0,0.0,308.0
1,2,2,19722,0,0,0,2,0,0,329.0,0.0,0.0,0.0,1.0,0.0,0.0,662.0
2,3,2,27524,0,0,0,0,0,0,459.0,0.0,0.0,0.0,0.0,0.0,0.0,918.0
3,4,2,37118,0,0,0,3,0,0,619.0,0.0,0.0,0.0,1.0,0.0,0.0,1242.0
4,5,2,52385,0,0,0,13,0,0,874.0,0.0,0.0,0.0,2.0,0.0,0.0,1756.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,164,4,0,0,215588,204116,0,1138,7082,0.0,0.0,1797.0,1276.0,0.0,143.0,886.0,10262.0
164,165,4,0,0,226351,247261,0,1191,7096,0.0,0.0,1887.0,1546.0,0.0,149.0,887.0,11010.0
165,166,4,0,0,247293,220712,0,1162,7776,0.0,0.0,2061.0,1380.0,0.0,146.0,972.0,11354.0
166,167,4,0,0,259962,285805,0,1186,8769,0.0,0.0,2167.0,1787.0,0.0,149.0,1097.0,12892.0
