# 0. Imports and helpers

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# Define the functions
def mean_of_chunks_and_margin_error(np_array, window_size):
    """Return the mean of each consecutive, non-overlapping chunk of ``np_array``.

    The input is cut into slices of ``window_size`` elements starting at
    position 0; the last slice is shorter when the length is not a multiple
    of ``window_size``. Despite the function name, no margin of error is
    computed here: only the per-chunk means are returned.

    Parameters:
        np_array: sliceable sequence of numbers (anything supporting ``len``
            and ``[start:stop]`` slicing).
        window_size: number of elements per chunk; used as the ``range`` step.

    Returns:
        numpy.ndarray with one mean per chunk, in order.
    """
    chunk_starts = range(0, len(np_array), window_size)
    return np.array(
        [np.mean(np_array[start:start + window_size]) for start in chunk_starts]
    )


def augment_host(host, augment_factor):
    """Repeat every element of ``host`` ``augment_factor`` times, back to back.

    Thin wrapper around ``np.repeat``: element order is preserved and each
    element appears ``augment_factor`` consecutive times in the result.
    """
    repeated = np.repeat(host, augment_factor)
    return repeated

def get_host_prediction(path, augment_factor=10, scale=1e6):
    """Load a simulated host output file and return its summed energy series.

    Reads the parquet file at ``path``, sums the ``energy_usage`` column over
    all rows that share a ``timestamp``, repeats every summed sample
    ``augment_factor`` times (via ``augment_host``) and divides the result by
    ``scale``.

    Parameters:
        path: location of the ``host.parquet`` file to read.
        augment_factor: how many times each per-timestamp sum is repeated.
            Defaults to 10, the value that used to be hard-coded.
            NOTE(review): presumably compensates for a coarser sampling
            interval than the ground-truth trace - confirm.
        scale: divisor applied to the repeated series. Defaults to 1e6, the
            value that used to be hard-coded.
            NOTE(review): looks like a unit conversion - confirm the units.

    Returns:
        The repeated, rescaled per-timestamp sums.
    """
    per_timestamp = pd.read_parquet(path).groupby("timestamp")["energy_usage"].sum()
    upsampled = augment_host(per_timestamp, augment_factor)
    return upsampled / scale

# 1. Energy.parquet, already given

In [3]:
# Ground-truth trace: total "energy" per timestamp, summed over all rows.
energy_trace = (
    pd.read_parquet("traces/surf-sara/trace/energy.parquet")
    .groupby("timestamp")["energy"]
    .sum()
)

# Rescale: multiply every value by 12, then divide by 1e6.
# NOTE(review): the factor is 12 here while host predictions are repeated 10x;
# confirm both factors and the resulting units.
energy_trace = (energy_trace * 12) / 1e6

# Integer position of the "2022-10-07 23:59:30+02:00" entry in the timestamp
# index; later used as the cut-off when truncating the series.
index = energy_trace.index.get_loc("2022-10-07 23:59:30+02:00")


# 2. Host.parquet, simulated

In [4]:
# Load one simulated host-energy series per raw-output folder (0..3).
host_prediction_linear = get_host_prediction("output/surf-sara-sim/raw-output/0/seed=0/host.parquet")
host_prediction_cubic = get_host_prediction("output/surf-sara-sim/raw-output/1/seed=0/host.parquet")
host_prediction_square = get_host_prediction("output/surf-sara-sim/raw-output/2/seed=0/host.parquet")
host_prediction_sqrt = get_host_prediction("output/surf-sara-sim/raw-output/3/seed=0/host.parquet")

# Common length for all five series. `index` (position of the cut-off
# timestamp in the energy trace) is one of the candidates, so the cut-off can
# never exceed the shortest series. Previously the min() result was discarded
# and `index` used unconditionally, which left the series with different
# lengths whenever a prediction was shorter than `index`.
max_size = min(
    len(host_prediction_linear),
    len(host_prediction_cubic),
    len(host_prediction_square),
    len(host_prediction_sqrt),
    len(energy_trace),
    index,  # we take only the data from the first day
)

# Positional truncation; .iloc makes explicit that max_size is a row count,
# not an index label.
host_prediction_linear = host_prediction_linear.iloc[:max_size]
host_prediction_cubic = host_prediction_cubic.iloc[:max_size]
host_prediction_square = host_prediction_square.iloc[:max_size]
host_prediction_sqrt = host_prediction_sqrt.iloc[:max_size]
energy_trace = energy_trace.iloc[:max_size]

FileNotFoundError: [Errno 2] No such file or directory: 'output/surf-sara-sim/raw-output/0/seed=0/host.parquet'

In [None]:
# Convert the energy trace index to datetime, then stamp that same index onto
# every host prediction. This replaces the predictions' own index rather than
# converting it, so alignment is purely positional and every prediction must
# have exactly as many rows as energy_trace.
energy_trace.index = pd.to_datetime(energy_trace.index)
for _prediction in (
    host_prediction_linear,
    host_prediction_cubic,
    host_prediction_square,
    host_prediction_sqrt,
):
    _prediction.index = pd.to_datetime(energy_trace.index)

In [None]:
# Display the energy trace as a quick visual check of its values and index.
energy_trace

# 3. Put the data together, in the same place

In [None]:
# Build the union of the energy trace index with each prediction's index, in
# the order linear, cubic, square, sqrt.
aligned_index = energy_trace.index
for _other in (
    host_prediction_linear,
    host_prediction_cubic,
    host_prediction_square,
    host_prediction_sqrt,
):
    aligned_index = aligned_index.union(_other.index)

# Put every series on that shared index; labels a series lacks become NaN.
energy_trace = energy_trace.reindex(aligned_index, fill_value=np.nan)
host_prediction_linear = host_prediction_linear.reindex(aligned_index, fill_value=np.nan)
host_prediction_cubic = host_prediction_cubic.reindex(aligned_index, fill_value=np.nan)
host_prediction_square = host_prediction_square.reindex(aligned_index, fill_value=np.nan)
host_prediction_sqrt = host_prediction_sqrt.reindex(aligned_index, fill_value=np.nan)

In [None]:
# Collect the ground-truth trace and the four model predictions as the
# columns of a single dataframe.
_series_by_column = {
    "energy": energy_trace,
    "linear": host_prediction_linear,
    "cubic": host_prediction_cubic,
    "square": host_prediction_square,
    "sqrt": host_prediction_sqrt,
}
df = pd.DataFrame(_series_by_column)

In [None]:
# Plot the ground-truth trace (blue) against all model predictions (light
# gray), using thin lines, and save the figure as a PDF.
light_gray = "#D3D3D3"
output_path = "output/surf-sara-sim/simulation-analysis/multimodel-groundtruth.pdf"

plt.figure(figsize=(20, 10))
plt.plot(df.index, df["energy"], label="Energy Trace", color="blue", linewidth=1)  # ground truth

# One line per power model; all share the same muted colour so the ground
# truth stands out.
for _column, _label in (
    ("linear", "Linear Power Model"),
    ("cubic", "Cubic Power Model"),
    ("square", "Square Power Model"),
    ("sqrt", "Sqrt Power Model"),
):
    plt.plot(df.index, df[_column], label=_label, color=light_gray, linewidth=1)

plt.xlabel("Timestamp")
plt.ylabel("Energy [MWh]")
plt.title("Energy Trace and Host Predictions")
plt.ylim(7.25, 10)
plt.legend()

# savefig does not create missing directories; make sure the target exists so
# the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path)
