In [None]:
#!/usr/bin/env python3

import json
import requests
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
from matplotlib_helper import *

In [None]:
# Metadata

# Y-axis label and subplot title for each plotted series, keyed by series name.
metadata = {
    'emission_rates': {
        'ylabel': 'gCO2/s',
        'title': 'Instantaneous emission rates'
    },
    'emission_integral': {
        'ylabel': 'gCO2',
        'title': 'Emission integral over its duration'
    },
}

# Job phases shown on the plots. 'interval_keys' names the (start, end) entries
# to look up in a region's timings; 'label' is the human-readable phase name.
d_events = {
    'input_transfer': {
        'interval_keys': ("input_transfer_start", "input_transfer_end"),
        'label': 'Input transfer',
    },
    'compute': {
        'interval_keys': ("compute_start", "compute_end"),
        'label': 'Compute',
    },
    'output_transfer': {
        'interval_keys': ("output_transfer_start", "output_transfer_end"),
        'label': 'Output transfer',
    },
}

# Text drawn next to each phase's start marker, keyed by the start timing key.
# Derived from d_events so that every label is defined in exactly one place.
# Only start keys are present: add_timing() ignores names without 'start'.
d_timing_labels = {
    event['interval_keys'][0]: event['label']
    for event in d_events.values()
}

In [None]:
def get_max_value(data_details: dict, series_name: str):
    """Return the largest value of one series across all regions.

    Args:
        data_details: Mapping of region -> series name -> {"compute": {...},
            "transfer": {...}}, where the innermost dicts map keys to values.
        series_name: Which series of each region to inspect.

    Returns:
        The maximum over every "compute" and "transfer" value of every region,
        or 0 when there are no values (0 is also the floor of the result).
    """
    peak = 0
    for region_details in data_details.values():
        series = region_details[series_name]
        # Both sub-series are mandatory; an empty one simply contributes 0.
        for part in ("compute", "transfer"):
            peak = max(peak, max(series[part].values(), default=0))
    return peak

def resample_timeseries(df: pd.DataFrame, interval: str):
    """Resample a time series to a fixed interval, forward-filling the gaps.

    The input frame is left untouched (the previous implementation converted
    and re-indexed it in place, so calling the function twice on the same
    frame raised a KeyError on the second call).

    Args:
        df: Frame with a "Timestamp" column holding anything that
            pd.to_datetime accepts; the remaining columns are the data.
        interval: Resampling frequency passed to DataFrame.resample, e.g. "30s".

    Returns:
        A new DataFrame with one row per interval, "Timestamp" restored as a
        regular column and the other columns forward-filled.
    """
    # assign() returns a copy, so the caller's frame keeps its original
    # "Timestamp" column and index.
    indexed = df.assign(Timestamp=pd.to_datetime(df["Timestamp"])).set_index("Timestamp")
    return indexed.resample(interval).ffill().reset_index()

def create_dataframe_for_plotting(timeseries: dict[str, float], min_start: datetime, max_end: datetime) -> pd.DataFrame:
    """Turn a timestamp -> value mapping into a plot-ready dataframe.

    The series is resampled to 30-second steps via resample_timeseries() and
    then clipped to the closed interval [min_start, max_end].

        Args:
            timeseries: A dictionary of ISO-format timestamp strings and values.
            min_start: The minimum cutoff time for the timeseries (inclusive).
            max_end: The maximum cutoff time for the timeseries (inclusive).

        Returns:
            A dataframe with "Timestamp" and "Value" columns. An empty input
            yields an empty dataframe with those two columns.
    """
    # Keyed by parsed datetime, so strings that denote the same instant collapse.
    parsed = {datetime.fromisoformat(stamp): value for stamp, value in timeseries.items()}
    frame = pd.DataFrame(list(parsed.items()), columns=["Timestamp", "Value"])
    if frame.empty:
        # Nothing to resample or clip.
        return frame
    frame = resample_timeseries(frame, "30s")
    # between() is inclusive on both ends by default.
    in_window = frame["Timestamp"].between(pd.to_datetime(min_start), pd.to_datetime(max_end))
    return frame[in_window]

def add_timing(ax, name: str, time: pd.Timestamp, max_value: float, color: str):
    """Mark the start of a job phase with a vertical line and a text label.

    Timing names that do not contain 'start' are ignored, so callers may pass
    both the start and the end key of an interval.

    Args:
        ax: Axes-like object to draw on (must provide vlines() and text()).
        name: Timing key, e.g. "compute_start". When it contains 'start' it
            must be a key of d_timing_labels.
        time: X position of the marker.
        max_value: Top of the vertical line and y position of the label.
        color: Color of the label text (the line itself is always gray).
    """
    if 'start' not in name:
        return

    # Compute phases get a solid marker, transfers a dashed one.
    line_style = "solid" if 'compute' in name else "dashed"
    ax.vlines(time, ymin=0, ymax=max_value, color='gray', alpha=0.5, linestyles=line_style)

    # Tilt the input/output labels away from each other; others stay centered.
    if 'input' in name:
        align, angle = 'right', -30
    elif 'output' in name:
        align, angle = 'left', 30
    else:
        align, angle = 'center', 0
    ax.text(time, max_value, d_timing_labels[name], color=color, alpha=0.95, ha=align, va="bottom", rotation=angle)

In [None]:
# Request sent to the carbon-aware scheduler API.
# NOTE(review): field units (e.g. for "runtime" and "max_delay") are defined by
# the API and are not visible in this notebook - confirm against its schema.
request_payload = {
    "runtime": 391,
    "schedule": {
        "type": "onetime",
        "start_time": "2023-05-24T22:00:00+00:00",
        "max_delay": 19800,
    },
    "dataset": {"input_size_gb": 1.2, "output_size_gb": 0.25},
    # One {"id": ...} entry per candidate location.
    "candidate_locations": [
        {"id": location_id} for location_id in ("Azure:eastus", "Azure:westus")
    ],
    "use_prediction": False,
    "carbon_data_source": "c3lab",
    "watts_per_core": 2.9,
    "core_count": 80,
    "original_location": "Azure:eastus",
    "optimize_carbon": True,
}

In [None]:
CARBON_API_URL='http://localhost:8082/carbon-aware-scheduler/'
# Seconds to wait for the scheduler before giving up. Without a timeout,
# requests can wait indefinitely and hang the notebook if the service is
# unresponsive. Raise this value if the scheduler legitimately needs longer.
CARBON_API_TIMEOUT_S = 60

# Make the API call (the request payload is sent as the JSON body of the GET)
response = requests.get(CARBON_API_URL, json=request_payload, timeout=CARBON_API_TIMEOUT_S)

# Check if the API call was successful (response.ok is true for any status code below 400)
assert response.ok, f"Error: API call failed with status code {response.status_code}"
data = response.json()

In [None]:
# Plotting: one subplot per series (emission rates on top, emission integral below)
fig, axes = plt.subplots(2, 1, figsize=(12, 10))

# One color per region, shared by both subplots
d_region_colors = {region: get_next_color() for region in data['details']}

for (ax, series_name) in zip(axes, ["emission_rates", "emission_integral"]):
    for region, region_details in data['details'].items():
        compute_data = region_details[series_name]["compute"]
        transfer_data = region_details[series_name]["transfer"]
        timings = region_details['timings'][0]  # Assume a single occurrence per job
        min_start = datetime.fromisoformat(timings['min_start'])
        max_end = datetime.fromisoformat(timings['max_end'])

        # Convert the raw series to resampled dataframes clipped to [min_start, max_end]
        compute_df = create_dataframe_for_plotting(compute_data, min_start, max_end)
        transfer_df = create_dataframe_for_plotting(transfer_data, min_start, max_end)

        # Plot timeseries data as step functions
        color = d_region_colors[region]
        ax.step(compute_df["Timestamp"], compute_df["Value"], label=f"{region} - Compute", color=color, linestyle="solid")
        if not transfer_df.empty:
            ax.step(transfer_df["Timestamp"], transfer_df["Value"], label=f"{region} - Transfer", color=color, linestyle="dashed")

        # Height of this region's event markers: the peak of its plotted data.
        # Empty frames are skipped because their .max() is NaN, and
        # max(nan, x) returns NaN, which would hide the markers and labels.
        # (get_max_value(data['details'], series_name) gives a common height
        # across all regions instead, if that is ever preferred.)
        max_y_value = max(
            (frame["Value"].max() for frame in (compute_df, transfer_df) if not frame.empty),
            default=0,
        )

        # Add events based on the timings
        for event in d_events:
            df = compute_df if event == 'compute' else transfer_df
            if df.empty:
                continue
            (start_event, end_event) = d_events[event]['interval_keys']
            # Vertical lines and texts (add_timing only draws for start keys)
            for name in (start_event, end_event):
                add_timing(ax, name, pd.to_datetime(timings[name]), max_y_value, color=color)
            # Fill area for events under the curve
            event_start = pd.to_datetime(timings[start_event])
            event_end = pd.to_datetime(timings[end_event])
            in_event = (df['Timestamp'] >= event_start) & (df['Timestamp'] <= event_end)
            ax.fill_between(x=df['Timestamp'], y1=df['Value'], where=in_event, color=color, alpha=0.5)

    ax.set_title(metadata[series_name]['title'])
    ax.set_xlabel("Time")
    ax.set_ylabel(metadata[series_name]['ylabel'])
    ax.grid(True)

# pyplot's legend() targets the current axes, i.e. the last subplot created
plt.legend()
plt.show()
