In [None]:
import requests
import numpy as np
import os, sys
import random
import math
import bisect
import arrow
import pytz
from datetime import datetime, date, timedelta
from dateutil import tz
# from timezonefinder import TimezoneFinder
from matplotlib import pyplot as plt, dates
from matplotlib.ticker import *
from matplotlib_helper import *
from carbon_api_client import *
from typing import List
import matplotlib

In [None]:
enable_savefig = False

In [None]:
def get_all_fuel_types(energy_mixture_timeseries):
    """Return the set of every fuel type seen anywhere in the time series.

    Each entry must provide a 'power_by_fuel_type' mapping; the result is the
    union of the keys of all of those mappings (an empty set for no entries).
    """
    per_entry_keys = (
        sample['power_by_fuel_type'].keys() for sample in energy_mixture_timeseries
    )
    return set().union(*per_entry_keys)

In [None]:
def get_timeseries_by_fuel_type(energy_mixture_timeseries, fuel_types):
    """Pivot the energy-mix entries into one value list per fuel type.

    Returns a dict mapping each fuel type in `fuel_types` to a list with one
    value per entry of `energy_mixture_timeseries`, in entry order. A fuel type
    missing from an entry's 'power_by_fuel_type' mapping contributes 0. for
    that entry, so all lists have the same length.
    """
    series_by_fuel = {name: [] for name in fuel_types}
    for sample in energy_mixture_timeseries:
        for name in fuel_types:
            # Look the mapping up per fuel type so that entries are only
            # touched when there is at least one fuel type to read.
            series_by_fuel[name].append(sample['power_by_fuel_type'].get(name, 0.))
    return series_by_fuel

In [None]:
# Stacked plot of power (MW) by fuel type for one region over one day.
# Similar to the bar plot in supply trend in http://www.caiso.com/todaysoutlook/pages/supply.html

# Active region: `region` is only used in the title and the saved file name;
# the API query itself is driven by the `GPS` (latitude, longitude) pair.
region = 'US-MISO'
GPS = (41.5908, -93.6208)

# Alternative regions; uncomment one region/GPS pair (and comment the active one) to switch.
## region = 'US-CAISO'
# GPS = (37.783, -122.417)

## region = 'US-PJM'
# GPS = (37.3719, -79.8164)



# The query window is [target_date, target_date + 1 day).
# NOTE(review): the window starts at midnight America/Los_Angeles regardless of
# `region`; confirm this is intended when plotting a non-Pacific region.
target_date = arrow.get(date(2023, 6, 1), tzinfo=tz.gettz('America/Los_Angeles'))
energy_mixture_timeseries = call_sysnet_energy_mixture_api(GPS[0], GPS[1], target_date, target_date.shift(days=1))
# print(energy_mixture_timeseries[:2])

# Each entry is read for its 'timestamp' and its 'power_by_fuel_type' mapping.
timestamps = [e['timestamp'] for e in energy_mixture_timeseries]
# fuel_types = get_all_fuel_types(energy_mixture_timeseries)
fuel_types = { fuel_type for entry in energy_mixture_timeseries for fuel_type in entry['power_by_fuel_type'] }
d_fuel_type_timeseries = get_timeseries_by_fuel_type(energy_mixture_timeseries, fuel_types)

print('fuel_types', fuel_types)
# print('timestamps len:', len(timestamps))
# for fuel_type in d_fuel_type_timeseries:
#     print(fuel_type, 'len:', len(d_fuel_type_timeseries[fuel_type]))

plt.figure(figsize=(8, 4.8))
# NOTE(review): fuel_order is assigned but not used anywhere in this cell.
fuel_order = []
# Colour per fuel type; the key order is also passed to plot_stacked_line as `order`.
# NOTE(review): how plot_stacked_line treats fuel types that are present in the
# data but absent from this dict is not visible here -- verify against the helper.
plot_colors = {
    'coal': 'black',
    'gas': '#F2915F',
    'nuclear': '#808080',
    'geothermal': 'brown',
    'biomass': 'yellowgreen',
    'hydro': '#40CDC4',
    'wind': 'royalblue',
    'solar': 'gold',
}
# del d_fuel_type_timeseries['battery']
plot_stacked_line(timestamps, d_fuel_type_timeseries, order=list(plot_colors.keys()), d_label_colors=plot_colors)

plt.xlabel('Time')
plt.ylabel('Power (MW)')
plt.title(f'Energy in MW by fuel type in {region} on {target_date.strftime("%Y-%m-%d")}')
# plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Legend is anchored just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1.04, 0.5), ncol=1)
plt.xticks(rotation=15)
# plt.ylim(0, 1000)
plt.tight_layout()
# The figure is only written to disk when the notebook-level `enable_savefig` flag is set.
savefig_filename = 'energy-by-fuel-mix.%s.%s.png' % (target_date.strftime("%Y-%m-%d"), region)
if enable_savefig:
    plt.savefig(savefig_filename)

In [None]:
def get_total_amount(write_ratio, run_count):
    """Return 1 + write_ratio * (run_count - 1).

    As used in this notebook, this is the total data transferred over
    `run_count` runs, in units of one full transfer: the first run counts as 1
    and every later run adds `write_ratio` (the fraction of new data).
    """
    later_runs = run_count - 1
    return 1 + write_ratio * later_runs

In [None]:
## I/O data size impact
# Configuration for the migration-overhead plot: WAN bandwidths to compare,
# energy-model constants, and per-application energy intensities.

matplotlib.rcParams.update({'font.size': 12})

# When True, each application in m_app_kj_per_gb is marked on the plot.
plot_application_datapoints = True

# (bandwidth in Gbps, legend label) pairs; one curve is drawn per pair.
l_wan_bandwidth = [
    # (125/1024, '125 Mbps (average TCP)'),
    (1, '1 Gbps'),
    (5, '5 Gbps (max VM)'),
    # (15, '15 Gbps (multiple VMs)'),
]

# Alternative bandwidth sets kept for reference (Mbps values are converted with /1024).
# l_wan_bandwidth = [
#     (20/1024, '20 Mbps (min TCP)'),
#     (400/1024, '400 Mbps (max TCP)'),
#     (5, '5 Gbps (max VM)'),
# ]

# l_wan_bandwidth = [
#     (125/1024, 'Single run (125 Mbps)'),
#     (125/1024 / (get_total_amount(0.05, 7) / 7), '7x run, 5% new data'),
#     (125/1024 / (get_total_amount(0.5, 7) / 7), '7x run, 50% new data'),
#     (125/1024 / (get_total_amount(0.05, 30) / 30), '30x run, 5% new data'),
#     (125/1024 / (get_total_amount(0.5, 30) / 30), '30x run, 50% new data'),
# ]

# On our Xeon gold 6138 CPUs, 250W for 40 cores
# NOTE(review): the comment above says 250 W / 40 cores, but the code below uses
# 240 (W) and divides by 48 -- confirm which figures are intended.
MACHINE_MAX_POWER = 240
# Watts per core (dividing by 48) * 3600 s/h / 1000 J/kJ = kJ per core-hour.
CPU_KJ_PER_CORE_HOUR = MACHINE_MAX_POWER / 48 * 3600 / 1000
# Upper y-limit of the overhead plot (1.25 is shown as 125%).
YMAX = 1.25
BITS_PER_BYTE = 8
# Fraction of MACHINE_MAX_POWER charged to each end-host while a transfer is in progress.
TRANSFER_POWER_PERCENTAGE = 0.20
# From https://netsec.ethz.ch/publications/papers/green_routing2023.pdf
# Median is 0.035gCO2/Gb * 8 Gb/GB / (400 gCO2/kWh) * 3600 kJ/kWh = 2.52 kJ/GB
TRANSFER_NETWORK_KJ_PER_GB = 0.035 * 8 / 400 * 3600

# Application name -> value plotted on the kJ/GB x-axis
# (presumably compute energy per GB of application data -- confirm the source of these measurements).
m_app_kj_per_gb = {
    'Compression (gzip)': 0.47,
    # 'Compression (bzip)': 0.83,
    'Compile Linux': 76.42,
    'Video resizing (4k -> 1080p, h.264)': 1.41,
    'Video effect (4k, grayscale, h.264)': 11.53,
    'Video effect (4k, grayscale, h.265)': 96.8,
    # 'Video transcoding (h.264 -> h.265)': 19.54,
    # 'Video transcoding (h.265 -> h.264)': 82.67,
    'Single model ML inference': 1800,
}

def normalize_migration_overhead(compute_energy_kj: float, data_in_gb: float, bandwidth_gbps: float) -> float:
    """Return the energy cost of migrating data as a fraction of compute energy.

    The migration energy is the sum of two parts:
      * end-host energy: both end-hosts draw TRANSFER_POWER_PERCENTAGE of
        MACHINE_MAX_POWER for the duration of the transfer;
      * network energy: TRANSFER_NETWORK_KJ_PER_GB for every GB moved.

    Args:
        compute_energy_kj: Energy of the computation itself, in kJ.
        data_in_gb: Amount of data to transfer, in GB.
        bandwidth_gbps: Transfer bandwidth, in Gbps.

    Returns:
        (end-host energy + network energy) / compute_energy_kj; 1.0 means the
        migration costs as much energy as the computation.
    """
    # GB -> Gb, then divide by Gbps to get seconds on the wire.
    transfer_time_seconds = data_in_gb * BITS_PER_BYTE / bandwidth_gbps
    # W * s / 1000 = kJ; the trailing factor of 2 accounts for the two end-hosts.
    transfer_cpu_energy_kj = transfer_time_seconds * MACHINE_MAX_POWER / 1000 * TRANSFER_POWER_PERCENTAGE * 2
    transfer_network_energy_kj = data_in_gb * TRANSFER_NETWORK_KJ_PER_GB
    # No per-call printing here: this function is evaluated once per plotted
    # point, so a debug print floods the cell output.
    return (transfer_cpu_energy_kj + transfer_network_energy_kj) / compute_energy_kj

# Overhead of migrating 1 GB of data, relative to the compute energy spent per
# GB, drawn as one curve per WAN bandwidth on log-log axes.
plt.figure(figsize=(6, 3.6))
# x = np.logspace(-4, 2)
x = np.logspace(-1, 4)   # compute energy per GB (kJ/GB)
ones = np.ones(len(x))   # data size is fixed at 1 GB for every point
for wan_bandwidth_gbps, label in l_wan_bandwidth:
    y = list(map(normalize_migration_overhead, x, ones, ones * wan_bandwidth_gbps))
    plt.plot(x, y, label=label)
plt.xscale('log')
# Reference line at 100%: migration costs as much energy as the computation.
plt.hlines(1, x[0], x[-1], color='black', linestyles='dotted')
plt.yscale('log')
plt.ylim(1e-6, YMAX)
ax = plt.gca()
# Tick values are fractions; display them as percentages.
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda value, pos: '{:.3g}%'.format(value * 100)))
plt.xlabel('Migration cost index (kJ/GB)')
plt.ylabel('Overhead (%)')
ax.grid(axis='y', which='major', alpha=0.5)
ax.grid(axis='y', which='minor', alpha=0.2)

if plot_application_datapoints:
    # Horizontal nudges (in x data units) for labels that would otherwise overlap.
    label_x_nudge = {
        'Compile Linux': -20,
        'Video effect (4k, grayscale, h.265)': 20,
    }
    # (An x-axis in CPU core-hours per GB would use kj_per_gb / CPU_KJ_PER_CORE_HOUR instead.)
    for app, kj_per_gb in m_app_kj_per_gb.items():
        # axvline spans the full axes height, so it needs no y=0 endpoint
        # (which is not representable on a log-scaled axis).
        plt.axvline(kj_per_gb, color='gray', linestyle='dashed')
        plt.text(kj_per_gb + label_x_nudge.get(app, 0), YMAX, app, rotation=30)

plt.legend(loc='lower left')


In [None]:
# Amortized data transfer per run, as a function of how many times a job is run,
# for several fractions of "new data" per run.
l_write_ratio = [0, 0.05, 0.5, 1]

# Run counts 1..30 inclusive.
l_run_count = np.arange(1, 31)

for write_ratio in l_write_ratio:
    x = l_run_count
    # Total transfer over n runs divided by n gives the per-run share.
    y = [get_total_amount(write_ratio, n_runs) / n_runs for n_runs in l_run_count]
    plt.plot(x, y, marker='.', label=f'{write_ratio * 100:.0f}% new data')

plt.xlabel('# of runs')
plt.ylabel('Amortized data transfer per run')
plt.ylim(0, 1.05)

ax = plt.gca()
# y values are fractions of one full transfer; show them as percentages.
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda value, pos: f'{value * 100:.3g}%'))
ax.grid(axis='y', which='major', alpha=0.5)
ax.grid(axis='y', which='minor', alpha=0.2)
plt.legend()