In [None]:
import requests
import numpy as np
import os, sys
import math
import bisect
import arrow
import pytz
from datetime import datetime, date, timedelta
from timezonefinder import TimezoneFinder
from matplotlib import pyplot as plt, dates
from matplotlib.ticker import *
from matplotlib_helper import *
from typing import List

In [None]:
# Approximate GPS coordinates, as (latitude, longitude), of public cloud
# regions, keyed by (cloud vendor, region code).
M_PUBLIC_CLOUD_LOCATION = {
    ('AWS', 'us-west-1'): (37.00578, -121.56828),     # N. California
    ('AWS', 'us-west-2'): (45.840410, -119.289460),   # Oregon
    # us-east-1 is the N. Virginia region and us-east-2 is the Ohio region;
    # these two coordinate pairs were previously swapped.
    ('AWS', 'us-east-1'): (39.040283, -77.485165),    # N. Virginia
    ('AWS', 'us-east-2'): (39.983334, -82.983330),    # Ohio
}
def get_location_for_public_cloud(cloud_vendor, region):
    '''Looks up the GPS coordinate for public cloud region.

    Args:
        cloud_vendor: Cloud vendor name, e.g. 'AWS'.
        region: Region code of that vendor, e.g. 'us-west-1'.

    Returns:
        A (latitude, longitude) tuple from M_PUBLIC_CLOUD_LOCATION, or
        (nan, nan) when the (vendor, region) pair is not in the table.
    '''
    return M_PUBLIC_CLOUD_LOCATION.get((cloud_vendor, region), (math.nan, math.nan))

In [None]:
def plot_timeseries(data_array, plot_axis=None, timestamp_column_name='timestamp', prefix=None, use_relative_time=False, color=None, index=0):
    """Plot every non-timestamp column of a list of records as a line.

    Args:
        data_array: List of dicts. Each dict holds the x value under
            `timestamp_column_name`; every other key of the FIRST record is
            treated as a data series and read from all records.
        plot_axis: Axes to draw on; defaults to the current pyplot axes.
        timestamp_column_name: Key holding the x value of each record.
        prefix: Optional legend prefix. With a single data series the label
            is just the prefix (or empty); with several it is
            '<prefix> - <key>' (or '<key>' without a prefix).
        use_relative_time: If true, x values become seconds elapsed since the
            first record's timestamp.
        color: Passed through to `Axes.plot` for every series.
        index: Starting index handed to `get_linestyle`; incremented once per
            series.

    Returns:
        A list with one entry per data series, each entry being the value
        returned by `Axes.plot`. An empty `data_array` yields an empty list
        and draws nothing.
    """
    # Nothing to plot: avoid indexing into an empty list below.
    if not data_array:
        return []
    x = [entry[timestamp_column_name] for entry in data_array]
    if use_relative_time:
        start_time = x[0]
        x = [(t - start_time).total_seconds() for t in x]
    data_keys = [key for key in data_array[0].keys() if key != timestamp_column_name]
    # Resolve the target axes once, and only when there is something to draw,
    # so that no figure is created as a side effect otherwise.
    if data_keys and plot_axis is None:
        plot_axis = plt.gca()
    has_multiple_series = len(data_keys) > 1
    lines = []
    for key in data_keys:
        data_series = [entry[key] for entry in data_array]
        if has_multiple_series:
            label = ('%s - ' % prefix if prefix else '') + key
        else:
            label = prefix if prefix else ''
        # NOTE(review): get_linestyle is not defined in this notebook; it is
        # presumably provided by the `matplotlib_helper` star import.
        line = plot_axis.plot(x, data_series, color=color, linestyle=get_linestyle(index), label=label, marker=None)
        index += 1
        lines.append(line)
    return lines

In [None]:
def plot_cdf_array(array, label, include_count = False, index=0, color=None):
    """Draw the empirical CDF of `array` on the current pyplot axes.

    Args:
        array: Sized collection of sortable values.
        label: Legend label for the curve.
        include_count: If true, ' (<number of values>)' is appended to the label.
        index: Index handed to `get_linestyle` to choose the line style.
        color: Line colour; when None, `get_next_color()` supplies one.
    """
    sorted_values = sorted(array)
    sample_count = len(array)
    # linspace yields sample_count + 1 evenly spaced points on [0, 1]; the
    # leading 0 is dropped so the i-th sorted value maps to (i + 1) / count.
    cumulative_fractions = np.linspace(0., 1., sample_count + 1)[1:]
    legend_label = label + ' (%d)' % sample_count if include_count else label
    line_color = get_next_color() if color is None else color
    plt.plot(
        sorted_values,
        cumulative_fractions,
        label=legend_label,
        color=line_color,
        linestyle=get_linestyle(index),
    )

In [None]:
def get_carbon_intensity_data(cloud_vendor, region, date:date = None, timerange:timedelta = timedelta(weeks=1), use_utc_time_of_day = True):
    """Fetch the carbon intensity time series for a public cloud region.

    The region is mapped to GPS coordinates with
    `get_location_for_public_cloud`, and the carbon-intensity HTTP service is
    queried for the window [start, start + timerange - 1 minute].

    Args:
        cloud_vendor: Cloud vendor name, e.g. 'AWS'.
        region: Region code of that vendor.
        date: Start of the window. Defaults to the date one week before now.
        timerange: Length of the window.
        use_utc_time_of_day: If true, `date` is interpreted in UTC; otherwise
            in the timezone found at the region's coordinates.

    Returns:
        A dict with:
            'iso': the 'region' field of the service response.
            'data': list of {'timestamp': datetime, 'carbon_intensity': float}.

    Raises:
        AssertionError: If the HTTP response status is not OK.
    """
    print(cloud_vendor, region)
    url_get_carbon_intensity = 'http://yeti-09.sysnet.ucsd.edu/carbon-intensity/'
    (latitude, longitude) = get_location_for_public_cloud(cloud_vendor, region)
    if date is None:
        date = arrow.get().shift(weeks=-1).date()
    if use_utc_time_of_day:
        timezone = pytz.UTC
    else:
        timezone_str = TimezoneFinder().timezone_at(lng=longitude, lat=latitude)
        timezone = pytz.timezone(timezone_str)
    date = arrow.get(date, tzinfo=timezone)
    response = requests.get(url_get_carbon_intensity, params={
        'latitude': latitude,
        'longitude': longitude,
        'start': date,
        # One minute is subtracted so the window ends just before start + timerange.
        'end': date.shift(minutes=-1) + timerange,
    })
    assert response.ok, "Carbon intensity lookup failed (%d): %s" % (response.status_code, response.text)
    response_json = response.json()
    electricity_region = response_json['region']
    print('region:', electricity_region)
    carbon_intensities = response_json['carbon_intensities']
    # Show the first and last raw samples as a sanity check; skipped when the
    # service returned no samples, which would otherwise raise IndexError.
    if carbon_intensities:
        print(carbon_intensities[0], carbon_intensities[-1])
    data_for_plot = [
        {
            'timestamp': arrow.get(element['timestamp']).datetime,
            'carbon_intensity': float(element['carbon_intensity']),
        }
        for element in carbon_intensities
    ]
    return {
        'iso': electricity_region,
        'data': data_for_plot,
    }

In [None]:
def print_carbon_intensity_stats(l_time_series: List[dict]):
    """Print the mean, minimum and maximum carbon intensity of a time series.

    Args:
        l_time_series: Records that each carry a 'carbon_intensity' value.
    """
    intensities = []
    for record in l_time_series:
        intensities.append(record['carbon_intensity'])
    average = np.mean(intensities)
    lowest = np.min(intensities)
    highest = np.max(intensities)
    summary = 'Avg/Min/Max carbon intensity: %.2f/%.2f/%.2f' % (average, lowest, highest)
    print(summary)

In [None]:
def find_overlap_diff_of_carbon_intensities(time_series_1: List[dict], time_series_2: List[dict]) -> List[float]:
    """Compute the carbon intensity difference (series 2 minus series 1) over time.

    The two series are aligned on the sorted union of their timestamps. At a
    timestamp a series has no sample for, its most recent earlier sample is
    used; before a series' first sample, that first sample is used.

    Args:
        time_series_1: Records with 'timestamp' and 'carbon_intensity',
            expected in ascending timestamp order (required by the bisect lookup).
        time_series_2: Same format as `time_series_1`.

    Returns:
        One difference per timestamp in the sorted union of timestamps.
        Empty if either series is empty.
    """
    # With an empty series there is nothing to compare against.
    if not time_series_1 or not time_series_2:
        return []

    s1_timestamps = [e['timestamp'] for e in time_series_1]
    s2_timestamps = [e['timestamp'] for e in time_series_2]

    def carbon_intensity_at(time_series, timestamps, timestamp):
        # Binary search instead of list membership/index scans, which made the
        # enclosing loop quadratic in the series length.
        position = bisect.bisect_left(timestamps, timestamp)
        if position == len(timestamps) or timestamps[position] != timestamp:
            # No exact match: fall back to the previous sample (clamped to the
            # first sample when `timestamp` precedes the whole series).
            position = max(position - 1, 0)
        return time_series[position]['carbon_intensity']

    union_timestamps = sorted(set(s1_timestamps).union(s2_timestamps))
    return [
        carbon_intensity_at(time_series_2, s2_timestamps, timestamp)
        - carbon_intensity_at(time_series_1, s1_timestamps, timestamp)
        for timestamp in union_timestamps
    ]

In [None]:
def find_overlap_interval_of_carbon_intensities(time_series_1: List[dict], time_series_2: List[dict]) -> \
        List[tuple[datetime, datetime]]:
    """Find the intervals where carbon intensity of the first time series is at or below the second.

    The two series are aligned on the sorted union of their timestamps. At a
    timestamp a series has no sample for, its most recent earlier sample is
    used; before a series' first sample, that first sample is used.

    An interval starts at the first timestamp where series 1 <= series 2 and
    ends at the next timestamp where series 1 > series 2. An interval that is
    still open when the data ends is closed at the last timestamp, unless it
    would have zero length.

    Args:
        time_series_1: Records with 'timestamp' and 'carbon_intensity',
            expected in ascending timestamp order (required by the bisect lookup).
        time_series_2: Same format as `time_series_1`.

    Returns:
        List of (start, end) timestamp pairs in chronological order. Empty if
        either series is empty.
    """
    # With an empty series there is nothing to compare against.
    if not time_series_1 or not time_series_2:
        return []

    s1_timestamps = [e['timestamp'] for e in time_series_1]
    s2_timestamps = [e['timestamp'] for e in time_series_2]

    def carbon_intensity_at(time_series, timestamps, timestamp):
        # Binary search instead of list membership/index scans, which made the
        # enclosing loop quadratic in the series length.
        position = bisect.bisect_left(timestamps, timestamp)
        if position == len(timestamps) or timestamps[position] != timestamp:
            # No exact match: fall back to the previous sample (clamped to the
            # first sample when `timestamp` precedes the whole series).
            position = max(position - 1, 0)
        return time_series[position]['carbon_intensity']

    union_timestamps = sorted(set(s1_timestamps).union(s2_timestamps))
    overlap_intervals: List[tuple[datetime, datetime]] = []
    interval_start = None
    for curr_timestamp in union_timestamps:
        carbon_intensity1 = carbon_intensity_at(time_series_1, s1_timestamps, curr_timestamp)
        carbon_intensity2 = carbon_intensity_at(time_series_2, s2_timestamps, curr_timestamp)
        if carbon_intensity1 <= carbon_intensity2:
            if interval_start is None:
                interval_start = curr_timestamp
        elif interval_start is not None:
            overlap_intervals.append((interval_start, curr_timestamp))
            interval_start = None
    # Close an interval that runs to the end of the data; it used to be
    # dropped, which lost e.g. the case where series 1 is lower throughout.
    last_timestamp = union_timestamps[-1]
    if interval_start is not None and interval_start < last_timestamp:
        overlap_intervals.append((interval_start, last_timestamp))
    return overlap_intervals

In [None]:
def plot_overlap_interval_cdf(overlap_intervals: List[tuple[datetime, datetime]], label: str) -> None:
    """Plot the CDF of interval durations, expressed in hours.

    Args:
        overlap_intervals: (start, end) timestamp pairs.
        label: Legend label forwarded to `plot_cdf_array`.
    """
    seconds_per_hour = timedelta(hours=1).total_seconds()
    interval_in_hours = []
    for interval in overlap_intervals:
        duration = interval[1] - interval[0]
        interval_in_hours.append(duration.total_seconds() / seconds_per_hour)
    plot_cdf_array(interval_in_hours, label)

In [None]:
def format_cloud_region_name(cloud_region: tuple[str, str], iso: str) -> str:
    """Build the display name '<vendor> <region> (<iso>)' for a cloud region.

    Args:
        cloud_region: (cloud vendor, region code) pair.
        iso: Name of the electricity region (ISO) serving the cloud region.
    """
    vendor = cloud_region[0]
    region_code = cloud_region[1]
    return '{} {} ({})'.format(vendor, region_code, iso)

In [None]:
# Analysis switches.
use_utc_time_of_day = True        # interpret the start date in UTC rather than region-local time
enable_plot_time_series = False   # plot the raw carbon intensity time series of every region
enable_overlap_analysis = True    # plot the CDF of interval lengths for the region pairs below
enable_savefig = True             # write each figure to a PNG in the working directory

all_cloud_vendor_and_regions = [
    ('AWS', 'us-west-1'),
    ('AWS', 'us-west-2'),
    ('AWS', 'us-east-1'),
    # AWS us-east-2 uses the same ISO as us-east-1
    # ('AWS', 'us-east-2'),
]

# Each iteration analyses one `stepsize`-long window; windows are counted
# backwards from 2022-06-01.
stepsize = timedelta(days=28)
for offset in range(1):
    target_date = arrow.get(datetime(2022, 6, 1)) + (stepsize * -(1 + offset))
    target_date_string = target_date.strftime("%Y-%m-%d")
    print(f'\n{stepsize.days}-day starting {target_date_string}')
    # Only open the time series figure when it will be drawn on; otherwise an
    # empty figure is left behind.
    if enable_plot_time_series:
        plt.figure(figsize=(8,4))
    all_region_time_series_data = {}
    for (cloud_vendor, region) in all_cloud_vendor_and_regions:
        carbon_intensity_data = get_carbon_intensity_data(cloud_vendor, region, date=target_date, timerange=stepsize, use_utc_time_of_day=use_utc_time_of_day)
        all_region_time_series_data[(cloud_vendor, region)] = carbon_intensity_data
        time_series_data = carbon_intensity_data['data']
        if enable_plot_time_series:
            plot_timeseries(time_series_data, use_relative_time=False,
                            prefix=format_cloud_region_name((cloud_vendor, region), carbon_intensity_data['iso']))
        print_carbon_intensity_stats(time_series_data)
    if enable_plot_time_series:
        if stepsize.total_seconds() == timedelta(days=1).total_seconds():
            date_formatter_string = "%H:%M"
            xlabel = f'Time of day ({"UTC" if use_utc_time_of_day else "local"})'
        else:
            date_formatter_string = "%Y/%m/%d %H:%M"
            xlabel = 'Date'
        ax = plt.gca()
        ax.xaxis.set_major_formatter(dates.DateFormatter(date_formatter_string))
        plt.xlabel(xlabel)
        plt.ylabel('Carbon intensity (gCO2/kWh)')
        # The figure shows every region (see legend), so the title must not
        # name only the last region left over from the loop above.
        plt.title(f'{stepsize.days}-day carbon intensity starting {target_date_string}')
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.xticks(rotation=30)
        plt.ylim(0, 800)
        plt.tight_layout()
        savefig_filename = 'carbon-intensity.%s.%ddays.png' % (target_date_string, stepsize.days)
        if enable_savefig:
            plt.savefig(savefig_filename)
    if enable_overlap_analysis:
        plt.figure(figsize=(5, 4))
        # For each (region1, region2) pair, plot how long region1's carbon
        # intensity stays at or below region2's.
        cloud_region_pairs = [
            (('AWS', 'us-west-2'), ('AWS', 'us-west-1')),
            (('AWS', 'us-west-2'), ('AWS', 'us-east-1'))
        ]
        for (cloud_region1, cloud_region2) in cloud_region_pairs:
            carbon_data1 = all_region_time_series_data[cloud_region1]
            carbon_data2 = all_region_time_series_data[cloud_region2]
            region1_name = format_cloud_region_name(cloud_region1, carbon_data1['iso'])
            region2_name = format_cloud_region_name(cloud_region2, carbon_data2['iso'])
            overlap_intervals = find_overlap_interval_of_carbon_intensities(carbon_data1['data'],
                                                                            carbon_data2['data'])
            plot_overlap_interval_cdf(overlap_intervals, f'{region1_name} < {region2_name}')
        plt.xlabel('Overlap (hours)')
        plt.ylabel('CDF')
        plt.title('Carbon intensity overlap')
        plt.grid()
        # Legend goes below the axes so it does not cover the curves.
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.2))
        plt.tight_layout()
        savefig_filename = 'carbon-intensity.overlap.%s.%ddays.png' % (target_date_string, stepsize.days)
        if enable_savefig:
            plt.savefig(savefig_filename)