In [1]:
from pandas import DataFrame
%matplotlib inline
from datetime import datetime

import numpy as np
from matplotlib import pyplot as plt

from benchmark import run_benchmark, run_benchmark_comparison
from lowcarb.carbon_sdk_webapi import CarbonSDK_WebAPI

# Apply the project's matplotlib stylesheet when it can be loaded. This is
# deliberately best-effort: if loading raises OSError the notebook simply
# continues with whatever matplotlib style is already active.
try:
    plt.style.use('./util//matplotlib_stylesheet.mplstyle')
except OSError:
    pass

This notebook uses the public CarbonSDK_WebAPI instance from the hackathon, which appears to have full access to worldwide data.

In [2]:
# Shared API client used by every benchmark cell below; it points at the
# publicly hosted endpoint given by the URL.
api = CarbonSDK_WebAPI('https://carbon-aware-api.azurewebsites.net')

# Multi Region Benchmarks

In [3]:
# Settings for the multi-region benchmarks.
config = dict(
    selected_regions=[
        'westcentralus', 'ukwest', 'uksouth', 'westeurope', 'westus',
        'australiacentral', 'australiaeast', 'swedencentral', 'norwaywest',
        'norwayeast', 'northeurope', 'centralus', 'francesouth',
        'francecentral',
    ],
    sim_start=datetime(2022, 10, 5, 0, 0, 0),
    sim_end=datetime(2022, 10, 9, 0, 0, 0),
    round_time=12,   # scheduling window in hours
    windowSize=60,   # workload duration in minutes
    num_clients=3,
)

## Random Client Selection

In [4]:
# This client selector picks `num_clients` clients uniformly at random.
def random_select_clients(api, config: dict, i_round: int, round_start_time: datetime, available_clients: DataFrame, num_clients: int) -> DataFrame:
    """Select clients at random, without replacement.

    Parameters
    ----------
    api, config, i_round, round_start_time
        Unused here; accepted so this selector has the same signature as the
        other selectors in this notebook.
    available_clients : DataFrame
        Candidate clients, one row per client. It is not modified.
    num_clients : int
        Desired number of clients. If fewer rows are available, all of them
        are returned (in random order) instead of raising.

    Returns
    -------
    DataFrame
        The sampled rows plus a float ``scheduled_time`` column set to 0.0
        (presumably "no delay" -- confirm against ``run_benchmark``).
    """
    # DataFrame.sample raises ValueError when asked for more rows than exist
    # (sampling without replacement), so cap the request at the population.
    n_selected = min(num_clients, len(available_clients))

    # A scalar broadcasts to however many rows were actually selected, so the
    # column length can never disagree with the frame length.
    return available_clients.sample(n_selected).assign(scheduled_time=0.0)

In [5]:
# Run the benchmark with the random selector, then print the sum of the
# 'emissions_total' column of the returned result.
result = run_benchmark(api, config, random_select_clients)
print(f'Total training emission: {result["emissions_total"].sum()}')

Unnamed: 0,region,trained,untrained_since,emissions_total,emissions_1,emissions_2,emissions_3,emissions_4,emissions_5,emissions_6,emissions_7,emissions_8
0,westcentralus,2,5,1185.709355,593.998108,,591.711247,,,,,
1,ukwest,0,8,0.0,,,,,,,,
2,uksouth,2,2,765.11583,,,,374.100307,,391.015523,,
3,westeurope,1,1,415.301614,,,,,,,415.301614,
4,westus,0,8,0.0,,,,,,,,
5,australiacentral,2,0,1481.527179,,,,737.994786,,,,743.532393
6,australiaeast,4,4,2505.871048,735.103135,508.439247,524.33388,737.994786,,,,
7,swedencentral,2,6,1004.10231,545.293627,458.808682,,,,,,
8,norwaywest,3,0,1746.255026,,,,,588.366003,583.697781,,574.191241
9,norwayeast,1,3,588.366003,,,,,588.366003,,,


Total training emission: 12064.101766479587


## Round Robin Client Selection

In [6]:
# This client selector always picks the `num_clients` clients with the largest
# `untrained_since` value.
def round_robin_select_clients(api, config: dict, i_round: int, round_start_time: datetime, available_clients: DataFrame, num_clients: int) -> DataFrame:
    """Select the clients with the highest ``untrained_since`` values.

    Parameters
    ----------
    api, config, i_round, round_start_time
        Unused here; accepted so this selector has the same signature as the
        other selectors in this notebook.
    available_clients : DataFrame
        Candidate clients; must contain an ``untrained_since`` column. It is
        not modified.
    num_clients : int
        Maximum number of clients to return. If fewer rows are available, all
        of them are returned.

    Returns
    -------
    DataFrame
        The selected rows, ordered by descending ``untrained_since``, plus a
        float ``scheduled_time`` column set to 0.0 (presumably "no delay" --
        confirm against ``run_benchmark``).
    """
    return (
        available_clients
        # Ties on `untrained_since` are common; a stable sort keeps tied
        # clients in their original order so the selection is deterministic.
        .sort_values(by='untrained_since', ascending=False, kind='stable')
        .iloc[0:num_clients]
        # A scalar broadcasts to the real number of selected rows. Assigning
        # an array of length `num_clients` raised ValueError whenever fewer
        # clients than requested were available.
        .assign(scheduled_time=0.0)
    )

In [7]:
# Run the benchmark with the round-robin selector, then print the sum of the
# 'emissions_total' column of the returned result.
result = run_benchmark(api, config, round_robin_select_clients)
print(f'Total training emission: {result["emissions_total"].sum()}')

Unnamed: 0,region,trained,untrained_since,emissions_total,emissions_1,emissions_2,emissions_3,emissions_4,emissions_5,emissions_6,emissions_7,emissions_8
0,westcentralus,2,3,1195.631688,593.998108,,,,601.63358,,,
1,ukwest,2,2,735.878022,344.862499,,,,,391.015523,,
2,uksouth,2,2,735.878022,344.862499,,,,,391.015523,,
3,westeurope,2,2,942.092453,,462.002729,,,,480.089724,,
4,westus,2,1,821.871575,,411.748474,,,,,410.123101,
5,australiacentral,2,1,1216.440238,,508.439247,,,,,708.000991,
6,australiaeast,2,1,1232.334871,,,524.33388,,,,708.000991,
7,swedencentral,2,0,1102.664152,,,549.432658,,,,,553.231494
8,norwaywest,2,0,1145.018339,,,570.827098,,,,,574.191241
9,norwayeast,2,0,1119.39037,,,,545.199129,,,,574.191241


Total training emission: 11968.99856623625


## Forecast Client Selection

In [8]:
# This client selector picks the clients with the lowest forecast value and
# schedules each of them at the timestamp of its own minimum.
def forecast_client_selection(api, config: dict, i_round: int, round_start_time: datetime, available_clients: DataFrame, num_clients: int) -> DataFrame:
    """Select clients by lowest forecast value and return their best start time.

    Parameters
    ----------
    api
        Object exposing ``get_historic_forecast_batch(regions=, start_time=,
        windowSize=, roundtime=)``. The returned frame must contain the
        columns ``region``, ``value`` and ``timestamp_indv``.
    config : dict
        Must provide ``windowSize`` and ``round_time``; both are forwarded to
        the API call.
    i_round : int
        Unused here; accepted so this selector has the same signature as the
        other selectors in this notebook.
    round_start_time : datetime
        Forwarded to the API as ``start_time``.
    available_clients : DataFrame
        Candidate clients; must contain a ``region`` column.
    num_clients : int
        Maximum number of clients to return.

    Returns
    -------
    DataFrame
        Columns ``region`` and ``scheduled_time`` for the ``num_clients``
        regions whose minimum ``value`` is lowest, ordered by ascending
        ``value``.
    """
    forecast = api.get_historic_forecast_batch(
        regions=available_clients.loc[:, 'region'].to_list(),
        start_time=round_start_time,
        windowSize=config['windowSize'],
        roundtime=config['round_time'],
    )

    # idxmin() returns an index *label*, so the row has to be fetched with
    # .loc. Using .iloc only works if every region's rows happen to be
    # labelled 0..n-1; with any other index it picks the wrong row or raises
    # IndexError.
    scheduled_times = forecast\
        .groupby('region', group_keys=True)[['value', 'timestamp_indv']]\
        .apply(lambda group: group.loc[group['value'].idxmin()])

    # `scheduled_times` is indexed by region; merging on 'region' matches that
    # index level against the `region` column of `available_clients`.
    selected_clients = available_clients\
                           .merge(scheduled_times, on='region')\
                           .rename(columns={'timestamp_indv': 'scheduled_time'})\
                           .sort_values('value', ascending=True)\
                           .iloc[0:num_clients][['region', 'scheduled_time']]

    return selected_clients

In [9]:
# Run the benchmark with the forecast-based selector, then print the sum of
# the 'emissions_total' column of the returned result.
result = run_benchmark(api, config, forecast_client_selection)
print(f'Total training emission: {result["emissions_total"].sum()}')

Unnamed: 0,region,trained,untrained_since,emissions_total,emissions_1,emissions_2,emissions_3,emissions_4,emissions_5,emissions_6,emissions_7,emissions_8
0,westcentralus,0,8,0.0,,,,,,,,
1,ukwest,7,0,2507.666518,332.9179,,316.134982,383.398951,370.584966,386.725295,374.799595,343.104829
2,uksouth,0,8,0.0,,,,,,,,
3,westeurope,0,8,0.0,,,,,,,,
4,westus,1,6,386.781994,,386.781994,,,,,,
5,australiacentral,0,8,0.0,,,,,,,,
6,australiaeast,0,8,0.0,,,,,,,,
7,swedencentral,0,8,0.0,,,,,,,,
8,norwaywest,0,8,0.0,,,,,,,,
9,norwayeast,0,8,0.0,,,,,,,,


Total training emission: 6495.971929498333


## Comparison Benchmark
**`run_benchmark_comparison` calculates the emissions that would result if the complete workload were run in a single region, all at once.**

In [10]:
# Baseline for the multi-region configuration, computed with the same API
# client and `config` as the selector benchmarks above.
benchmark_comparison = run_benchmark_comparison(api, config)

Unnamed: 0,region,emission_total
0,westcentralus,15904.384868
1,ukwest,10249.413092
2,uksouth,10249.413092
3,westeurope,10684.017581
4,westus,10104.824224
5,australiacentral,19166.595994
6,australiaeast,19166.595994
7,swedencentral,15513.289547
8,norwaywest,15282.387931
9,norwayeast,15282.387931


# Single Region Benchmarks

## Forecast Client Selection

In [11]:
# Settings for the single-region benchmark: one region, one client per round.
config_single_region = dict(
    selected_regions=['norwayeast'],
    sim_start=datetime(2022, 10, 5, 0, 0, 0),
    sim_end=datetime(2022, 10, 9, 0, 0, 0),
    round_time=12,   # scheduling window in hours
    windowSize=60,   # workload duration in minutes
    num_clients=1,
)

In [12]:
# Run the forecast-based selector on the single-region configuration, then
# print the sum of the 'emissions_total' column of the returned result.
result = run_benchmark(api, config_single_region, forecast_client_selection)
print(f'Total training emission: {result["emissions_total"].sum()}')

Unnamed: 0,region,trained,untrained_since,emissions_total,emissions_1,emissions_2,emissions_3,emissions_4,emissions_5,emissions_6,emissions_7,emissions_8
0,norwayeast,8,0,4488.523297,543.271361,579.086259,570.657001,542.307478,592.731829,581.543218,543.895051,535.0311


Total training emission: 4488.523297335


In [13]:
# Baseline for the single-region configuration, for comparison with the
# forecast-scheduled total printed by the previous cell.
benchmark_comparison = run_benchmark_comparison(api, config_single_region)

Unnamed: 0,region,emission_total
0,norwayeast,5103.892746
