# Cloud Carbon Coefficients

#### Imports

In [122]:
%%capture
# ^ Hide the output of this cell (must be on the first line)

# Install dependencies
%pip install -r requirements.txt
%pip install requests

import csv
import numpy as np
import pandas as pd

import requests
import re
from bs4 import BeautifulSoup

import warnings; warnings.simplefilter('ignore')

In [240]:
# Send a request to the website and get the HTML response
url = 'https://www.spec.org/power_ssj2008/results/power_ssj2008.html'
# The timeout stops this cell hanging indefinitely if the server does not
# answer, and raise_for_status() fails loudly on an HTTP error instead of
# letting an error page be parsed as if it were the results page.
response = requests.get(url, timeout=60)
response.raise_for_status()
html = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Find the table containing the benchmark results: the first <table> on the
# page, or None when the page contains no table at all
table = soup.find('table')

if table is not None:
    # Extract the column headers, skipping 'Processor' and 'Submeasurements'
    # (presumably grouping headers with no data cell of their own - TODO confirm)
    headers = []
    for th in table.select('thead tr.header th'):
        if th.text.strip() not in ['Processor', 'Submeasurements']:
            headers.append(th.text.strip())

    # Check if 'Result(Overall ssj_ops/watt)' exists and move it to the last index if it does
    if 'Result(Overall ssj_ops/watt)' in headers:
        headers.remove('Result(Overall ssj_ops/watt)')
        headers.append('Result(Overall ssj_ops/watt)')

    # Check if 'TotalMemory (GB)' exists and move it to the 10th position (index 9) if it does
    if 'TotalMemory (GB)' in headers:
        headers.remove('TotalMemory (GB)')
        headers.insert(9, 'TotalMemory (GB)')

    def parse_string(s):
        """Strip clock-speed details from a CPU description.

        Returns the text before any '@', with parenthesised GHz notes, dotted
        tokens and any word containing 'ghz' (any case) removed.
        """
        # Remove text following '@'
        s = s.split('@')[0].strip()

        # Remove a parenthesised note mentioning 'GHz' (greedy: from the first
        # '(' to the last ')')
        s = re.sub(r'\(.*GHz.*\)', '', s).strip()

        # Remove dotted tokens such as a clock speed written as '3.10'
        s = re.sub(r'\w+\.\w+', '', s).strip()

        # Remove any word containing 'GHz', 'Ghz', or 'ghz'
        s = ' '.join([x for x in s.split() if not re.search(r'Ghz', x, re.IGNORECASE)])
        return s

    # Position of the CPU description within a row's <td> cells
    # (assumed from the cleaning applied to it - verify against the page layout)
    CPU_DESCRIPTION_COLUMN = 4

    # Extract the data rows
    data_rows = []
    for row in table.find_all('tr')[1:]:
        # Clean each cell and remove commas (thousands separators)
        data_row = [data.text.strip().replace(',', '') for data in row.find_all('td')]

        # Rows without any <td> cells carry no data; skipping them avoids
        # writing blank lines into the CSV
        if not data_row:
            continue

        # Guard the index so a short row cannot raise IndexError
        if len(data_row) > CPU_DESCRIPTION_COLUMN:
            data_row[CPU_DESCRIPTION_COLUMN] = parse_string(data_row[CPU_DESCRIPTION_COLUMN])
        data_rows.append(data_row)

    # Write the data to a CSV file. UTF-8 is set explicitly because pandas
    # reads CSV files as UTF-8 by default, whatever the platform locale is.
    with open('data/SPECpower-full-results.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(headers)
        writer.writerows(data_rows)

    print('CSV file generated successfully.')

else:
    print('Table not found.')

CSV file generated successfully.


## Use stage emissions

### Constants

These constants are used as part of the calculations for the use stage 
emissions.

In [241]:
# Memory energy coefficient from the Cloud Carbon Footprint methodology:
# https://www.cloudcarbonfootprint.org/docs/methodology#memory
MEMORY_COEFFICIENT = 0.000392 # kWh / GB-hour (gigabytes, not gigabits)

## Embodied emissions

### Constants

These constants are used as part of the calculations for the embodied emissions
factors for each instance type. They're based on [the work done by
Teads](https://medium.com/teads-engineering/building-an-aws-ec2-carbon-emissions-dataset-3f0fd76c98ac)
and extracted from [the source Google
Sheet](https://docs.google.com/spreadsheets/d/1YhtGO_UU9Hc162m7eQKYFQOnV4_yEK5_lgHYfl02JPE/edit#gid=2090774556).

In [242]:
# Manufacturing emissions for a mono socket, low DRAM, no local storage
# commodity rack server
BASE_MANUFACTURING_EMISSIONS = 1000 # kgCO2eq

# Commodity rack server lifespan
RACK_SERVER_LIFESPAN = 4 # years

# Hourly manufacturing emissions conversion factor - linearly amortised
# over the lifespan, approximating a year as 12 months of 30 days.
# The base value is in kgCO2eq, so the result is in kgCO2eq per hour.
MANUFACTURING_EMISSIONS = BASE_MANUFACTURING_EMISSIONS / RACK_SERVER_LIFESPAN / 12 / 30 / 24 # kgCO2eq/hour

# DRAM Threshold to unlock additional Scope 3 emissions
DRAM_THRESHOLD = 16 # GB

# Manufacturing emissions for the threshold DRAM amount
# Based on Dell PowerEdge R740 Life-Cycle Assessment
# https://docs.google.com/spreadsheets/d/1YhtGO_UU9Hc162m7eQKYFQOnV4_yEK5_lgHYfl02JPE/edit#gid=954946016
# = 533 kgCO₂eq for 12*32GB DIMMs Memory (384 GB).
# (533 / 384) is the emissions per GB, scaled here to DRAM_THRESHOLD GB.
DRAM_MANUFACTURING_EMISSIONS = (533 / 384) * DRAM_THRESHOLD # kgCO2eq

# Manufacturing emissions per additional CPU
CPU_MANUFACTURING_EMISSIONS = 100 # kgCO2eq

# Manufacturing emissions per additional HDD
HDD_MANUFACTURING_EMISSIONS = 50 # kgCO2eq

# Manufacturing emissions per additional SSD
SSD_MANUFACTURING_EMISSIONS = 100 # kgCO2eq

# Manufacturing emissions per additional GPU Card
GPU_MANUFACTURING_EMISSIONS = 150 # kgCO2eq


### Calculations

Embodied emissions are based on a representative baseline
(`BASE_MANUFACTURING_EMISSIONS`), with additional factors added for extra
components: memory, storage, CPUs and GPUs.

In [243]:
# Per-platform CPU lookup tables, read in the order Azure, AWS, GCP
azure_cpus, aws_cpus, gcp_cpus = (
    pd.read_csv(f'data/{platform}-instances-cpus.csv')
    for platform in ('azure', 'aws', 'gcp')
)

def calculate_additional_memory_emissions(platform_memory):
    """Return the extra manufacturing emissions for memory above the baseline.

    Memory up to DRAM_THRESHOLD is treated as part of the baseline server and
    adds nothing; every unit above it is charged at the per-unit rate implied
    by DRAM_MANUFACTURING_EMISSIONS.
    """

    memory_amount = float(platform_memory)

    # Nothing extra unless the platform strictly exceeds the baseline amount
    if not memory_amount > DRAM_THRESHOLD:
        return 0.0

    emissions_per_unit = DRAM_MANUFACTURING_EMISSIONS / DRAM_THRESHOLD
    return float((memory_amount - DRAM_THRESHOLD) * emissions_per_unit)

def calculate_additional_storage_emissions(storage_type, drive_quantity):
    """Return the extra manufacturing emissions for the platform's drives.

    Drives are charged at the SSD rate when the storage type is 'ssd' (in any
    letter case) and at the HDD rate for every other storage type.
    """

    # No drives, no additional emissions
    if drive_quantity <= 0:
        return 0.0

    is_ssd = storage_type.lower() == 'ssd'
    per_drive = SSD_MANUFACTURING_EMISSIONS if is_ssd else HDD_MANUFACTURING_EMISSIONS

    return float(drive_quantity * per_drive)

def calculate_additional_cpu_emissions(platform_name, cpu_name):
    """Calculate emissions for additional CPUs for the specified cloud
    platform.

    Args:
        platform_name: 'aws', 'azure' or 'gcp'. Any other value yields 0.0.
        cpu_name: the exact CPU name for AWS, or the microarchitecture name
            for Azure and GCP.

    Returns:
        CPU_MANUFACTURING_EMISSIONS for each socket beyond the first, as a
        float; 0.0 when the socket count is not positive.

    Raises:
        ValueError: if cpu_name is not present in the platform's CPU table.
    """

    # For AWS we know the precise CPU
    if platform_name == 'aws':
        cpus = aws_cpus
        name_column = 'CPU Name'
        sockets_column = 'Platform Number of CPU Socket(s)'
    # For Azure & GCP we only know the general CPU architecture.
    # The previous condition `platform_name == 'azure' or 'gcp'` was always
    # true because the non-empty string 'gcp' is truthy.
    elif platform_name in ('azure', 'gcp'):
        # NOTE(review): Azure is looked up in the GCP table, as before; confirm
        # whether azure_cpus should be used for Azure instead.
        cpus = gcp_cpus
        name_column = 'Microarchitecture'
        sockets_column = 'CPU Sockets'
    else:
        return 0.0

    # A boolean mask instead of a string-built query, so names containing
    # quotes cannot break the expression
    matches = cpus[cpus[name_column] == cpu_name]
    if matches.empty:
        raise ValueError(
            f'No CPU entry for {cpu_name!r} on platform {platform_name!r}')

    # Read one scalar explicitly; calling int() on a Series is deprecated
    sockets = int(matches[sockets_column].iloc[0])

    if sockets > 0:
        return float((sockets - 1) * CPU_MANUFACTURING_EMISSIONS)
    return 0.0

def calculate_additional_gpu_emissions(gpu_quantity):
    """Return the extra manufacturing emissions for the platform's GPU cards.

    Each card adds GPU_MANUFACTURING_EMISSIONS; a quantity that is not
    positive adds nothing.
    """

    return float(gpu_quantity * GPU_MANUFACTURING_EMISSIONS) if gpu_quantity > 0 else 0.0
    

## Processor types

Processors are grouped into types by each vendor e.g. Intel Broadwell CPUs. Cloud providers provide the CPU types for each of their instance types, but not the precise CPU details. As such, we calculate the average wattage by processor type.

In [244]:
def load_append_list(file_name):
    """Load a CSV file from the data/ directory and return its first column.

    Args:
        file_name: name of the CSV file inside the data/ directory.

    Returns:
        A list with the first field of every non-empty row, in file order.
    """

    # newline='' is the mode the csv module documents for files given to a reader
    with open(f'data/{file_name}', 'r', newline='') as csvfile:
        # Blank lines come back from csv.reader as empty rows; skip them
        # rather than failing with IndexError on row[0]
        return [row[0] for row in csv.reader(csvfile) if row]

# Load the list of CPU models belonging to each processor family
cpus_amd_epyc_gen1 = load_append_list('amd-epyc-gen1.csv')
cpus_amd_epyc_gen2 = load_append_list('amd-epyc-gen2.csv')
cpus_amd_epyc_gen3 = load_append_list('amd-epyc-gen3.csv')
cpus_intel_sandybridge = load_append_list('intel-sandybridge.csv')
cpus_intel_ivybridge = load_append_list('intel-ivybridge.csv')
cpus_intel_haswell = load_append_list('intel-haswell.csv')
cpus_intel_broadwell = load_append_list('intel-broadwell.csv')
cpus_intel_skylake = load_append_list('intel-skylake.csv')
cpus_intel_cascadelake = load_append_list('intel-cascadelake.csv')
cpus_intel_coffeelake = load_append_list('intel-coffeelake.csv')

# Sanity check: each list must contain one model known to be in that family
for family_cpus, known_model in (
    (cpus_amd_epyc_gen1, 'EPYC 7601'),
    (cpus_amd_epyc_gen2, 'EPYC 7742'),
    (cpus_amd_epyc_gen3, 'EPYC 75F3'),
    (cpus_intel_sandybridge, 'E5-4610'),
    (cpus_intel_ivybridge, 'E5-2609 v2'),
    (cpus_intel_haswell, 'E5-2630 v3'),
    (cpus_intel_broadwell, 'E5-2683 v4'),
    (cpus_intel_skylake, 'Platinum 8160T'),
    (cpus_intel_cascadelake, 'Gold 6230R'),
    (cpus_intel_coffeelake, 'E-2246G'),
):
    assert known_model in family_cpus

## Processor lists

Now we know which processors are in which type, we can group all the tested servers by their CPU type to calculate: average idle watts, average watts at 100% utilization, average GB/chip.

In [245]:
# Read every server from the SPECpower results CSV; cells holding the literal
# text 'NC' are loaded as missing values
servers = pd.read_csv(
    filepath_or_buffer='data/SPECpower-full-results.csv',
    na_values=['NC'],
)

#### Regex match

The regex anchors each CPU name to the end of the line using `$` because some chips have a version suffix, so a plain substring match is not enough, e.g. `Intel E3-1230` is a Sandy Bridge chip but `Intel E3-1230 v3` is Haswell. The match is case insensitive because the definitions use `v3` and `V3` inconsistently.

#### Clean data

The SPECpower results often append extra info to the `CPU Description` column which is unnecessary. For example, `Intel Xeon E5-2470 (Intel Turbo Boost Technology up to 3.10 GHz)`. This extra info needs to be stripped, e.g. to `Intel Xeon E5-2470`, otherwise the regex match will not work.

The check below will error if the data is not clean.

In [246]:
# Look for leftover clock-speed text in the CPU descriptions.
# case=False catches 'GHz', 'Ghz' and 'ghz' alike (the previous case-sensitive
# search for 'Ghz' missed the usual 'GHz' spelling). na=False counts rows
# with no description as clean instead of putting NaN into the boolean mask.
unclean_servers = servers[servers['CPU Description'].str.contains('ghz', case=False, na=False)]

if len(unclean_servers) > 0:
    # Raising stops the notebook at this cell and shows which rows still need cleaning
    raise ValueError(
        'Data not clean: '
        + ', '.join(unclean_servers['CPU Description'].unique()))

### AMD: EPYC Gen 1

In [247]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_amd_epyc_gen1)
servers_amd_epyc_gen1 = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers.
# (The former .replace(',','') on TotalThreads only replaced values equal to
# ',' and so had no effect; commas are already removed when the CSV is written.)
amd_epyc_gen1 = {}
amd_epyc_gen1['Idle watts'] = (servers_amd_epyc_gen1['avg. watts@ active idle'].astype(float) / servers_amd_epyc_gen1['TotalThreads'].astype(float)).mean()
amd_epyc_gen1['100% watts'] = (servers_amd_epyc_gen1['avg. watts@ 100%'].astype(float) / servers_amd_epyc_gen1['TotalThreads'].astype(float)).mean()
amd_epyc_gen1['GB/Chip'] = (servers_amd_epyc_gen1['TotalMemory (GB)'].astype(float) / servers_amd_epyc_gen1['Chips'].astype(float)).mean()
amd_epyc_gen1['Total memory'] = servers_amd_epyc_gen1['TotalMemory (GB)'].astype(float)

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
# NOTE(review): this subtracts (per-server memory * a kWh-based coefficient)
# from a per-thread wattage - confirm the units are as intended.
amd_epyc_gen1['100% watts GCP adjusted'] = amd_epyc_gen1['100% watts'] - (amd_epyc_gen1['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {amd_epyc_gen1["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen1["Idle watts"].mean())) == 0.82

print(f'Average: Max Watts = {amd_epyc_gen1["100% watts"].mean():,.2f}')
#assert float('{:,.2f}'.format(amd_epyc_gen1["100% watts"].mean())) == 2.55

print(f'Average: Max Watts (GCP) = {amd_epyc_gen1["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen1["100% watts GCP adjusted"].mean())) == 2.49

print(f'Average: GB/Chip = {amd_epyc_gen1["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen1["GB/Chip"].mean())) == 89.60

Average: Min Watts = 0.85
Average: Max Watts = 2.60
Average: Max Watts (GCP) = 2.54
Average: GB/Chip = 92.44


### AMD: EPYC Gen 2

In [249]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_amd_epyc_gen2)
servers_amd_epyc_gen2 = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
amd_epyc_gen2 = {}
amd_epyc_gen2['Idle watts'] = (servers_amd_epyc_gen2['avg. watts@ active idle'] / servers_amd_epyc_gen2['TotalThreads']).mean()
amd_epyc_gen2['100% watts'] = (servers_amd_epyc_gen2['avg. watts@ 100%'] / servers_amd_epyc_gen2['TotalThreads']).mean()
amd_epyc_gen2['GB/Chip'] = (servers_amd_epyc_gen2['TotalMemory (GB)'] / servers_amd_epyc_gen2['Chips']).mean()
amd_epyc_gen2['Total memory'] = servers_amd_epyc_gen2['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
amd_epyc_gen2['100% watts GCP adjusted'] = amd_epyc_gen2['100% watts'] - (amd_epyc_gen2['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {amd_epyc_gen2["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen2["Idle watts"].mean())) == 0.47

print(f'Average: Max Watts = {amd_epyc_gen2["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen2["100% watts"].mean())) == 1.69

print(f'Average: Max Watts (GCP) = {amd_epyc_gen2["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen2["100% watts GCP adjusted"].mean())) == 1.58

print(f'Average: GB/Chip = {amd_epyc_gen2["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen2["GB/Chip"].mean())) == 129.78

Average: Min Watts = 0.47
Average: Max Watts = 1.69
Average: Max Watts (GCP) = 1.58
Average: GB/Chip = 129.78


### AMD: EPYC Gen 3

In [250]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_amd_epyc_gen3)
servers_amd_epyc_gen3 = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
amd_epyc_gen3 = {}
amd_epyc_gen3['Idle watts'] = (servers_amd_epyc_gen3['avg. watts@ active idle'].astype(float) / servers_amd_epyc_gen3['TotalThreads']).mean()
amd_epyc_gen3['100% watts'] = (servers_amd_epyc_gen3['avg. watts@ 100%'].astype(float) / servers_amd_epyc_gen3['TotalThreads']).mean()
amd_epyc_gen3['GB/Chip'] = (servers_amd_epyc_gen3['TotalMemory (GB)'] / servers_amd_epyc_gen3['Chips']).mean()
amd_epyc_gen3['Total memory'] = servers_amd_epyc_gen3['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
amd_epyc_gen3['100% watts GCP adjusted'] = amd_epyc_gen3['100% watts'] - (amd_epyc_gen3['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {amd_epyc_gen3["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen3["Idle watts"].mean())) == 0.45

print(f'Average: Max Watts = {amd_epyc_gen3["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen3["100% watts"].mean())) == 2.02

print(f'Average: Max Watts (GCP) = {amd_epyc_gen3["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen3["100% watts GCP adjusted"].mean())) == 1.87

print(f'Average: GB/Chip = {amd_epyc_gen3["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(amd_epyc_gen3["GB/Chip"].mean())) == 128.00

Average: Min Watts = 0.43
Average: Max Watts = 1.95
Average: Max Watts (GCP) = 1.82
Average: GB/Chip = 128.00


### Intel: Sandy Bridge

In [251]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_sandybridge)
servers_intel_sandybridge = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
intel_sandybridge = {}
intel_sandybridge['Idle watts'] = (servers_intel_sandybridge['avg. watts@ active idle'].astype(float) / servers_intel_sandybridge['TotalThreads']).mean()
intel_sandybridge['100% watts'] = (servers_intel_sandybridge['avg. watts@ 100%'].astype(float) / servers_intel_sandybridge['TotalThreads']).mean()
intel_sandybridge['GB/Chip'] = (servers_intel_sandybridge['TotalMemory (GB)'] / servers_intel_sandybridge['Chips']).mean()
intel_sandybridge['Total memory'] = servers_intel_sandybridge['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_sandybridge['100% watts GCP adjusted'] = intel_sandybridge['100% watts'] - (intel_sandybridge['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_sandybridge["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_sandybridge["Idle watts"].mean())) == 2.17

print(f'Average: Max Watts = {intel_sandybridge["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_sandybridge["100% watts"].mean())) == 8.58

print(f'Average: Max Watts (GCP) = {intel_sandybridge["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_sandybridge["100% watts GCP adjusted"].mean())) == 8.55

print(f'Average: GB/Chip = {intel_sandybridge["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_sandybridge["GB/Chip"].mean())) == 16.48

Average: Min Watts = 2.17
Average: Max Watts = 8.58
Average: Max Watts (GCP) = 8.55
Average: GB/Chip = 16.48


### Intel: Ivy Bridge

In [274]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_ivybridge)
servers_intel_ivybridge = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
intel_ivybridge = {}
intel_ivybridge['Idle watts'] = (servers_intel_ivybridge['avg. watts@ active idle'].astype(float) / servers_intel_ivybridge['TotalThreads']).mean()
intel_ivybridge['100% watts'] = (servers_intel_ivybridge['avg. watts@ 100%'].astype(float) / servers_intel_ivybridge['TotalThreads']).mean()
intel_ivybridge['GB/Chip'] = (servers_intel_ivybridge['TotalMemory (GB)'] / servers_intel_ivybridge['Chips']).mean()
intel_ivybridge['Total memory'] = servers_intel_ivybridge['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_ivybridge['100% watts GCP adjusted'] = intel_ivybridge['100% watts'] - (intel_ivybridge['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_ivybridge["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_ivybridge["Idle watts"].mean())) == 3.04

print(f'Average: Max Watts = {intel_ivybridge["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_ivybridge["100% watts"].mean())) == 8.25

print(f'Average: Max Watts (GCP) = {intel_ivybridge["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_ivybridge["100% watts GCP adjusted"].mean())) == 8.20

print(f'Average: GB/Chip = {intel_ivybridge["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_ivybridge["GB/Chip"].mean())) == 14.93

Average: Min Watts = 1.71
Average: Max Watts = 5.56
Average: Max Watts (GCP) = 5.49
Average: GB/Chip = 17.45


### Intel: Haswell

In [275]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_haswell)
servers_intel_haswell = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
# NOTE(review): the last recorded run of this cell printed an idle average
# (12.87) above the 100% average (5.60), which suggests a bad row in the
# matched servers - inspect servers_intel_haswell before using these values.
intel_haswell = {}
intel_haswell['Idle watts'] = (servers_intel_haswell['avg. watts@ active idle'] / servers_intel_haswell['TotalThreads']).mean()
intel_haswell['100% watts'] = (servers_intel_haswell['avg. watts@ 100%'] / servers_intel_haswell['TotalThreads']).mean()
intel_haswell['GB/Chip'] = (servers_intel_haswell['TotalMemory (GB)'] / servers_intel_haswell['Chips']).mean()
intel_haswell['Total memory'] = servers_intel_haswell['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_haswell['100% watts GCP adjusted'] = intel_haswell['100% watts'] - (intel_haswell['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_haswell["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_haswell["Idle watts"].mean())) == 1.90

print(f'Average: Max Watts = {intel_haswell["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_haswell["100% watts"].mean())) == 6.01

print(f'Average: Max Watts (GCP) = {intel_haswell["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_haswell["100% watts GCP adjusted"].mean())) == 5.97

print(f'Average: GB/Chip = {intel_haswell["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_haswell["GB/Chip"].mean())) == 27.31

Average: Min Watts = 12.87
Average: Max Watts = 5.60
Average: Max Watts (GCP) = 5.54
Average: GB/Chip = 31.27


### Intel: Broadwell

In [256]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_broadwell)
servers_intel_broadwell = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
intel_broadwell = {}
intel_broadwell['Idle watts'] = (servers_intel_broadwell['avg. watts@ active idle'].astype(float) / servers_intel_broadwell['TotalThreads']).mean()
intel_broadwell['100% watts'] = (servers_intel_broadwell['avg. watts@ 100%'].astype(float) / servers_intel_broadwell['TotalThreads']).mean()
intel_broadwell['GB/Chip'] = (servers_intel_broadwell['TotalMemory (GB)'] / servers_intel_broadwell['Chips']).mean()
intel_broadwell['Total memory'] = servers_intel_broadwell['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_broadwell['100% watts GCP adjusted'] = intel_broadwell['100% watts'] - (intel_broadwell['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_broadwell["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_broadwell["Idle watts"].mean())) == 0.71

print(f'Average: Max Watts = {intel_broadwell["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_broadwell["100% watts"].mean())) == 3.69

print(f'Average: Max Watts (GCP) = {intel_broadwell["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_broadwell["100% watts GCP adjusted"].mean())) == 3.39

print(f'Average: GB/Chip = {intel_broadwell["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_broadwell["GB/Chip"].mean())) == 69.65

Average: Min Watts = 0.71
Average: Max Watts = 3.69
Average: Max Watts (GCP) = 3.39
Average: GB/Chip = 69.65


### Intel: Skylake

In [257]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_skylake)
servers_intel_skylake = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
intel_skylake = {}
intel_skylake['Idle watts'] = (servers_intel_skylake['avg. watts@ active idle'].astype(float) / servers_intel_skylake['TotalThreads']).mean()
intel_skylake['100% watts'] = (servers_intel_skylake['avg. watts@ 100%'].astype(float) / servers_intel_skylake['TotalThreads']).mean()
intel_skylake['GB/Chip'] = (servers_intel_skylake['TotalMemory (GB)'] / servers_intel_skylake['Chips']).mean()
intel_skylake['Total memory'] = servers_intel_skylake['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_skylake['100% watts GCP adjusted'] = intel_skylake['100% watts'] - (intel_skylake['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_skylake["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_skylake["Idle watts"].mean())) == 0.64

print(f'Average: Max Watts = {intel_skylake["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_skylake["100% watts"].mean())) == 4.19

print(f'Average: Max Watts (GCP) = {intel_skylake["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_skylake["100% watts GCP adjusted"].mean())) == 3.90

print(f'Average: GB/Chip = {intel_skylake["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_skylake["GB/Chip"].mean())) == 80.43

Average: Min Watts = 0.62
Average: Max Watts = 4.13
Average: Max Watts (GCP) = 3.83
Average: GB/Chip = 80.21


### Intel: Cascade Lake

In [258]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_cascadelake)
servers_intel_cascadelake = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
intel_cascadelake = {}
intel_cascadelake['Idle watts'] = (servers_intel_cascadelake['avg. watts@ active idle'].astype(float) / servers_intel_cascadelake['TotalThreads']).mean()
intel_cascadelake['100% watts'] = (servers_intel_cascadelake['avg. watts@ 100%'].astype(float) / servers_intel_cascadelake['TotalThreads']).mean()
intel_cascadelake['GB/Chip'] = (servers_intel_cascadelake['TotalMemory (GB)'] / servers_intel_cascadelake['Chips']).mean()
intel_cascadelake['Total memory'] = servers_intel_cascadelake['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_cascadelake['100% watts GCP adjusted'] = intel_cascadelake['100% watts'] - (intel_cascadelake['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_cascadelake["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_cascadelake["Idle watts"].mean())) == 0.64

print(f'Average: Max Watts = {intel_cascadelake["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_cascadelake["100% watts"].mean())) == 3.97

print(f'Average: Max Watts (GCP) = {intel_cascadelake["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_cascadelake["100% watts GCP adjusted"].mean())) == 3.64

print(f'Average: GB/Chip = {intel_cascadelake["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_cascadelake["GB/Chip"].mean())) == 98.12

Average: Min Watts = 0.69
Average: Max Watts = 4.06
Average: Max Watts (GCP) = 3.74
Average: GB/Chip = 107.20


### Intel: Coffee Lake

In [276]:
# Construct regex to match the chip name exactly to the end of the line
# (See notes above on regex and clean data)
# Case-insensitivity is requested with case=False rather than an inline (?i)
# on every alternative: Python 3.11+ rejects global flags that are not at the
# start of the pattern. Names are escaped so they only ever match literally,
# and na=False leaves out rows that have no CPU description.
cpus_re = '|'.join(rf'\b{re.escape(string)}$' for string in cpus_intel_coffeelake)
servers_intel_coffeelake = servers[servers['CPU Description'].str.contains(cpus_re, case=False, na=False)]

# Watts are averaged per thread and memory per chip across the matching servers
intel_coffeelake = {}
intel_coffeelake['Idle watts'] = (servers_intel_coffeelake['avg. watts@ active idle'].astype(float) / servers_intel_coffeelake['TotalThreads']).mean()
intel_coffeelake['100% watts'] = (servers_intel_coffeelake['avg. watts@ 100%'].astype(float) / servers_intel_coffeelake['TotalThreads']).mean()
intel_coffeelake['GB/Chip'] = (servers_intel_coffeelake['TotalMemory (GB)'] / servers_intel_coffeelake['Chips']).mean()
intel_coffeelake['Total memory'] = servers_intel_coffeelake['TotalMemory (GB)']

# Adjustment for GCP
# See https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/issues/73
intel_coffeelake['100% watts GCP adjusted'] = intel_coffeelake['100% watts'] - (intel_coffeelake['Total memory'] * MEMORY_COEFFICIENT)

print(f'Average: Min Watts = {intel_coffeelake["Idle watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_coffeelake["Idle watts"].mean())) == 1.14

print(f'Average: Max Watts = {intel_coffeelake["100% watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_coffeelake["100% watts"].mean())) == 5.42

print(f'Average: Max Watts (GCP) = {intel_coffeelake["100% watts GCP adjusted"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_coffeelake["100% watts GCP adjusted"].mean())) == 5.41

print(f'Average: GB/Chip = {intel_coffeelake["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(intel_coffeelake["GB/Chip"].mean())) == 19.56

Average: Min Watts = 1.14
Average: Max Watts = 5.42
Average: Max Watts (GCP) = 5.41
Average: GB/Chip = 19.56


## Azure

### Use stage coefficients

These values go in [`packages/azure/src/domain/AzureFootprintEstimationConstants.ts`](https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/blob/trunk/packages/azure/src/domain/AzureFootprintEstimationConstants.ts)

In [263]:
azure_instances = pd.read_csv('data/azure-instances.csv', na_values=['NC'])
azure_architectures = azure_instances['Microarchitecture'].unique()

# SPECpower averages computed in the processor sections above, keyed by the
# microarchitecture name used in the Azure instance data
azure_architecture_stats = {
    'EPYC 1st Gen': amd_epyc_gen1,
    'EPYC 2nd Gen': amd_epyc_gen2,
    'EPYC 3rd Gen': amd_epyc_gen3,
    'Sandy Bridge': intel_sandybridge,
    'Ivy Bridge': intel_ivybridge,
    'Haswell': intel_haswell,
    'Broadwell': intel_broadwell,
    'Skylake': intel_skylake,
    'Cascade Lake': intel_cascadelake,
    'Coffee Lake': intel_coffeelake,
}

# One coefficient row per architecture that has SPECpower averages;
# architectures without a match are reported and left out
azure_coefficients = []
for cpu in azure_architectures:
    stats = azure_architecture_stats.get(cpu)
    if stats is None:
        print(f'No match for CPU: {cpu}')
        continue

    azure_coefficients.append({
        'Architecture': cpu,
        'Min Watts': stats['Idle watts'],
        'Max Watts': stats['100% watts'],
        'GB/Chip': stats['GB/Chip']
    })

azure_coefficients = pd.DataFrame(azure_coefficients).drop_duplicates(ignore_index=True)

print(f'Average: Min Watts = {azure_coefficients["Min Watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(azure_coefficients["Min Watts"].mean())) == 0.85

print(f'Average: Max Watts = {azure_coefficients["Max Watts"].mean():,.2f}')
# assert float('{:,.2f}'.format(azure_coefficients["Max Watts"].mean())) == 3.69

print(f'Average: GB/Chip = {azure_coefficients["GB/Chip"].mean():,.2f}')
# assert float('{:,.2f}'.format(azure_coefficients["GB/Chip"].mean())) == 80.30

azure_coefficients.to_csv('output/coefficients-azure-use.csv')

No match for CPU: Unknown
Average: Min Watts = 2.22
Average: Max Watts = 3.64
Average: GB/Chip = 82.26


### Embodied emissions

Outputs embodied emissions values for each instance type and CPU architecture.

The list of instances with embodied emissions is output to
`output/coefficients-azure-embodied.csv` ready for import into
[CCF](https://github.com/cloud-carbon-footprint/cloud-carbon-footprint).

In [264]:
azure_instances_embodied = []

for key, instance in azure_instances.iterrows():
    # Call our calculation methods for each of the additional components
    additional_memory = calculate_additional_memory_emissions(
        instance['Platform Memory'])

    additional_storage = calculate_additional_storage_emissions(
        instance['Platform Storage Type'],
        instance['Platform (largest instance) Storage Drive quantity'])

    additional_cpus = calculate_additional_cpu_emissions(
        'azure',
        instance['Microarchitecture'])

    additional_gpus = calculate_additional_gpu_emissions(
        instance['Platform GPU'])

    # The total is computed from the unrounded components and rounded once,
    # so it can differ slightly from the sum of the rounded columns.
    total = (BASE_MANUFACTURING_EMISSIONS + additional_memory
             + additional_storage + additional_cpus + additional_gpus)

    # Build a dictionary of the instance emissions
    azure_instances_embodied.append({
        'family': instance['Series'],
        'type': instance['Virtual Machine'],
        'microarchitecture': instance['Microarchitecture'],
        'additional_memory': round(additional_memory, 2),
        'additional_storage': round(additional_storage, 2),
        'additional_cpus': round(additional_cpus, 2),
        'additional_gpus': round(additional_gpus, 2),
        'total': round(total, 2)
    })

azure_instances_embodied = pd.DataFrame(azure_instances_embodied)

# Pick some random instances to test the results are as expected.
# Maps instance type -> expected value for each emissions column.
expected_azure_embodied = {
    'A1 v2': {
        'additional_memory': 66.62,
        'additional_storage': 50.0,
        'additional_cpus': 100.0,
        'additional_gpus': 0.0,
        'total': 1216.62,
    },
    'NC24s v3': {
        'additional_memory': 599.62,
        'additional_storage': 50.0,
        'additional_cpus': 100.0,
        'additional_gpus': 600.0,
        'total': 2349.62,
    },
    'S896om': {
        'additional_memory': 51145.79,
        'additional_storage': 50.0,
        'additional_cpus': 100.0,
        'additional_gpus': 0.0,
        'total': 52295.79,
    },
}

for instance_type, expected in expected_azure_embodied.items():
    result = azure_instances_embodied[
        azure_instances_embodied['type'] == instance_type]

    # Fail with a readable message rather than relying on the truth value of
    # an empty or multi-row comparison result.
    assert not result.empty, f'No embodied emissions row for {instance_type}'

    for column, value in expected.items():
        assert np.isclose(result[column], value).all(), (
            f'{instance_type}: {column} is {result[column].tolist()}, '
            f'expected {value}')

azure_instances_embodied.to_csv('output/coefficients-azure-embodied.csv')

## AWS

### Use stage coefficients

These values go in [`packages/aws/src/domain/AwsFootprintEstimationConstants.ts`](https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/blob/trunk/packages/aws/src/domain/AwsFootprintEstimationConstants.ts)

In [266]:
aws_instances = pd.read_csv('data/aws-instances.csv', na_values=['NC'])

# With na_values=['NC'] an 'NC' CPU name is parsed as NaN. NaN is a float and
# has no .replace(), so missing names are dropped before the cleaning below.
aws_platforms = aws_instances['Platform CPU Name'].dropna().unique()
aws_coefficients = []
aws_platforms_check = []

# Matching rules, tried in order; the first rule whose CPU collection contains
# the cleaned CPU name wins.
# Each rule is (architecture label, CPU names, coefficient source).
aws_architecture_rules = [
    ('EPYC 1st Gen', cpus_amd_epyc_gen1, amd_epyc_gen1),
    ('EPYC 2nd Gen', cpus_amd_epyc_gen2, amd_epyc_gen2),
    ('Sandy Bridge', cpus_intel_sandybridge, intel_sandybridge),
    ('Ivy Bridge', cpus_intel_ivybridge, intel_ivybridge),
    ('Haswell', cpus_intel_haswell, intel_haswell),
    ('Broadwell', cpus_intel_broadwell, intel_broadwell),
    # NOTE(review): this label is spelled 'Sky Lake' here but 'Skylake' in the
    # Azure and GCP cells. Left unchanged because it is written to the output
    # CSV - confirm which spelling consumers expect.
    ('Sky Lake', cpus_intel_skylake, intel_skylake),
    ('Cascade Lake', cpus_intel_cascadelake, intel_cascadelake),
    ('Coffee Lake', cpus_intel_coffeelake, intel_coffeelake),
    # We don't know the values for the Graviton chips so
    # assume they are the same spec as AMD EPYC Gen 2 but listed separately
    ('Graviton', ('Graviton',), amd_epyc_gen2),
    ('Graviton2', ('Graviton2',), amd_epyc_gen2),
]

for cpu in aws_platforms:
    # Clean the CSV data to allow it to match with CPU lists
    cpu = cpu.replace('Xeon Platinum', '').replace('Xeon', '').strip()

    for architecture, cpu_names, coefficients in aws_architecture_rules:
        if cpu in cpu_names:
            aws_coefficients.append({
                'Architecture': architecture,
                'Min Watts': coefficients['Idle watts'],
                'Max Watts': coefficients['100% watts'],
                'GB/Chip': coefficients['GB/Chip']
            })
            aws_platforms_check.append(cpu)
            break
    else:
        # No rule matched this CPU name
        print(f'No match for CPU: {cpu}')

# We expect to detect the following platforms
# Based on https://docs.google.com/spreadsheets/d/1YhtGO_UU9Hc162m7eQKYFQOnV4_yEK5_lgHYfl02JPE/edit#gid=1695769209
expected_aws_platforms = [
    'E5-2666 v3',
    'E5-2676 v3',
    'E5-2686 v4',
    'E5-2650',
    'E5-2665',
    'E5-2670',
    'E5-2651 v2',
    'E5-2670 v2',
    'E5-2680 v2',
    'E7-8880 v3',
    '8124M',
    '8151',
    '8175M',
    '8176M',
    '8252C',
    '8259CL',
    '8275CL',
    # '8375C' is not expected due to lack of Ice Lake SPECpower results
    'EPYC 7571',
    'EPYC 7R32',
    'Graviton',
    'Graviton2',
    'Core i7-8700B',
]

# Report every missing platform at once instead of stopping at the first one
missing_aws_platforms = [
    name for name in expected_aws_platforms if name not in aws_platforms_check]
assert not missing_aws_platforms, (
    f'Expected AWS platforms not detected: {missing_aws_platforms}')

aws_coefficients = pd.DataFrame(aws_coefficients)
aws_coefficients = aws_coefficients.drop_duplicates(ignore_index=True)

# Report the mean of every coefficient column.
# Disabled checks previously pinned these means to 1.14 (Min Watts),
# 4.34 (Max Watts) and 73.22 (GB/Chip).
for column in ('Min Watts', 'Max Watts', 'GB/Chip'):
    print(f'Average: {column} = {aws_coefficients[column].mean():,.2f}')

aws_coefficients.to_csv('output/coefficients-aws-use.csv')

No match for CPU: 8375C
Average: Min Watts = 2.02
Average: Max Watts = 4.06
Average: GB/Chip = 74.87


### Embodied emissions

The list of instances with embodied emissions is output to
`output/coefficients-aws-embodied.csv` ready for import into
[CCF](https://github.com/cloud-carbon-footprint/cloud-carbon-footprint).

In [267]:
aws_instances_embodied = []

for key, instance in aws_instances.iterrows():
    # Call our calculation methods for each of the additional components
    additional_memory = calculate_additional_memory_emissions(
        instance['Platform Memory (in GB)'])

    additional_storage = calculate_additional_storage_emissions(
        instance['Storage Type'],
        instance['Platform Storage Drive Quantity'])

    additional_cpus = calculate_additional_cpu_emissions(
        'aws',
        instance['Platform CPU Name'])

    additional_gpus = calculate_additional_gpu_emissions(
        instance['Platform GPU Quantity'])

    # The total is computed from the unrounded components and rounded once,
    # so it can differ slightly from the sum of the rounded columns.
    total = (BASE_MANUFACTURING_EMISSIONS + additional_memory
             + additional_storage + additional_cpus + additional_gpus)

    # Build a dictionary of the instance emissions
    aws_instances_embodied.append({
        'type': instance['Instance type'],
        'additional_memory': round(additional_memory, 2),
        'additional_storage': round(additional_storage, 2),
        'additional_cpus': round(additional_cpus, 2),
        'additional_gpus': round(additional_gpus, 2),
        'total': round(total, 2)
    })

aws_instances_embodied = pd.DataFrame(aws_instances_embodied)

# Pick some random instances to test the results are as expected.
# Maps instance type -> expected value for each emissions column.
expected_aws_embodied = {
    'a1.medium': {
        'additional_memory': 22.21,
        'additional_storage': 0,
        'additional_cpus': 0,
        'additional_gpus': 0,
        'total': 1022.21,
    },
    'c3.xlarge': {
        'additional_memory': 61.07,
        'additional_storage': 200.0,
        'additional_cpus': 100.0,
        'additional_gpus': 0,
        'total': 1361.07,
    },
    'g4dn.xlarge': {
        'additional_memory': 510.79,
        'additional_storage': 200.0,
        'additional_cpus': 100.0,
        'additional_gpus': 1200.0,
        'total': 3010.79,
    },
}

for instance_type, expected in expected_aws_embodied.items():
    result = aws_instances_embodied[
        aws_instances_embodied['type'] == instance_type]

    # Fail with a readable message rather than relying on the truth value of
    # an empty or multi-row comparison result.
    assert not result.empty, f'No embodied emissions row for {instance_type}'

    for column, value in expected.items():
        assert np.isclose(result[column], value).all(), (
            f'{instance_type}: {column} is {result[column].tolist()}, '
            f'expected {value}')

aws_instances_embodied.to_csv('output/coefficients-aws-embodied.csv')

## GCP

### Use stage coefficients

These values go in [`packages/gcp/src/domain/GcpFootprintEstimationConstants.ts`](https://github.com/cloud-carbon-footprint/cloud-carbon-footprint/blob/trunk/packages/gcp/src/domain/GcpFootprintEstimationConstants.ts)

In [271]:
gcp_instances = pd.read_csv('data/gcp-instances.csv', na_values=['NC'])
gcp_architectures = gcp_instances['Microarchitecture'].unique()

# Microarchitecture name as it appears in the CSV -> coefficient source
gcp_architecture_sources = {
    'EPYC 1st Gen': amd_epyc_gen1,
    'EPYC 2nd Gen': amd_epyc_gen2,
    'EPYC 3rd Gen': amd_epyc_gen3,
    'Sandy Bridge': intel_sandybridge,
    'Ivy Bridge': intel_ivybridge,
    'Haswell': intel_haswell,
    'Broadwell': intel_broadwell,
    'Skylake': intel_skylake,
    'Cascade Lake': intel_cascadelake,
    'Coffee Lake': intel_coffeelake,
}

gcp_coefficients = []

for cpu in gcp_architectures:
    if cpu not in gcp_architecture_sources:
        print(f'No match for CPU: {cpu}')
        continue

    source = gcp_architecture_sources[cpu]
    gcp_coefficients.append({
        'Architecture': cpu,
        'Min Watts': source['Idle watts'],
        # GCP uses the GCP-adjusted 100% watts value rather than '100% watts'
        'Max Watts': source['100% watts GCP adjusted'].mean(),
        'GB/Chip': source['GB/Chip']
    })

gcp_coefficients = pd.DataFrame(gcp_coefficients).drop_duplicates(ignore_index=True)

# Report the median of every coefficient column.
# Disabled checks previously expected medians of 0.68 (Min Watts) and
# 3.77 (Max Watts); the disabled GB/Chip check compared the mean (not the
# median printed here) against 73.22.
for column in ('Min Watts', 'Max Watts', 'GB/Chip'):
    column_median = gcp_coefficients[column].median()
    print(f'Median: {column} = {column_median:,.2f}')

gcp_coefficients.to_csv('output/coefficients-gcp-use.csv')

Median: Min Watts = 0.70
Median: Max Watts = 3.79
Median: GB/Chip = 74.93


### Embodied emissions

The first iteration outputs embodied emissions values for each instance type and
CPU architecture. The second iteration outputs the mean value of the total
embodied emissions for all CPU architectures associated with the instance type.

This is because we don't know the precise architectures each instance type runs
on, so we take an average across all possible architectures.

The list of instances with embodied emissions is output to
`output/coefficients-gcp-embodied.csv`, and the mean total for each instance
type is output to `output/coefficients-gcp-embodied-mean.csv`, ready for import into
[CCF](https://github.com/cloud-carbon-footprint/cloud-carbon-footprint).

In [270]:
# First iteration for all instance types and all CPU types
gcp_instances_embodied = []

for key, instance in gcp_instances.iterrows():
    # Call our calculation methods for each of the additional components
    additional_memory = calculate_additional_memory_emissions(
        instance['Platform Memory'])

    additional_storage = calculate_additional_storage_emissions(
        instance['Platform Storage Type'],
        instance['Platform (largest instance) Storage Drive quantity'])

    additional_cpus = calculate_additional_cpu_emissions(
        'gcp',
        instance['Microarchitecture'])

    additional_gpus = calculate_additional_gpu_emissions(
        instance['Platform GPU'])

    # The total is computed from the unrounded components and rounded once,
    # so it can differ slightly from the sum of the rounded columns.
    total = (BASE_MANUFACTURING_EMISSIONS + additional_memory
             + additional_storage + additional_cpus + additional_gpus)

    # Build a dictionary of the instance emissions
    gcp_instances_embodied.append({
        'family': instance['Machine Family'],
        'type': instance['Machine type'],
        'microarchitecture': instance['Microarchitecture'],
        'additional_memory': round(additional_memory, 2),
        'additional_storage': round(additional_storage, 2),
        'additional_cpus': round(additional_cpus, 2),
        'additional_gpus': round(additional_gpus, 2),
        'total': round(total, 2)
    })

gcp_instances_embodied = pd.DataFrame(gcp_instances_embodied)

# Pick some random instances to test the results are as expected.
# Maps (machine type, microarchitecture) -> expected value for each column.
expected_gcp_embodied = {
    ('e2-standard-2', 'Skylake'): {
        'additional_memory': 155.46,
        'additional_storage': 0,
        'additional_cpus': 100.0,
        'additional_gpus': 0,
        'total': 1255.46,
    },
    ('n2-standard-4', 'Cascade Lake'): {
        'additional_memory': 688.46,
        'additional_storage': 100.0,
        'additional_cpus': 100.0,
        'additional_gpus': 0,
        'total': 1888.46,
    },
    ('a2-highgpu-8g', 'Cascade Lake'): {
        'additional_memory': 1865.5,
        'additional_storage': 100.0,
        'additional_cpus': 100.0,
        'additional_gpus': 2400,
        'total': 5465.5,
    },
}

for (machine_type, microarchitecture), expected in expected_gcp_embodied.items():
    result = gcp_instances_embodied[
        (gcp_instances_embodied['type'] == machine_type)
        & (gcp_instances_embodied['microarchitecture'] == microarchitecture)]

    # Fail with a readable message rather than relying on the truth value of
    # an empty or multi-row comparison result.
    assert not result.empty, (
        f'No embodied emissions row for {machine_type} on {microarchitecture}')

    for column, value in expected.items():
        assert np.isclose(result[column], value).all(), (
            f'{machine_type} ({microarchitecture}): {column} is '
            f'{result[column].tolist()}, expected {value}')

gcp_instances_embodied.to_csv('output/coefficients-gcp-embodied.csv')

# Second iteration to aggregate by instance type and output the mean.
# A single groupby replaces one string-built query per type; sort=False keeps
# the types in order of first appearance.
gcp_instances_type_embodied = (
    gcp_instances_embodied
    .groupby('type', sort=False)['total']
    .mean()
    .rename('total_mean')
    .reset_index()
)

# Pick some random instances to test the results are as expected
expected_gcp_total_means = {
    'e2-standard-2': 1230.46,
    'n2-standard-4': 1888.46,
    'a2-highgpu-8g': 5465.5,
}

for machine_type, expected_mean in expected_gcp_total_means.items():
    result = gcp_instances_type_embodied[
        gcp_instances_type_embodied['type'] == machine_type]

    assert not result.empty, f'No mean embodied emissions row for {machine_type}'
    assert np.isclose(result['total_mean'], expected_mean).all(), (
        f'{machine_type}: total_mean is {result["total_mean"].tolist()}, '
        f'expected {expected_mean}')

gcp_instances_type_embodied.to_csv('output/coefficients-gcp-embodied-mean.csv')