In [363]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.mgmt.resource import ResourceManagementClient, SubscriptionClient
from azure.mgmt.compute import ComputeManagementClient

import requests, json, os

# Acquire a credential object
credential = InteractiveBrowserCredential()

CLIMATIQ_KEY = "J00J1VB0J9M1H8Q3TRP7GTTHXPFG"

In [366]:
credential.authenticate()

<azure.identity._auth_record.AuthenticationRecord at 0x29a6a1930>

In [364]:
REGION_MAPPING = {
    "azure" : {
        "australiacentral": "australia_central",
        "australiacentral2": "australia_central_2",
        "australiaeast": "australia_east",
        "australiasoutheast": "australia_south_east",
        "brazilsouth": "brazil_south",
        "brazilsoutheast": "brazil_south_east",
        "canadacentral": "canada_central",
        "canadaeast": "canada_east",
        "centralindia": "central_india",
        "centralus": "central_us",
        "eastasia": "east_asia",
        "eastus": "east_us",
        "eastus2": "east_us_2",
        "eastus3": "east_us_3",
        "francecentral": "france_central",
        "francesouth": "france_south",
        "germanynorth": "germany_north",
        "germanywestcentral": "germany_west_central",
        "indiacentral": "india_central",
        "indiasouth": "india_south",
        "indiawest": "india_west",
        "japaneast": "japan_east",
        "japanwest": "japan_west",
        "koreaeast": "korea_east",
        "koreasouth": "korea_south",
        "northcentralus": "north_central_us",
        "northeurope": "north_europe",
        "norwayeast": "norway_east",
        "norwaywest": "norway_west",
        "southafrica": "south_africa",
        "southafricanorth": "south_africa_north",
        "southafricawest": "south_africa_west",
        "southcentralus": "south_central_us",
        "southindia": "south_india",
        "southeastasia": "southeast_asia",
        "switzerlandnorth": "switzerland_north",
        "switzerlandwest": "switzerland_west",
        "uksouth": "uk_south",
        "ukwest": "uk_west",
        "unitedarabemirates": "united_arab_emirates",
        "unitedarabemiratescentral": "united_arab_emirates_central",
        "unitedarabemiratesnorth": "united_arab_emirates_north",
        "westcentralus": "west_central_us",
        "westeurope": "west_europe",
        "westindia": "west_india",
        "westus": "west_us",
        "westus2": "west_us_2",
        "westus3": "west_us_3"
    }
}

In [382]:
# Retrieve subscription ID from environment variable.
gdd_subscription_id = "5ddf05c0-b972-44ca-b90a-3e49b5de80dd" # GDD playground

def get_available_vms(credential, subscription_id):
    client = ComputeManagementClient(credential, subscription_id)

    vm_list_details = {
        sku.name: {
            "tier": sku.tier,
            "size": sku.size,
            "vCPUS": int([c.value for c in sku.capabilities if str(c.name) == "vCPUs"][0]),
            "capabilities" : {c.name : c.value for c in sku.capabilities}
        } for sku in client.resource_skus.list() if "virtualMachine" in sku.resource_type }

    return vm_list_details

def get_az_resources(credential, subscription_ids=[], databricks_only=False):
    resources = []

    if isinstance(subscription_ids, str):
        subscription_ids = [subscription_ids]

    # If no subscription is specified, get all subscriptions for the credential
    if len(subscription_ids) == 0:
        subs = SubscriptionClient(credential)
        subscription_ids = [sub.subscription_id for sub in list(subs.subscriptions.list())]
    
    for subscription_id in subscription_ids:
        available_vms = []
        resource_client = ResourceManagementClient(credential, subscription_id)
        group_list = list(resource_client.resource_groups.list())

        print(f"Fetching {len(group_list)} resources from subscription '{subscription_id}'")

        if databricks_only:
            group_list = [group for group in group_list if "databricks-rg" in group.name]

        for group in group_list:
            for item in resource_client.resources.list_by_resource_group(group.name, expand="createdTime"):
                if item.type in ["Microsoft.Storage/storageAccounts", "Microsoft.Compute/virtualMachines"]:
                    resource_dict = {
                        "group_name": group.name, 
                        "group_is_databricks": "databricks-rg" in group.name,
                        "item_id": item.id,
                        "item_type": item.type,
                        "item_name": item.name,
                        "item_location": item.location,
                        "item_created_time": str(item.created_time)
                    }

                    resource_details = resource_client.resources.get_by_id(item.id, api_version="2021-04-01")

                    if "hardwareProfile" in resource_details.properties:
                        resource_dict["vm_name"] = resource_details.properties["hardwareProfile"]["vmSize"]

                        if len(available_vms) == 0:
                            print(f"Fetching available vms for subscription '{subscription_id}'")
                            available_vms = get_available_vms(credential, subscription_id)
                        
                        if resource_dict["vm_name"] in available_vms.keys():
                            resource_dict["num_cpus"] = available_vms[resource_dict["vm_name"]]["vCPUS"]

                    resources.append(resource_dict)

    return resources

In [368]:
az_resources = get_az_resources(credential)

Fetching 19 resources from subscription '1e663d00-d459-4c1d-88ba-7877566362e6'
Fetching available vms for subscription '1e663d00-d459-4c1d-88ba-7877566362e6'
Fetching 12 resources from subscription '93003cb9-c1a6-48d2-b9a9-757ca760743e'
Fetching available vms for subscription '93003cb9-c1a6-48d2-b9a9-757ca760743e'


In [383]:
vms = get_available_vms(credential, '1e663d00-d459-4c1d-88ba-7877566362e6')

In [384]:
vms

{'Basic_A0': {'tier': 'Basic',
  'size': 'A0',
  'vCPUS': 1,
  'capabilities': {'MaxResourceVolumeMB': '20480',
   'OSVhdSizeMB': '1047552',
   'vCPUs': '1',
   'MemoryPreservingMaintenanceSupported': 'True',
   'HyperVGenerations': 'V1',
   'MemoryGB': '0.75',
   'MaxDataDiskCount': '1',
   'CpuArchitectureType': 'x64',
   'LowPriorityCapable': 'False',
   'PremiumIO': 'False',
   'VMDeploymentTypes': 'IaaS',
   'vCPUsAvailable': '1',
   'ACUs': '50',
   'vCPUsPerCore': '1',
   'EphemeralOSDiskSupported': 'False',
   'EncryptionAtHostSupported': 'False',
   'CapacityReservationSupported': 'False',
   'AcceleratedNetworkingEnabled': 'False',
   'RdmaEnabled': 'False',
   'MaxNetworkInterfaces': '2'}},
 'Basic_A1': {'tier': 'Basic',
  'size': 'A1',
  'vCPUS': 1,
  'capabilities': {'MaxResourceVolumeMB': '40960',
   'OSVhdSizeMB': '1047552',
   'vCPUs': '1',
   'MemoryPreservingMaintenanceSupported': 'True',
   'HyperVGenerations': 'V1',
   'MemoryGB': '1.75',
   'MaxDataDiskCount': '2',

In [369]:
def get_monitoring_metric(resource_id: str, metrics: list, credential, timespan):
    token =credential.get_token("https://management.azure.com/user_impersonation")
    metrics = ",".join(metrics)
    
    url = f"https://management.azure.com/{resource_id}/providers/microsoft.insights/metrics?api-version=2018-01-01&metricnames={metrics}&timespan={timespan}"
    headers = {"Authorization" : f"Bearer {token.token}"}
    params = {"aggregation" : "average", "interval": "P1D"}

    res = requests.get(url, headers=headers, params=params)

    return json.loads(res.text)

In [370]:
metrics = ["Percentage CPU"]
timespan = "2022-09-07T01:00:00Z/2022-10-16T00:00:00Z"

for r in az_resources:
    if r["item_type"] == "Microsoft.Compute/virtualMachines":
        usage = get_monitoring_metric(r["item_id"], metrics, credential, timespan)
        r["usage"] = {
            "metrics": metrics,
            "cost" : usage["cost"],
            "data": [t["data"] for t in usage["value"][0]["timeseries"]] # take first statistic (avg) for all metrics in resource
        }

In [371]:
az_resources[:2]

[{'group_name': 'rg-vckgdp-terra-uprod',
  'group_is_databricks': False,
  'item_id': '/subscriptions/1e663d00-d459-4c1d-88ba-7877566362e6/resourceGroups/rg-vckgdp-terra-uprod/providers/Microsoft.Storage/storageAccounts/stvckgdpterrauprod',
  'item_type': 'Microsoft.Storage/storageAccounts',
  'item_name': 'stvckgdpterrauprod',
  'item_location': 'westeurope',
  'item_created_time': '2021-09-23 13:17:23.738956+00:00'},
 {'group_name': 'rg-vckgdp-terra-udev',
  'group_is_databricks': False,
  'item_id': '/subscriptions/1e663d00-d459-4c1d-88ba-7877566362e6/resourceGroups/rg-vckgdp-terra-udev/providers/Microsoft.Storage/storageAccounts/stvckgdpterraudev',
  'item_type': 'Microsoft.Storage/storageAccounts',
  'item_name': 'stvckgdpterraudev',
  'item_location': 'westeurope',
  'item_created_time': '2021-09-23 13:21:09.287786+00:00'}]

In [372]:
def get_co2_data():
    if not "CO2_DATA" in globals():
        set_co2_data({
            "cpu": {},
            "memory": {},
            "storage": {}
        })
    
    return CO2_DATA


def set_co2_data(co2_data):
    global CO2_DATA
    CO2_DATA = co2_data

    return CO2_DATA


def get_climatiq_api_key():
    if "CLIMATIQ_KEY" in globals():
        return CLIMATIQ_KEY
    elif "CLIMATIQ_KEY" in os.environ():
        return os.environ.get("CLIMATIQ_KEY")


def calculate_compute_emission(provider, region, cpu_count, cpu_load, duration, duration_unit):
    co2_data = get_co2_data()
    climatiq_api_key = get_climatiq_api_key()

    # normalise region
    region_mapping = REGION_MAPPING
    if region in region_mapping[provider].keys():
        region = region_mapping[provider][region]
    
    # Fetch data for calculation if not exists
    if not provider in co2_data["cpu"].keys():
        co2_data["cpu"][provider] = {}
        co2_data["cpu"][provider][region] = retrieve_climatiq_data(climatiq_api_key, provider, region)["co2e"]
        set_co2_data(co2_data)

    if not region in co2_data["cpu"][provider].keys():
        co2_data["cpu"][provider][region] = retrieve_climatiq_data(climatiq_api_key, provider, region)["co2e"]
        set_co2_data(co2_data)

    # Normalise to hourly units
    if duration_unit == "minute":
        duration = duration / 60
    elif duration_unit == "day":
        duration = duration * 24

    # Normalise cpu load if used in percentage
    if cpu_load > 1:
        cpu_load = cpu_load / 100

    return CO2_DATA["cpu"][provider][region] * cpu_count * cpu_load * duration


def retrieve_climatiq_data(api_key, provider, region, type="cpu", duration=1, duration_unit="h"):
    url = f"https://beta3.api.climatiq.io/compute/{provider}/{type}"
    headers = {"Authorization" : f"Bearer {api_key}"}

    data = {
        "cpu_count": 1,
        "region": region,
        "cpu_load": 1,
        "duration": int(duration),
        "duration_unit": duration_unit
    }

    res = requests.post(url, headers=headers, json=data)

    if not str(res.status_code)[0] == "2":
        raise Exception(f"Error calling climatiq API: {res.text}")

    return res.json()

In [373]:
import pandas as pd

import plotly.express as px
import plotly.graph_objs as go

pd.options.plotting.backend = "plotly"

In [374]:
data = []

for r in az_resources:
    if "usage" in r.keys():
        data.extend([(
                f"{r['group_name']} - {r['item_name']}", 
                r["item_location"],
                r["num_cpus"],
                val["timeStamp"].split("T")[0], 
                val["average"]
            ) for val in r["usage"]["data"][0] if "average" in val.keys()] 
        )

data[0]

('rg-vckgdp-inges-udev - vm-vckgdp-inges-udev',
 'westeurope',
 2,
 '2022-09-15',
 11.21287599861063)

In [375]:
df = pd.DataFrame(data, columns=["group_name", "location", "num_cpus", "date", "avg_cpu_utilisation"])
df["date"] = pd.to_datetime(df["date"])
df = df.set_index("date")
df.head()

Unnamed: 0_level_0,group_name,location,num_cpus,avg_cpu_utilisation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-09-15,rg-vckgdp-inges-udev - vm-vckgdp-inges-udev,westeurope,2,11.212876
2022-09-16,rg-vckgdp-inges-udev - vm-vckgdp-inges-udev,westeurope,2,9.177477
2022-09-17,rg-vckgdp-inges-udev - vm-vckgdp-inges-udev,westeurope,2,9.139667
2022-09-18,rg-vckgdp-inges-udev - vm-vckgdp-inges-udev,westeurope,2,8.846148
2022-09-19,rg-vckgdp-inges-udev - vm-vckgdp-inges-udev,westeurope,2,8.791565


In [376]:
df["co2e_kg"] = df.apply(lambda row: calculate_compute_emission("azure", row["location"], row["num_cpus"], row["avg_cpu_utilisation"], 1, "day"), axis=1)

In [379]:
def get_co2_equivalent(co2_kg, type="car_kilometers"):
    if type == "car_kilometers":
        # Equivalent KMs for Toyota Corolla 2020 (i.e. most popular car in the world)
        factor = 0.196974607

    return f"{round(co2_kg * factor, 2)} {type}"

get_co2_equivalent(df["co2e_kg"].sum())

'0.17 car_kilometers'

In [377]:
fig = px.bar(df, x=df.index, y="co2e_kg", color="group_name", template="plotly_dark")
fig.update_layout(legend=dict(orientation="h", y=-0.3))
fig.show()
