# Lesson 5: Understanding your Google Cloud Footprint

In [1]:
# authenticate() comes from the `helper` module (not shown in this notebook).
# Its two return values are later passed to bigquery.Client as `credentials`
# and `project` inside run_bq_query.
from helper import authenticate
CREDENTIALS, PROJECT_ID = authenticate()

In [2]:
from google.cloud import bigquery

In [3]:
import pandas as pd

* A helper function that runs a BigQuery SQL query and returns the result as a pandas DataFrame.

In [4]:
def run_bq_query(sql, job_config=None):
    """Run a SQL query on BigQuery and return the result as a pandas DataFrame.

    Args:
        sql: Query text to execute.
        job_config: Optional ``bigquery.QueryJobConfig`` (for example to
            supply query parameters). When omitted, a default, empty config
            is used, which matches the previous behavior of this helper.

    Returns:
        A pandas DataFrame built from the query result.

    Side effects:
        Prints the ID of the query job once its result has been fetched.
    """
    # PROJECT_ID and CREDENTIALS are the notebook-level values returned by
    # authenticate(); a new client is created on every call.
    bq_client = bigquery.Client(
        project=PROJECT_ID,
        credentials=CREDENTIALS)

    if job_config is None:
        job_config = bigquery.QueryJobConfig()

    query_job = bq_client.query(sql, job_config=job_config)

    # Rows are fetched as an Arrow table first and then converted to pandas.
    df = query_job.result().to_arrow().to_pandas()
    print(f"Finished job_id: {query_job.job_id}")
    return df

* Define the query.

In [5]:
# Preview query: first five rows of the sample carbon-footprint table.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
query = """
SELECT * from `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`
LIMIT 5
"""

In [6]:
sample_df = run_bq_query(query)

Finished job_id: 7f0adf68-8a04-4a55-8ed9-47200c011991


In [7]:
# Print the dataframe
sample_df

Unnamed: 0,location,billing_account_id,carbon_footprint_kgCO2e,project,usage_month,carbon_footprint_total_kgCO2e,service,carbon_model_version
0,"{'location': 'europe-southwest1', 'region': 'e...",12345,"{'scope1': 1.9656928365246684e-07, 'scope2': {...","{'id': 'genai-sandbox', 'number': 11111}",2022-12-01,{'location_based': 0.00016346808320641425},"{'description': 'Cloud Run', 'id': '152E-C115-...",7
1,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 9.133650100104014e-11, 'scope2': {'...","{'id': 'genai-sandbox', 'number': 11111}",2022-12-01,{'location_based': 1.0665550669434009e-07},"{'description': 'Cloud Pub/Sub', 'id': 'A1E8-B...",7
2,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 7.276418114294598e-05, 'scope2': {'...","{'id': 'genai-sandbox', 'number': 11111}",2021-08-01,{'location_based': 0.08484602626557447},"{'description': 'Cloud Dataflow', 'id': '57D6-...",6
3,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 0.06795997971538453, 'scope2': {'lo...","{'id': 'genai-sandbox', 'number': 11111}",2021-08-01,{'location_based': 73.26497053897295},"{'description': 'Cloud Vision API', 'id': 'C08...",6
4,"{'location': 'us', 'region': None}",12345,"{'scope1': 2.8320308582156193e-08, 'scope2': {...","{'id': 'genai-sandbox', 'number': 11111}",2022-11-01,{'location_based': 2.607932476900128e-05},"{'description': 'BigQuery', 'id': '24E6-581D-3...",6


In [8]:
print(sample_df['carbon_footprint_kgCO2e'][0])

{'scope1': 1.9656928365246684e-07, 'scope2': {'location_based': 4.966157377571001e-05}, 'scope3': 0.00011360994014705176}


In [9]:
print(sample_df['service'][0])

{'description': 'Cloud Run', 'id': '152E-C115-5142'}


In [10]:
print(sample_df['carbon_footprint_total_kgCO2e'][0])

{'location_based': 0.00016346808320641425}


In [11]:
# Total footprint of the first row: scope 1 + scope 2 (location-based) + scope 3.
# The nested record is looked up once and reused for all three terms.
first_row_footprint = sample_df['carbon_footprint_kgCO2e'][0]
(
    first_row_footprint['scope1']
    + first_row_footprint['scope2']['location_based']
    + first_row_footprint['scope3']
)

0.00016346808320641425

* More example queries.

In [12]:
# Sum the scope 2 (location-based) emissions of a single service, in this case BigQuery.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
query = """
SELECT SUM(carbon_footprint_kgCO2e.scope2.location_based)
FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`
WHERE service.description = "BigQuery"
"""

In [13]:
df = run_bq_query(query)

Finished job_id: 287af1a3-03e8-4af0-b6f0-e67dfdabf9da


In [14]:
# Print
df

Unnamed: 0,f0_
0,0.199101


In [15]:
# Select specific (nested) columns for project number 11111,
# ordered by usage month and then by service description.
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
query = """
SELECT
    usage_month,
    service.description,
    location.location,
    carbon_footprint_total_kgCO2e.location_based
FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`
WHERE project.number = 11111
ORDER BY usage_month, service.description
"""

In [16]:
df = run_bq_query(query)

Finished job_id: ca583561-884a-4ce0-b02f-fdbc0d59fba7


In [17]:
# Print
df

Unnamed: 0,usage_month,description,location,location_based
0,2021-06-01,Cloud Build,us-east1-d,0.001797
1,2021-06-01,Cloud Build,us-east4-c,0.001940
2,2021-06-01,Cloud Machine Learning Engine,us-central1,49.691373
3,2021-06-01,Cloud Storage,us,0.011047
4,2021-06-01,Cloud Storage,us-central1,0.008751
...,...,...,...,...
347,2024-01-01,Cloud Storage,us-central1,0.933052
348,2024-01-01,Compute Engine,us-west1,1.026506
349,2024-01-01,Compute Engine,us-central1,12.219985
350,2024-01-01,Notebooks,us-central1,6.250810


In [18]:
# Total emissions per project: one row for each project number.
# GROUP BY already yields a single row per project, so DISTINCT is not needed.
query = """
SELECT SUM(carbon_footprint_total_kgCO2e.location_based) as carbon_emissions, project.number
FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`
GROUP BY project.number
"""

In [19]:
df = run_bq_query(query)

Finished job_id: 24c71606-ae9c-4e44-b964-259e2bd6f5d6


In [20]:
# Print
df

Unnamed: 0,carbon_emissions,number
0,15854.736771,11111
1,12035.135505,33333
2,520.707209,22222


In [21]:
# Grand total of location-based emissions across the whole table.
# An aggregate without GROUP BY returns exactly one row, so DISTINCT is not needed.
query = """
SELECT SUM(carbon_footprint_total_kgCO2e.location_based)
FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`
"""

In [22]:
df = run_bq_query(query)

Finished job_id: eae37f03-ff3a-4447-85ff-74dd947c7d4f


In [23]:
# Cross-check: add up the three per-project totals shown earlier and compare
# the sum with the overall total returned by the most recent query.
15854.736771 + 12035.135505 + 520.707209

28410.579485000002

In [24]:
df

Unnamed: 0,f0_
0,28410.579484


In [25]:
# 28410 is the overall location-based total from above (kgCO2e), truncated to an integer.
# NOTE(review): the notebook never says what the divisor 986 represents — TODO document its source and units.
28410/986

28.81338742393509

* Load the full sample dataset into a pandas DataFrame.

In [26]:
# Fetch every row and column of the sample table (no LIMIT).
# Plain string literal: nothing is interpolated, so no f-prefix is needed.
query = """
SELECT *
FROM `sc-gcp-c5-carbon-emissions.carbonfootprint.sample_data`
"""

In [27]:
df = run_bq_query(query)

Finished job_id: e30f7a16-f89d-4f4d-b356-c5dac16796dc


In [28]:
# Print
df

Unnamed: 0,location,billing_account_id,carbon_footprint_kgCO2e,project,usage_month,carbon_footprint_total_kgCO2e,service,carbon_model_version
0,"{'location': 'europe-southwest1', 'region': 'e...",12345,"{'scope1': 1.9656928365246684e-07, 'scope2': {...","{'id': 'genai-sandbox', 'number': 11111}",2022-12-01,{'location_based': 0.00016346808320641425},"{'description': 'Cloud Run', 'id': '152E-C115-...",7
1,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 9.133650100104014e-11, 'scope2': {'...","{'id': 'genai-sandbox', 'number': 11111}",2022-12-01,{'location_based': 1.0665550669434009e-07},"{'description': 'Cloud Pub/Sub', 'id': 'A1E8-B...",7
2,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 7.276418114294598e-05, 'scope2': {'...","{'id': 'genai-sandbox', 'number': 11111}",2021-08-01,{'location_based': 0.08484602626557447},"{'description': 'Cloud Dataflow', 'id': '57D6-...",6
3,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 0.06795997971538453, 'scope2': {'lo...","{'id': 'genai-sandbox', 'number': 11111}",2021-08-01,{'location_based': 73.26497053897295},"{'description': 'Cloud Vision API', 'id': 'C08...",6
4,"{'location': 'us', 'region': None}",12345,"{'scope1': 2.8320308582156193e-08, 'scope2': {...","{'id': 'genai-sandbox', 'number': 11111}",2022-11-01,{'location_based': 2.607932476900128e-05},"{'description': 'BigQuery', 'id': '24E6-581D-3...",6
...,...,...,...,...,...,...,...,...
571,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 0.0005837179844260626, 'scope2': {'...","{'id': 'genai-sandbox', 'number': 11111}",2023-02-01,{'location_based': 0.6031637397335557},"{'description': 'Stackdriver Monitoring', 'id'...",8
572,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 0.13040957993318006, 'scope2': {'lo...","{'id': 'genai-sandbox', 'number': 11111}",2021-09-01,{'location_based': 135.32412273047785},{'description': 'Cloud Machine Learning Engine...,6
573,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 0.04651798649950839, 'scope2': {'lo...","{'id': 'genai-sandbox', 'number': 11111}",2021-07-01,{'location_based': 50.880526007500706},{'description': 'Cloud Machine Learning Engine...,6
574,"{'location': 'us-central1', 'region': 'us-cent...",12345,"{'scope1': 0.06828994782612086, 'scope2': {'lo...","{'id': 'genai-sandbox', 'number': 11111}",2021-08-01,{'location_based': 73.57722669866956},{'description': 'Cloud Machine Learning Engine...,6
