# Testing out GCP billing export

In [1]:
from google.cloud import bigquery
import os
import tqdm
import numpy as np
import pandas as pd
import pandas_gbq
from config import My_Config as cfg

In [2]:
# BigQuery project to bill/query against and the location passed to the client.
project_id = 'gcp-prices-358310'
dataset_location = 'europe-west4'

# Export the key-file path from the local config through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
credential_path = cfg.gcp_credential_path()
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path

In [3]:
# Create a BigQuery client bound to the configured project and location.
client = bigquery.Client(
    location=dataset_location,
    project=project_id,
)

In [4]:
# Build an aggregation query over the billing table named by cfg.gcp_gbq_table:
# cost and usage.amount are summed per combination of the GROUP BY columns,
# restricted to one billing account, invoice month '202208', usage that started
# within the last 30 days, and rows with usage.amount > 0.
# The table and billing account id are substituted into the SQL via str.format.
# NOTE(review): the following cell assigns `query` again before this string is
# executed anywhere visible, so this filtered query appears to be unused — confirm
# whether it should be kept.
# NOTE(review): {billing_account_id} is interpolated without SQL quotes; unless the
# config value already carries its own quotes, the WHERE clause presumably will
# not parse — verify, or pass it as a query parameter instead.
query = """SELECT invoice.month as invoiceMonth,billing_account_id,sum(cost) as cost,service.id as serviceId,service.description as serviceDescription,sku.description as sku,resource.name as resourceName,project.id as projectId,project.name as projectName,TO_JSON_STRING(project.labels) as labelsid,currency,location.location as region,usage.unit as usageUnit,sum(usage.amount) as usageAmount,cost_type, cast(usage_start_time as date) as startdate,cast(usage_end_time as date) as enddate FROM {table} WHERE billing_account_id = {billing_account_id} AND invoice.month = '202208' AND cast(usage_start_time as date) >= date_add(CURRENT_DATE(),INTERVAL -30 DAY) AND usage.amount > 0 GROUP BY startdate, enddate, billing_account_id, cost_type, serviceId, region, usageUnit, projectName, projectId,currency, invoiceMonth, serviceDescription, sku, resourceName, labelsid""".format(table=cfg.gcp_gbq_table, billing_account_id=cfg.gcp_billing_account_id, )

In [5]:
# Aggregate the billing export table: cost and usage.amount are summed per
# combination of the GROUP BY columns (dates, account, service, SKU, project,
# labels, region, resource, unit, cost type). There is no WHERE clause, so every
# row of the table is included.
# This assignment replaces whatever `query` held before this cell ran.
# NOTE(review): the fully qualified table name (project id included) is hard-coded
# in the SQL rather than taken from `cfg` or `project_id` — confirm this is intended.
query = """SELECT invoice.month as invoiceMonth,billing_account_id,sum(cost) as cost,service.id as serviceId,service.description as serviceDescription,sku.description as sku,project.id as projectId,project.name as projectName,TO_JSON_STRING(project.labels) as labelsid,currency,location.location as region, resource.name as resourceName, usage.unit as usageUnit,sum(usage.amount) as usageAmount,cost_type, cast(usage_start_time as date) as startdate,cast(usage_end_time as date) as enddate FROM `gcp-prices-358310.billing.gcp_billing_export_resource_v1_017DA2_854255_CCEB23` GROUP BY startdate, enddate, billing_account_id, cost_type, serviceId, region, resourceName, usageUnit, projectName, projectId,currency, invoiceMonth, serviceDescription, sku, labelsid"""
# Run the query through pandas-gbq with the standard SQL dialect; the result is
# stored in `data`.
data = pandas_gbq.read_gbq(query, project_id=project_id, dialect='standard')

Downloading: 100%|██████████| 109/109 [00:00<00:00, 372.77rows/s]


In [6]:
# Wrap the query result in a DataFrame, then report its dimensions and columns.
df = pd.DataFrame(data)
len_row, len_col = df.shape
print((len_row, len_col))
print(df.columns)

(109, 17)
Index(['invoiceMonth', 'billing_account_id', 'cost', 'serviceId',
       'serviceDescription', 'sku', 'projectId', 'projectName', 'labelsid',
       'currency', 'region', 'resourceName', 'usageUnit', 'usageAmount',
       'cost_type', 'startdate', 'enddate'],
      dtype='object')


In [7]:
# Ensure the output directory exists. exist_ok=True does this in a single call
# that is safe to re-run and avoids the check-then-create race of testing
# os.path.exists() before os.makedirs().
os.makedirs('gcp', exist_ok=True)

# Persist the billing data as both CSV and Parquet, without the row index.
df.to_csv('gcp/GCP-Billing-Data-v1.csv', index=False)
df.to_parquet('gcp/GCP-Billing-Data-v1.parquet', index=False)

In [8]:
# Print the plain-text representation of the frame; pandas abbreviates the
# middle rows with ".." and wraps wide column sets across several blocks.
print(df)

    invoiceMonth    billing_account_id  cost       serviceId  \
0         202208  017DA2-854255-CCEB23   0.0  24E6-581D-38E5   
1         202208  017DA2-854255-CCEB23   0.0  24E6-581D-38E5   
2         202208  017DA2-854255-CCEB23   0.0  95FF-2EF5-5EA1   
3         202209  017DA2-854255-CCEB23   0.0  5490-F7B7-8DF6   
4         202208  017DA2-854255-CCEB23   0.0  5490-F7B7-8DF6   
..           ...                   ...   ...             ...   
104       202208  017DA2-854255-CCEB23   0.0  5490-F7B7-8DF6   
105       202208  017DA2-854255-CCEB23   0.0  24E6-581D-38E5   
106       202208  017DA2-854255-CCEB23   0.0  5490-F7B7-8DF6   
107       202208  017DA2-854255-CCEB23   0.0  5490-F7B7-8DF6   
108       202208  017DA2-854255-CCEB23   0.0  24E6-581D-38E5   

    serviceDescription                                   sku  \
0             BigQuery               Analysis (europe-west4)   
1             BigQuery         Active Storage (europe-west4)   
2        Cloud Storage  Regional Standa