In [None]:
import os

from pydatafabric.gcp import bq_to_pandas
from sqlalchemy import create_engine, text

In [None]:
# SQLAlchemy engine for the PostgreSQL database that the cells below write
# the bigquery_stats_* tables to.
# AIM_DATABASE_URL, when set, overrides the built-in default so credentials
# can be supplied from the environment instead of the notebook.
# NOTE(review): the fallback below still embeds a password in source control;
# rotate it and remove the fallback once the environment variable is deployed.
engine = create_engine(
    os.environ.get(
        "AIM_DATABASE_URL",
        "postgresql+psycopg2://aim:!aim00@172.27.124.13/aim",
    )
)

### BigQuery 일별 사용량

In [None]:
# Daily BigQuery usage.
# Reads the data-access audit log and produces one row per day with the number
# of QUERY jobs and the billed volume in GiB (bytes / 2^30), restricted to
# principals whose e-mail ends with '@shinsegae.ai'. The day is derived from
# the log timestamp shifted by +9 hours (presumably KST -- confirm).
# The query applies no timestamp filter, so every day present in the audit
# table is recomputed on each run.
df = bq_to_pandas("""
  WITH data as
  (
    SELECT
      protopayload_auditlog.authenticationInfo.principalEmail as principalEmail,
      protopayload_auditlog.metadataJson AS metadataJson,
      CAST(JSON_EXTRACT_SCALAR(protopayload_auditlog.metadataJson,
          "$.jobChange.job.jobStats.queryStats.totalBilledBytes") AS INT64) AS totalBilledBytes,
      CAST(TIMESTAMP_ADD(timestamp, INTERVAL 9 HOUR) AS DATE) AS baseDate
    FROM
      `emart-datafabric.audit_v2.cloudaudit_googleapis_com_data_access`
  )
  SELECT format_date('%Y-%m-%d', baseDate) as dt, count(baseDate) as query_count, FORMAT('%9.2f',SUM(totalBilledBytes)/POWER(2, 30)) total_billed_giga_bytes
  FROM
    data
  WHERE
    JSON_EXTRACT_SCALAR(metadataJson, "$.jobChange.job.jobConfig.type") = "QUERY"
    AND principalEmail LIKE '%@shinsegae.ai'
    GROUP BY baseDate
    ORDER BY baseDate 
""")

# Stage the result in a scratch table; if_exists='replace' recreates it on
# every run, so its columns carry whatever types pandas infers.
df.to_sql('temp_bigquery_stats_daily_usage', engine, if_exists='replace', index=False)

# Upsert the staged rows into the target table, keyed on dt. The casts convert
# the staged values (dt and the FORMAT()-ed volume are strings) to the types
# used in the INSERT.
# engine.begin() wraps the statement in a transaction that commits on success
# and rolls back on error, and text() makes the raw SQL an executable
# construct. Both are required on SQLAlchemy 2.0, which no longer autocommits
# or accepts plain strings in Connection.execute().
with engine.begin() as con:
    con.execute(text("""
        INSERT INTO bigquery_stats_daily_usage
        SELECT date(dt) as dt, cast(query_count as int), cast(total_billed_giga_bytes as float)
        FROM temp_bigquery_stats_daily_usage
        ON CONFLICT (dt) 
        DO 
           UPDATE SET query_count = excluded.query_count, total_billed_giga_bytes = excluded.total_billed_giga_bytes
        
    """))

### BigQuery 최근 1주일 사용자별 사용량 및 쿼리수

In [None]:
# Per-user BigQuery usage over the last 7 days.
# Reads audit-log rows whose timestamp is within 7 days of now and produces
# one row per principal: the local part of the e-mail as user_id, the number
# of QUERY jobs, and the billed volume. Only principals matching both
# '%@shinsegae.ai' and 'x%' are included.
# NOTE(review): the volume is divided by POWER(2, 40), i.e. TiB, although the
# column is named total_billed_giga_bytes and the daily-usage query divides by
# POWER(2, 30). Confirm the intended unit before changing it, since consumers
# of bigquery_stats_user_usage may already rely on the current scale.
df = bq_to_pandas("""
WITH data as
  (
    SELECT
      protopayload_auditlog.authenticationInfo.principalEmail as principalEmail,
      protopayload_auditlog.metadataJson AS metadataJson,
      CAST(JSON_EXTRACT_SCALAR(protopayload_auditlog.metadataJson,
          "$.jobChange.job.jobStats.queryStats.totalBilledBytes") AS INT64) AS totalBilledBytes,
      CAST(TIMESTAMP_ADD(timestamp, INTERVAL 9 HOUR) AS DATE) AS baseDate
    FROM
      `emart-datafabric.audit_v2.cloudaudit_googleapis_com_data_access`
    WHERE timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
  )
  SELECT
    split(principalEmail, '@')[offset(0)] as user_id,
    count(principalEmail) as query_count,
    FORMAT('%9.2f',SUM(totalBilledBytes)/POWER(2, 40)) AS total_billed_giga_bytes
  FROM
    data
  WHERE
    JSON_EXTRACT_SCALAR(metadataJson, "$.jobChange.job.jobConfig.type") = "QUERY"
    AND principalEmail LIKE '%@shinsegae.ai' AND principalEmail LIKE 'x%'
  GROUP BY principalEmail
  ORDER BY query_count DESC
""")

# Stage the result in a scratch table that is recreated on every run.
df.to_sql('temp_bigquery_stats_user_usage', engine, if_exists='replace', index=False)

# Upsert the staged rows keyed on user_id. Rows for users absent from the
# current 7-day window are not touched by this statement.
# engine.begin() commits on success and rolls back on error, and text() wraps
# the raw SQL. Both are required on SQLAlchemy 2.0, which no longer
# autocommits or accepts plain strings in Connection.execute().
with engine.begin() as con:
    con.execute(text("""
        INSERT INTO bigquery_stats_user_usage (user_id, query_count, total_billed_giga_bytes)
        SELECT user_id, query_count, cast(total_billed_giga_bytes as float)
        FROM temp_bigquery_stats_user_usage
        ON CONFLICT (user_id) 
        DO 
           UPDATE SET query_count = excluded.query_count, total_billed_giga_bytes = excluded.total_billed_giga_bytes
    """))

### BigQuery 오늘 전체 용량

In [None]:
# Total stored volume as of today.
# Produces a single row: today's date in Asia/Seoul and the sum of table sizes
# in GiB (each table rounded to a whole GiB before summing). Only the
# `x1112275.all_tables*` table with the greatest suffix is read (presumably
# the latest snapshot -- confirm), and only tables larger than 100 GiB are
# counted. Unlike the per-dataset ratio query, there is no project_id filter.
df = bq_to_pandas("""
    select format_timestamp('%Y-%m-%d', current_timestamp(), 'Asia/Seoul') as dt, sum(round(IEEE_DIVIDE(size_bytes, 1024*1024*1024))) as total_volume
    from `x1112275.all_tables*`
    where _table_suffix = (SELECT MAX(_TABLE_SUFFIX) FROM `x1112275.all_tables*`)
    and size_bytes > 100*1024*1024*1024
""")

# Stage the result in a scratch table that is recreated on every run.
df.to_sql('temp_bigquery_stats_dataset_volume', engine, if_exists='replace', index=False)

# Upsert keyed on dt, so re-running on the same day overwrites that day's
# value.
# engine.begin() commits on success and rolls back on error, and text() wraps
# the raw SQL. Both are required on SQLAlchemy 2.0, which no longer
# autocommits or accepts plain strings in Connection.execute().
with engine.begin() as con:
    con.execute(text("""
        INSERT INTO bigquery_stats_dataset_volume (dt, total_volume)
        SELECT date(dt), total_volume
        FROM temp_bigquery_stats_dataset_volume
        ON CONFLICT (dt) 
        DO 
           UPDATE SET total_volume = excluded.total_volume
    """))

### BigQuery 데이터세트 별 용량 비율

In [None]:
# Share of stored volume per dataset.
# For project 'emart-datafabric', computes each dataset's summed table size
# (whole GiB per table, tables larger than 100 GiB only) divided by the total
# over the same filter, and keeps the five datasets with the largest ratio.
# Only the `x1112275.all_tables*` table with the greatest suffix is read
# (presumably the latest snapshot -- confirm).
df = bq_to_pandas("""
    with total as (
        select sum(round(IEEE_DIVIDE(size_bytes, 1024*1024*1024))) as value
        from `x1112275.all_tables*`
        where _table_suffix = (SELECT MAX(_TABLE_SUFFIX) FROM `x1112275.all_tables*`)
        and size_bytes > 100*1024*1024*1024
        and project_id = 'emart-datafabric'
    )
    select dataset_id,
        sum(round(IEEE_DIVIDE(size_bytes, 1024*1024*1024))) / (select value from total) as ratio
    from `x1112275.all_tables*`
    where _table_suffix = (SELECT MAX(_TABLE_SUFFIX) FROM `x1112275.all_tables*`)
    and size_bytes > 100*1024*1024*1024
    and project_id = 'emart-datafabric'
    group by dataset_id
    order by ratio desc
    limit 5
""")

# Stage the result in a scratch table that is recreated on every run.
df.to_sql('temp_bigquery_stats_dataset_volume_ratio', engine, if_exists='replace', index=False)

# Upsert keyed on dataset_id.
# NOTE(review): nothing here deletes rows, so a dataset that drops out of the
# top five keeps its previously stored ratio -- confirm that is intended.
# engine.begin() commits on success and rolls back on error, and text() wraps
# the raw SQL. Both are required on SQLAlchemy 2.0, which no longer
# autocommits or accepts plain strings in Connection.execute().
with engine.begin() as con:
    con.execute(text("""
        INSERT INTO bigquery_stats_dataset_volume_ratio (dataset_id, ratio)
        SELECT dataset_id, ratio
        FROM temp_bigquery_stats_dataset_volume_ratio
        ON CONFLICT (dataset_id) 
        DO 
           UPDATE SET ratio = excluded.ratio
    """))

### GCP 이번달 비용

In [None]:
# GCP cost for the current month.
# Produces a single row: the current year-month ('%Y-%m', Asia/Seoul) and the
# rounded sum of `cost` over billing-export rows whose usage_start_time falls
# in the same Asia/Seoul month, for project 'emart-datafabric'.
# NOTE(review): the `cost >= 1` predicate drops every billing row cheaper
# than 1, so the stored total excludes them -- confirm this is intended.
df = bq_to_pandas("""
    SELECT
      format_timestamp ('%Y-%m', current_timestamp, 'Asia/Seoul') AS ym,
      ROUND(SUM(cost)) AS cost
    FROM
      `billing_edp.gcp_billing_export_v1_01070F_9BDCB3_2A6D6F`
    WHERE
      format_timestamp ('%Y/%m',
        current_timestamp,
        'Asia/Seoul') = format_timestamp ('%Y/%m',
        usage_start_time,
        'Asia/Seoul')
      AND cost >= 1
      AND project.name IN ('emart-datafabric')
""")

# Stage the result in a scratch table that is recreated on every run.
df.to_sql('temp_bigquery_stats_cost_monthly', engine, if_exists='replace', index=False)

# Upsert keyed on ym, so each run overwrites the running total for the
# current month.
# engine.begin() commits on success and rolls back on error, and text() wraps
# the raw SQL. Both are required on SQLAlchemy 2.0, which no longer
# autocommits or accepts plain strings in Connection.execute().
with engine.begin() as con:
    con.execute(text("""
        INSERT INTO bigquery_stats_cost_monthly (ym, cost)
        SELECT ym, cost
        FROM temp_bigquery_stats_cost_monthly
        ON CONFLICT (ym) 
        DO 
           UPDATE SET cost = excluded.cost
    """))