# Cleanup Google Cloud Resources

::: {.content-hidden}
Import necessary Python modules
:::

In [1]:
import os
import sys
from glob import glob

import pandas as pd
from google.cloud import bigquery, storage
from google.oauth2 import service_account

::: {.content-hidden}
Get relative path to project root directory
:::

In [2]:
# Project root is the parent of the directory this notebook runs from
PROJ_ROOT_DIR = os.pardir

## About

### Overview
This step deletes all Google Cloud resources that were created during this project. This includes

1. BigQuery [tables](https://cloud.google.com/bigquery/docs/tables-intro) in the [dataset](https://cloud.google.com/bigquery/docs/datasets-intro) that was manually created as part of the pre-requisites for this project
2. Storage [bucket](https://cloud.google.com/storage/docs/json_api/v1/buckets)

### Order of Operations
This step can be run at the end of the project, after the campaign has ended and its outcomes have been analyzed.

## User Inputs

Define the following GCP resources

1. BigQuery
   - dataset id
   - table ids for
     - raw GA360 data
     - audience
       - cohorts
       - profile
2. GCS bucket name

In [3]:
#| echo: true
# BigQuery: dataset id, followed by the ids of the raw data table and the
# two audience tables (cohorts, profiles)
gbq_dataset_id = "mydemo2asdf"
gbq_table_id_raw_data = "ecwa"
gbq_table_id_cohorts = "audience_cohorts"
gbq_table_id_profiles = "audience_profiles"

# Cloud Storage: name of the bucket to clean up
bucket_name = "ecwa-raw"

::: {.content-hidden}
Get path to data sub-folders
:::

In [4]:
# <project root>/data and its "raw" sub-folder
data_dir = os.path.join(PROJ_ROOT_DIR, "data")
raw_data_dir = os.path.join(PROJ_ROOT_DIR, "data", "raw")

::: {.content-hidden}
Load Google Cloud authentication credentials for use with the native Google Python client
:::

In [8]:
# The GCP project id is read from the environment (KeyError if it is not set)
gcp_proj_id = os.environ["GCP_PROJECT_ID"]

# Locate the service account key file in the raw data folder. Sorting makes
# the choice deterministic when more than one JSON file is present, and the
# explicit check replaces an opaque IndexError when none is found.
gcp_creds_fpaths = sorted(glob(os.path.join(raw_data_dir, "*.json")))
if not gcp_creds_fpaths:
    raise FileNotFoundError(
        f"No service account key file (*.json) found in {raw_data_dir}"
    )
gcp_creds_fpath = gcp_creds_fpaths[0]
gcp_creds = service_account.Credentials.from_service_account_file(
    gcp_creds_fpath
)

::: {.content-hidden}
Get path to all raw data files that were previously exported
:::

In [7]:
# Collect the exported parquet files under <raw data dir>/ga_data, sorted by path
raw_data_pattern = os.path.join(raw_data_dir, "ga_data", "*.parquet.gzip")
raw_data_files = sorted(glob(raw_data_pattern))

::: {.content-hidden}
Create authenticated native BigQuery Python client
:::

In [9]:
# BigQuery client bound to the project id and service account credentials
client = bigquery.Client(
    credentials=gcp_creds,
    project=gcp_proj_id,
)

::: {.content-hidden}
Create authenticated native GCS Python client
:::

In [10]:
# Cloud Storage client bound to the project id and service account credentials
storage_client = storage.Client(
    credentials=gcp_creds,
    project=gcp_proj_id,
)

## Cleanup BigQuery

### Tables

[Delete BigQuery tables](https://cloud.google.com/bigquery/docs/samples/bigquery-delete-table#code-sample)

In [12]:
#| echo: true
# Delete the raw data, cohorts and profiles tables from the project dataset.
# not_found_ok=True keeps this cleanup cell re-runnable: a table that has
# already been removed no longer aborts the loop with a NotFound error.
for gbq_table_id in [
    gbq_table_id_raw_data, gbq_table_id_cohorts, gbq_table_id_profiles
]:
    gbq_table_fully_resolved = f"{gcp_proj_id}.{gbq_dataset_id}.{gbq_table_id}"
    client.delete_table(gbq_table_fully_resolved, not_found_ok=True)
    print(f"Deleted table {gbq_table_fully_resolved}")

Deleted table demoabc-381618.mydemo2asdf.ecwa
Deleted table demoabc-381618.mydemo2asdf.audience_cohorts
Deleted table demoabc-381618.mydemo2asdf.audience_profiles


## Cleanup Storage

### Files

[List files in specified bucket](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_list_files.py)

In [16]:
# Snapshot of the files currently stored in the bucket
blobs = list(storage_client.list_blobs(bucket_name))

# Build the summary table unconditionally (with explicit columns) so that
# df_blobs is always defined, even for an empty bucket; cells further down
# reference it and would otherwise fail with a NameError.
df_blobs = pd.DataFrame(
    [
        {
            "name": b.name,
            "bucket": b.bucket.name,
            "size_mb": b.size / 1_000_000,
            "id": b.id,
            "time_created": b.time_created,
        }
        for b in blobs
    ],
    columns=["name", "bucket", "size_mb", "id", "time_created"],
)
if blobs:
    # Show creation timestamps in US/Eastern time
    df_blobs = df_blobs.assign(
        time_created=lambda df: df['time_created'].dt.tz_convert('US/Eastern')
    )
    print(
        f"Showing {len(df_blobs):,} files found in bucket {bucket_name}, "
        f"taking up {df_blobs['size_mb'].sum():.1f} MB of space"
    )
    # Do not truncate the long file names and ids when rendering
    with pd.option_context('display.max_colwidth', None):
        display(df_blobs)
else:
    print(f"Found no files in bucket {bucket_name}")

Showing 55 files found in bucket ecwa-raw, taking up 225.4 MB of space


Unnamed: 0,name,bucket,size_mb,id,time_created
0,d2i_ecwa__20160801__20160807__20230611_200354.parquet.gzip,ecwa-raw,4.660864,ecwa-raw/d2i_ecwa__20160801__20160807__20230611_200354.parquet.gzip/1686584306817844,2023-06-12 11:38:26.871000-04:00
1,d2i_ecwa__20160808__20160814__20230611_200726.parquet.gzip,ecwa-raw,5.095205,ecwa-raw/d2i_ecwa__20160808__20160814__20230611_200726.parquet.gzip/1686584307242229,2023-06-12 11:38:27.280000-04:00
2,d2i_ecwa__20160815__20160821__20230611_201100.parquet.gzip,ecwa-raw,5.052166,ecwa-raw/d2i_ecwa__20160815__20160821__20230611_201100.parquet.gzip/1686584307682931,2023-06-12 11:38:27.722000-04:00
3,d2i_ecwa__20160822__20160828__20230611_201420.parquet.gzip,ecwa-raw,4.852887,ecwa-raw/d2i_ecwa__20160822__20160828__20230611_201420.parquet.gzip/1686584308098322,2023-06-12 11:38:28.148000-04:00
4,d2i_ecwa__20160829__20160831__20230611_201631.parquet.gzip,ecwa-raw,2.51759,ecwa-raw/d2i_ecwa__20160829__20160831__20230611_201631.parquet.gzip/1686584308482404,2023-06-12 11:38:28.521000-04:00
5,d2i_ecwa__20160901__20160907__20230611_110546.parquet.gzip,ecwa-raw,4.185168,ecwa-raw/d2i_ecwa__20160901__20160907__20230611_110546.parquet.gzip/1686584308908315,2023-06-12 11:38:28.945000-04:00
6,d2i_ecwa__20160908__20160914__20230611_110929.parquet.gzip,ecwa-raw,4.501941,ecwa-raw/d2i_ecwa__20160908__20160914__20230611_110929.parquet.gzip/1686584309285385,2023-06-12 11:38:29.325000-04:00
7,d2i_ecwa__20160915__20160921__20230611_111509.parquet.gzip,ecwa-raw,5.266326,ecwa-raw/d2i_ecwa__20160915__20160921__20230611_111509.parquet.gzip/1686584309721621,2023-06-12 11:38:29.759000-04:00
8,d2i_ecwa__20160922__20160928__20230611_111820.parquet.gzip,ecwa-raw,4.95172,ecwa-raw/d2i_ecwa__20160922__20160928__20230611_111820.parquet.gzip/1686584310161748,2023-06-12 11:38:30.201000-04:00
9,d2i_ecwa__20160929__20160930__20230611_200041.parquet.gzip,ecwa-raw,1.411386,ecwa-raw/d2i_ecwa__20160929__20160930__20230611_200041.parquet.gzip/1686584310500825,2023-06-12 11:38:30.539000-04:00


[Delete all files](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_delete_file.py) in specified bucket

In [18]:
#| echo: true
# List the bucket contents here rather than relying on a table built by an
# earlier cell: this works for an empty bucket (zero iterations) and never
# acts on a stale listing.
blobs_to_delete = list(storage_client.list_blobs(bucket_name))
for k, blob in enumerate(blobs_to_delete, start=1):
    # Listed blobs already carry their generation, so no extra reload() call
    # is needed. The precondition makes the delete fail instead of removing
    # an object that was replaced after it was listed.
    blob.delete(if_generation_match=blob.generation)
    print(f"Delete blob {k}/{len(blobs_to_delete)}, {blob.name}, from bucket {bucket_name}")

Delete blob d2i_ecwa__20160801__20160807__20230611_200354.parquet.gzip
Delete blob d2i_ecwa__20160808__20160814__20230611_200726.parquet.gzip
Delete blob d2i_ecwa__20160815__20160821__20230611_201100.parquet.gzip
Delete blob d2i_ecwa__20160822__20160828__20230611_201420.parquet.gzip
Delete blob d2i_ecwa__20160829__20160831__20230611_201631.parquet.gzip
Delete blob d2i_ecwa__20160901__20160907__20230611_110546.parquet.gzip
Delete blob d2i_ecwa__20160908__20160914__20230611_110929.parquet.gzip
Delete blob d2i_ecwa__20160915__20160921__20230611_111509.parquet.gzip
Delete blob d2i_ecwa__20160922__20160928__20230611_111820.parquet.gzip
Delete blob d2i_ecwa__20160929__20160930__20230611_200041.parquet.gzip
Delete blob d2i_ecwa__20161001__20161007__20230611_001001.parquet.gzip
Delete blob d2i_ecwa__20161008__20161014__20230611_001454.parquet.gzip
Delete blob d2i_ecwa__20161015__20161021__20230611_001736.parquet.gzip
Delete blob d2i_ecwa__20161022__20161028__20230611_002043.parquet.gzip
Delete

Verify that no files are found in specified bucket

In [19]:
#| echo: true
# Re-list the bucket and report whether anything survived the deletion.
# A plain conditional is used instead of assert/except so the check still
# runs when Python is started with -O (which strips assert statements).
blobs = list(storage_client.list_blobs(bucket_name))
if blobs:
    print(f"Found files in bucket {bucket_name} that were not deleted")
else:
    print(f"Found no files in bucket {bucket_name}")

Found no files in bucket ecwa-raw


### Buckets

[Delete specified bucket](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_delete_bucket.py)

In [20]:
# lookup_bucket returns None when the bucket does not exist, which lets a
# re-run of this cell report that there is nothing left to delete.
bucket = storage_client.lookup_bucket(bucket_name)
if bucket is None:
    print(f"Bucket {bucket_name} does not exist, nothing to delete")
else:
    try:
        bucket.delete()
        print(f"Deleted bucket {bucket_name}")
    except Exception as e:
        # Best-effort cleanup: report the API response (e.g. the bucket is
        # not empty) instead of stopping the notebook
        print(f"Got response: {str(e)}")

Deleted bucket ecwa-raw


Verify that specified bucket name is not found in [list of buckets](https://github.com/googleapis/python-storage/blob/main/samples/snippets/storage_list_buckets.py) in Google Cloud project

In [21]:
#| echo: true
# List every bucket in the project and check that the deleted one is absent
buckets = list(storage_client.list_buckets())
if buckets:
    df_buckets = pd.DataFrame.from_records(
        [
            {
                "bucket": b.name,
                "time_created": b.time_created,
            }
            for b in buckets
        ]
    )
    # Compare with a boolean mask: the previous query string used a single
    # "=" (assignment), which pandas rejects, and a mask also avoids
    # interpolating the bucket name into an expression.
    df_bucket_match = df_buckets[df_buckets["bucket"] == bucket_name]
    if df_bucket_match.empty:
        print(f"Did not find bucket {bucket_name}")
    else:
        print(f"Found bucket {bucket_name}")
        print("Showing specified bucket in list of buckets")
        display(df_bucket_match)
else:
    print("Found no buckets")

Found no buckets


## Local Storage

### Files

In [8]:
# Remove every previously exported raw data file that is still on disk,
# reporting progress as <position>/<total>
n_raw_data_files = len(raw_data_files)
for file_num, fpath in enumerate(raw_data_files, start=1):
    if not os.path.exists(fpath):
        print(f"File {fpath} does not exist")
        continue
    os.remove(fpath)
    print(f"Deleted local file {file_num}/{n_raw_data_files:,} at {os.path.basename(fpath)}")

Deleted local file 1/55 at d2i_ecwa__20160801__20160807__20230611_200354.parquet.gzip
Deleted local file 2/55 at d2i_ecwa__20160808__20160814__20230611_200726.parquet.gzip
Deleted local file 3/55 at d2i_ecwa__20160815__20160821__20230611_201100.parquet.gzip
Deleted local file 4/55 at d2i_ecwa__20160822__20160828__20230611_201420.parquet.gzip
Deleted local file 5/55 at d2i_ecwa__20160829__20160831__20230611_201631.parquet.gzip
Deleted local file 6/55 at d2i_ecwa__20160901__20160907__20230611_110546.parquet.gzip
Deleted local file 7/55 at d2i_ecwa__20160908__20160914__20230611_110929.parquet.gzip
Deleted local file 8/55 at d2i_ecwa__20160915__20160921__20230611_111509.parquet.gzip
Deleted local file 9/55 at d2i_ecwa__20160922__20160928__20230611_111820.parquet.gzip
Deleted local file 10/55 at d2i_ecwa__20160929__20160930__20230611_200041.parquet.gzip
Deleted local file 11/55 at d2i_ecwa__20161001__20161007__20230611_001001.parquet.gzip
Deleted local file 12/55 at d2i_ecwa__20161008__2016