In [1]:
# Display the result of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Widen the notebook container to 80% of the browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [3]:
#! pip freeze

In [5]:
from google.cloud import bigquery
import json
import os

In [6]:
# Path to the service-account key file, read from the environment
# (raises KeyError if GOOGLE_APPLICATION_CREDENTIALS is not set).
GCP_CREDENTIALS = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]

# BigQuery client built from that key file; reused by the cells below.
BQ_CLIENT = bigquery.Client.from_service_account_json(GCP_CREDENTIALS)

In [None]:
# Look up an existing BigQuery job by its id in the given project (API request).
job = BQ_CLIENT.get_job(job_id='bquxjob_3d4f41c7_1777f1721dd', project='peya-food-and-groceries')

In [None]:
# Show the query text stored on the job.
job.query

In [None]:
# Show the job's state.
job.state

In [None]:
# Show the number of bytes the job processed.
job.total_bytes_processed

In [7]:
# Location of the table to work on, split into its three components.
config_table = dict(
    project_id="peya-data-qlty-pro",
    dataset_id="gcp_cost_control",
    table_id="usage_metrics_per_user_daily",
)

# Fully-qualified table id in "project.dataset.table" form.
config_table_id = ".".join(
    [
        config_table["project_id"],
        config_table["dataset_id"],
        config_table["table_id"],
    ]
)

In [8]:
# Fetch the table's metadata from BigQuery (API request).
table = BQ_CLIENT.get_table(config_table_id)

# Keep a reference to the schema as it currently exists on the table.
table_schema = table.schema

In [9]:
# Display the schema fields fetched from the table.
table_schema

[SchemaField('user_email', 'STRING', 'NULLABLE', 'Email address or service account of the user who ran the job.', (), None),
 SchemaField('query', 'STRING', 'NULLABLE', 'SQL query text.', (), None),
 SchemaField('statement_type', 'STRING', 'NULLABLE', 'The type of query statement, if valid. For example, SELECT, INSERT, UPDATE, or DELETE.', (), None),
 SchemaField('duration_in_seconds', 'INTEGER', 'NULLABLE', 'Diff between start date and end date of process in seconds', (), None),
 SchemaField('processed_tb', 'FLOAT', 'NULLABLE', 'Amount of TB proccessed by Job that was billed', (), None),
 SchemaField('cost_usd', 'FLOAT', 'NULLABLE', '5 (Dollar per TB)  * total_bytes_processed/POWER(2,40)', (), None),
 SchemaField('project_id', 'STRING', 'NULLABLE', 'ID of the project.', (), None),
 SchemaField('execution_date', 'DATE', 'NULLABLE', 'The Creation Date of this job', (), None),
 SchemaField('slot_usage', 'FLOAT', 'NULLABLE', 'total_slot_ms / (TIMESTAMP_DIFF(end_time, start_time, MILLISECO

In [None]:
# Schema currently attached to the table object; used as the base for the new schema.
original_schema = table.schema

In [None]:
# Shallow copy: the container is new, the field objects inside are shared with original_schema.
new_schema = original_schema[:]  # Creates a copy of the schema.

In [None]:
# Print each field of the schema copy on its own line.
for field in new_schema:
    print(field)

In [None]:
# Assign the schema copy to the table and send only the "schema" field to BigQuery.
# NOTE(review): no cell visible here modifies new_schema before this update — confirm
# whether fields were meant to be appended first.
table.schema = new_schema
table = BQ_CLIENT.update_table(table, ["schema"])  # Make an API request.

#### Update Clustering Fields

In [10]:
# Show the table's current clustering fields.
table.clustering_fields

['project_id', 'user_email']

In [11]:
# Target clustering columns: the two current ones plus statement_type and job_id.
# NOTE(review): job_id is not among the fields visible in the (truncated) schema
# output earlier in the notebook — confirm the column exists on the table.
new_cluster_fields = ['project_id','user_email','statement_type','job_id']

In [12]:
# Set the new clustering fields on the local table object; nothing is sent to BigQuery yet.
table.clustering_fields = new_cluster_fields

In [13]:
# Send the updated clustering fields to BigQuery (API request).
table_updated = BQ_CLIENT.update_table(table,["clustering"])

In [None]:
# Name of the DAG recorded in the job labels.
dag_name = 'DAG_NAME_TEST'

# Labels to attach to BigQuery jobs: originating DAG and task, owning team and owners.
# "task" starts empty and is filled in later.
BQ_JOB_LABELS = dict(
    dag=dag_name,
    task="",
    team="data-quality@pedidosya.com",
    owner="diego.pietruszka@pedidosya.com,carlos.jaime@pedidosya.com",
)

In [None]:
# Display the labels as initially defined.
BQ_JOB_LABELS

In [None]:
# Fill in the task label; this mutates the shared dict in place.
BQ_JOB_LABELS['task'] = 'task_name2'

In [None]:
# Display the labels again to check the task value.
BQ_JOB_LABELS

In [None]:
# Sample value containing upper-case letters, '.', '_', spaces and '@' for trying out label formatting.
label = 'Ola.Todos113231_  @ '

In [None]:
# Quick check of the normalisation: '.' becomes '_', spaces are removed, then lower-case.
label.replace('.', '_').replace(' ', '').lower()

In [None]:
def format_bigquery_job_labels_values(value):
    """Normalize a string so it can be used as a BigQuery job label value.

    BigQuery label values may contain only lowercase letters (international
    letters included), numeric characters, underscores and dashes, and may be
    at most 63 characters long.

    The value is transformed as follows:
      * it is lower-cased;
      * '.' becomes '_', '@' becomes '-', and spaces are removed;
      * any remaining character that is not a letter, a digit, '_' or '-'
        (for example the ',' separating several e-mail addresses) becomes '_';
      * the result is truncated to 63 characters.

    Args:
        value (str): Raw label value.

    Returns:
        str: The sanitized label value; may be empty.
    """
    max_label_length = 63
    dict_format = {'.': '_',
                   ' ': '',
                   '@': '-'}
    # Lower-case first: lower-casing can itself introduce characters (e.g. combining
    # marks) that must go through the sanitizing step below.
    translated = value.lower().translate(str.maketrans(dict_format))

    # The explicit mapping covers only three characters; replace every other
    # character that BigQuery would reject instead of letting it through.
    format_value = ''.join(
        char if (char.isalnum() or char in '_-') else '_'
        for char in translated
    )

    return format_value[:max_label_length]

In [None]:
# Try the formatter on the sample label.
format_bigquery_job_labels_values(label)

#### Update Expiration time

In [None]:
from datetime import datetime, timedelta, datetime
import pytz






In [None]:
# Id of the object whose expiration will be updated, in "project.dataset.table" form.
view_id = 'peya-data-qlty-pro.framework_temporal_views.ge_mt_peya-bi-tools-pro_dim_user_20210322_150000'

In [None]:
# Split the dotted id once and pick out its three components by position.
view_id_parts = view_id.split('.')
project_id, dataset_id, table_id = view_id_parts[0], view_id_parts[1], view_id_parts[2]

In [None]:

# NOTE(review): bigquery is already imported and BQ_CLIENT already built earlier in
# the notebook; these two lines repeat that setup.
from google.cloud import bigquery
BQ_CLIENT = bigquery.Client.from_service_account_json(GCP_CREDENTIALS)

# Build a reference from the id components and fetch the table's metadata.
dataset_ref = bigquery.DatasetReference(project_id, dataset_id)
table_ref = dataset_ref.table(table_id)
table = BQ_CLIENT.get_table(table_ref)  # API request


# Set the table to expire one day from now (timezone-aware UTC) and send only
# the "expires" field to BigQuery.
expiration = datetime.now(pytz.utc) + timedelta(days=1)
table.expires = expiration
table = BQ_CLIENT.update_table(table, ["expires"])  # API request

In [None]:
# Move the table's expiration to five days from now (timezone-aware UTC).
expiration = datetime.now(pytz.utc) + timedelta(days=5)
table.expires = expiration
table = BQ_CLIENT.update_table(table, ["expires"])  # API request

# The expiration is stored in milliseconds, so the value read back may differ
# from the one sent by up to one millisecond.
margin = timedelta(milliseconds=1)
assert abs(table.expires - expiration) <= margin