In [None]:
#meta 8/2/2022 GCP template - VertexAI
#template copy from MD work for Hack22
#generic: project, bucket, BQ, dataflow, dataset, pubsub (streaming), cloud fn

#$note $secrets key

## Installation

In [1]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME") and not os.getenv("VIRTUAL_ENV")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade google-cloud-aiplatform {USER_FLAG} -q --no-warn-script-location
! pip3 install -U google-cloud-storage {USER_FLAG} -q --no-warn-script-location

## Restart the kernel

In [2]:
import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

## Set up your Google Cloud project

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager)

2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [Enable the Vertex AI APIs, Compute Engine APIs, and Cloud Storage, Pub/Sub API, Dataflow API, Cloud Functions API, Cloud Build API.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component,storage-component.googleapis.com,pubsub.googleapis.com,dataflow.googleapis.com,cloudfunctions.googleapis.com,cloudbuild.googleapis.com)

4. [The Google Cloud SDK](https://cloud.google.com/sdk) is already installed in Google Cloud Notebook.

In [1]:
# OPTIONAL - DEFAULTS TO CURRENT PROJECT_ID
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [3]:
# WARNING: Do not change to us-west1 -- DataFlow is only available in us-central1
REGION = "us-central1"  # @param {type: "string"}

In [4]:
from datetime import datetime
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

## Project location

In [None]:
BUCKET_NAME = "[your-bucket-name]"
BUCKET_URI = f"gs://{BUCKET_NAME}"
BUCKET_URI

In [6]:
# VIEW BUCKET CONTENT:
# ! gsutil ls -r $BUCKET_URI

## Create Pub/Sub to email Topic

In [None]:
TOPIC_2_NAME = '[your-topic]'
TOPIC_2_URI = f'projects/{PROJECT_ID}/topics/{TOPIC_2_NAME}'
TOPIC_2_URI

In [None]:
! gcloud pubsub topics create $TOPIC_2_NAME 

## Create Cloud Functions

In [None]:
! gcloud config set functions/region $REGION

In [14]:
# FUNCTIONS_NAME = '[your_fn]'
# RUNTIME = 'python37'
# ! gcloud functions deploy $FUNCTIONS_NAME   --source 'gs://[your-bucket]/functions/main.py'   --runtime $RUNTIME   --trigger-topic $TOPIC_2_NAME --set-env-vars GOOGLE_FUNCTION_TARGET=hello_pubsub
# ! gcloud functions deploy $FUNCTIONS_NAME --runtime=$RUNTIME --trigger-topic=$TOPIC_2_NAME --source='gs://[your-bucket]/functions/functions.zip' 

# GETTING ERRORS
# https://cloud.google.com/functions/docs/calling/pubsub#functions_calling_pubsub-python
# INSTALLLED FROM THE UI INSTEAD:
# https://cloud.google.com/security-command-center/docs/how-to-enable-real-time-notifications#obtain-a-sendgrid-email-api-key

In [None]:
# INSTRUCTIONS IN THE EMAIL:

#######
# RUNTIME = python37
# Region = us-central1
# trigger = our topic
# Entry Point = send_email

####################### main.py:

# import base64

# def send_email(event, context):
#     """Triggered from a message on a Cloud Pub/Sub topic.
#     Args:
#          event (dict): Event payload.
#          context (google.cloud.functions.Context): Metadata for the event.
#     """

#     import logging
#     from sendgrid import SendGridAPIClient
#     from sendgrid.helpers.mail import Mail, Email
#     from python_http_client.exceptions import HTTPError


#     pubsub_message = base64.b64decode(event['data']).decode('utf-8')
#     print(pubsub_message)



#     log = logging.getLogger(__name__)

#     SENDGRID_API_KEY = 'your-key' #$secrets key
#     sg = SendGridAPIClient(SENDGRID_API_KEY)
	
   
#     html_content = f"""
    
#     ANOMALY: 
    
#     {pubsub_message}


#     """

#     message = Mail(
#         to_emails="[your-email]@[your-domain].com",
#         from_email=Email('[your-email]@[your-domain].com', "Notifications User"),
#         subject=f"Anomaly Detected",
#         html_content=html_content
#         )

#     try:
#       print('Sending email...')
#       response = sg.send(message)
        
#       return f"email.status_code={response.status_code}"
#     except HTTPError as e:
#         return e


##########################

####################### requirements.txt:

# # https://pypi.org/project/sendgrid/
# sendgrid==6.0.5

###################################

## DEMO Prediction and Anomaly to Email

In [None]:
MODEL_LOCATION = f"{BUCKET_URI}/training/models/energy.pkl"
model_file = 'training/models/model.pkl'
! gsutil cp -r $MODEL_LOCATION $model_file

In [16]:
from joblib import load
MODEL = load(model_file)

In [19]:
from google.cloud import pubsub_v1
publisher = pubsub_v1.PublisherClient()

def check_anomaly(actual, predicted, treshhold):
    lower = predicted-treshhold
    upper = predicted+treshhold
    if actual > upper:
        anomaly = 1
    elif actual < lower:
        anomaly = 1
    else:
        anomaly = 1
    return lower, upper, anomaly

    
def format_message(time, predicted, lower, upper, anomaly):
    message = """
    {
      "creation_time": \""""+time+"""\",
      "predicted": """+"{:.8f}".format(predicted)+""",
      "lower": """+"{:.8f}".format(lower)+""",
      "upper": """+"{:.8f}".format(upper)+""",
      "anomaly": """+"{:.0f}".format(anomaly)+"""
    }
    """
    return message

def send_to_topic(topic, message):
    data = message.encode("utf-8")
    future = publisher.publish(topic, data)

def publish_message(topic, time, predicted, lower, upper, anomaly, print_msg=False):
    message = format_message(time, predicted, lower, upper, anomaly)
    if print_msg:
        print(message)
    send_to_topic(topic, message)

In [20]:
# SEND CURRENT TIMESTAMP

from datetime import datetime
time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

treshhold = 2
actual = 2.5

predicted = MODEL.forecast()[0]
lower, upper, anomaly = check_anomaly(actual, predicted, treshhold)
publish_message(TOPIC_2_URI, time, predicted, lower, upper, anomaly, print_msg=True)


    {
      "creation_time": "2022-07-15 17:08:25",
      "predicted": 0.59972642,
      "lower": -1.40027358,
      "upper": 2.59972642,
      "anomaly": 1
    }
    


In [21]:
BREAK HERE!!!!

SyntaxError: invalid syntax (1506601594.py, line 1)

# DEMO CLEAN UP

###### DELETE - PUB SUB TO EMAIL TOPIC:

In [None]:
# TOPIC_NAME = [your-topic]
! gcloud pubsub topics delete $TOPIC_2_NAME

###### DELETE - CLOUD FUNCTIONS:

In [22]:
# TOPIC_NAME = [your-topic]
####$$TODO

###### DELETE - LOCAL MODEL PKL FILE:

In [24]:
import os
# model_file = 'training/models/model.pkl'
os.remove(model_file)

In [25]:
BREAK HERE!!!!

SyntaxError: invalid syntax (1506601594.py, line 1)

## OPTIONAL: SEND PREDICTIONS TO BQ

## Create Pub/Sub to BQ Topic

In [None]:
TOPIC_NAME = '[your-topic]'
TOPIC_URI = f'projects/{PROJECT_ID}/topics/{TOPIC_NAME}'
TOPIC_URI

In [None]:
! gcloud pubsub topics create $TOPIC_NAME 

## Create Dataset in BQ

In [None]:
DATASET_NAME = '[your-dataset]'
TABLE_NAME = f'{DATASET_NAME}.forecast_view'
DATASET_URI = f'{PROJECT_ID}:{TABLE_NAME}'
DATASET_URI

In [None]:
! bq mk --location=$REGION $DATASET_NAME

In [None]:
! bq mk $TABLE_NAME creation_time:TIMESTAMP,predicted:DECIMAL,lower:DECIMAL,upper:DECIMAL,anomaly:INTEGER

In [None]:
# VIEW DATASET:
! bq ls --format prettyjson $DATASET_NAME

## Run DataFlow Pub/Sub to BQ streaming job

In [36]:
# Create DataFlow Pub/Sub to BQ pipeline:
JOB_NAME = '[your-dataflow]'
TEMPLATE = 'gs://dataflow-templates/latest/PubSub_to_BigQuery'
STAGING = f'gs://{BUCKET_NAME}/temp'

In [None]:
!gcloud dataflow jobs run hack22-dataflow     --gcs-location gs://dataflow-templates/latest/PubSub_to_BigQuery     --region $REGION     --staging-location $STAGING     --parameters inputTopic=$TOPIC_URI,outputTableSpec=$DATASET_URI,outputDeadletterTable=$DATASET_URI

#### LOAD TEST DATA BATCH

In [None]:
TRAINING_BUCKET = f'{BUCKET_URI}/training'
TEST_PREDICTIONS = f'{TRAINING_BUCKET}/data/test_predictions.csv' # FROM TRAINING NOTEBOOK - cell 59 - my4_Autoregressive_and_Automated_Methods_forTS.ipynb

!gsutil cp gs://[your-bucket]/training/data/test_predictions.csv ./training/data/test_predictions.csv

In [39]:
import pandas as pd

file_location = 'training/data/test_predictions.csv'
test_df = pd.read_csv(file_location)
test_df.drop(columns=['Unnamed: 0'], inplace=True)
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  46 non-null     object 
 1   predicted  46 non-null     float64
 2   lower      46 non-null     float64
 3   upper      46 non-null     float64
 4   anomaly    46 non-null     int64  
dtypes: float64(3), int64(1), object(1)
memory usage: 1.9+ KB


In [40]:
test_df.head()

Unnamed: 0,timestamp,predicted,lower,upper,anomaly
0,2014-12-30 00:00:00,0.415759,-1.670546,2.329454,0
1,2014-12-30 01:00:00,0.339827,-1.709937,2.290063,0
2,2014-12-30 02:00:00,0.302746,-1.726052,2.273948,0
3,2014-12-30 03:00:00,0.287608,-1.731871,2.268129,0
4,2014-12-30 04:00:00,0.28212,-1.697404,2.302596,0


In [41]:
# Check if Dataflow jobs are running:
! gcloud dataflow jobs list --region=$REGION --status='active'

JOB_ID                                    NAME                           TYPE       CREATION_TIME        STATE    REGION
2022-07-15_10_19_59-16597450199510106227  hack22-dataflow                Streaming  2022-07-15 17:19:59  Running  us-central1
2022-07-15_08_21_06-16100316668033096407  ps-to-text-hack22-email-topic  Streaming  2022-07-15 15:21:06  Running  us-central1


In [None]:
# LOAD BATCH TO BQ

from google.cloud import pubsub_v1
from joblib import load

publisher = pubsub_v1.PublisherClient()
print('TOPIC_URI: '+TOPIC_URI)

for index, row in test_df.iterrows():
    time = row['timestamp']
    predicted = row['predicted']
    lower = row['lower']
    upper = row['upper']
    anomaly = row['anomaly']
    publish_message(TOPIC_URI, time, predicted, lower, upper, anomaly, print_msg=False)
print('Published all messages') 

In [44]:
# CHECK IF MESSAGES MADE IT TO BQ:
!bq query --use_legacy_sql=false 'SELECT * FROM `'"[your-project-id].[your-dataset].forecast_view"'`ORDER BY creation_time desc limit 10'

+---------------------+------------+-------------+------------+---------+
|    creation_time    | predicted  |    lower    |   upper    | anomaly |
+---------------------+------------+-------------+------------+---------+
| 2014-12-31 21:00:00 | 0.69218379 | -1.38048344 | 2.61951656 |       0 |
| 2014-12-31 20:00:00 | 0.75800251 | -1.29767234 | 2.70232766 |       0 |
| 2014-12-31 19:00:00 | 0.83715234 | -1.22784244 | 2.77215756 |       0 |
| 2014-12-31 18:00:00 | 0.88792316 | -1.14413608 | 2.85586392 |       0 |
| 2014-12-31 17:00:00 | 0.81233316 | -1.09042077 | 2.90957923 |       0 |
| 2014-12-31 16:00:00 |  0.7155543 | -1.17009848 | 2.82990152 |       0 |
| 2014-12-31 15:00:00 | 0.70769909 | -1.27260519 | 2.72739481 |       0 |
| 2014-12-31 14:00:00 | 0.72464297 | -1.28111012 | 2.71888988 |       0 |
| 2014-12-31 13:00:00 | 0.74536472 | -1.26320501 | 2.73679499 |       0 |
| 2014-12-31 12:00:00 | 0.76567399 | -1.24127126 | 2.75872874 |       0 |
+---------------------+------------+--

#### DEMO STREAMING TEST

In [None]:
from google.cloud import pubsub_v1
publisher = pubsub_v1.PublisherClient()

MODEL_LOCATION = f"{BUCKET_URI}/training/models/energy.pkl"
model_file = 'training/models/model.pkl'
! gsutil cp -r $MODEL_LOCATION $model_file

from joblib import load
MODEL = load(model_file)

In [46]:
# SEND ANOMALY EXAMPLE

time = "2014-12-31 21:00:00"
treshhold = 2
actual = 24
predicted = MODEL.forecast()[0]
lower, upper, anomaly = check_anomaly(actual, predicted, treshhold)
publish_message(TOPIC_URI, time, predicted, lower, upper, anomaly, print_msg=True)


    {
      "creation_time": "2014-12-31 21:00:00",
      "predicted": 0.59972642,
      "lower": -1.40027358,
      "upper": 2.59972642,
      "anomaly": 1
    }
    


In [47]:
# SEND REGULAR EXAMPLE

time = "2014-12-31 22:00:00"
treshhold = 2
actual = 3
predicted = MODEL.forecast()[0]
lower, upper, anomaly = check_anomaly(actual, predicted, treshhold)
publish_message(TOPIC_URI, time, predicted, lower, upper, anomaly, print_msg=True)


    {
      "creation_time": "2014-12-31 22:00:00",
      "predicted": 0.59972642,
      "lower": -1.40027358,
      "upper": 2.59972642,
      "anomaly": 1
    }
    


In [49]:
# SEND CURRENT TIMESTAMP

from datetime import datetime
time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

treshhold = 2
actual = 2.5
predicted = MODEL.forecast()[0]
lower, upper, anomaly = check_anomaly(actual, predicted, treshhold)
publish_message(TOPIC_URI, time, predicted, lower, upper, anomaly, print_msg=True) #@param TOPIC_URI string


    {
      "creation_time": "2022-07-15 17:25:43",
      "predicted": 0.59972642,
      "lower": -1.40027358,
      "upper": 2.59972642,
      "anomaly": 1
    }
    


In [50]:
# CHECK IF MESSAGES MADE IT TO BQ:
!bq query --use_legacy_sql=false 'SELECT * FROM `'"[your-project-id].hack22_dataset.forecast_view"'`ORDER BY creation_time desc limit 10'

E0715 17:25:47.312982572    1275 backup_poller.cc:136]       Run client channel backup poller: {"created":"@1657905947.312882234","description":"pollset_work","file":"src/core/lib/iomgr/ev_epoll1_linux.cc","file_line":247,"referenced_errors":[{"created":"@1657905947.312874766","description":"Bad file descriptor","errno":9,"file":"src/core/lib/iomgr/ev_epoll1_linux.cc","file_line":732,"os_error":"Bad file descriptor","syscall":"epoll_wait"}]}
+---------------------+------------+-------------+------------+---------+
|    creation_time    | predicted  |    lower    |   upper    | anomaly |
+---------------------+------------+-------------+------------+---------+
| 2022-07-15 17:25:43 | 0.59972642 | -1.40027358 | 2.59972642 |       1 |
| 2014-12-31 22:00:00 | 0.59972642 | -1.40027358 | 2.59972642 |       1 |
| 2014-12-31 21:00:00 | 0.59972642 | -1.40027358 | 2.59972642 |       1 |
| 2014-12-31 21:00:00 | 0.69218379 | -1.38048344 | 2.61951656 |       0 |
| 2014-12-31 20:00:00 | 0.75800251 |

## CLEAN UP

###### STOP -  DATAFLOW JOB:

In [51]:
# Check if Dataflow jobs are running:
! gcloud dataflow jobs list --region=$REGION --status='active'

JOB_ID                                    NAME                           TYPE       CREATION_TIME        STATE    REGION
2022-07-15_10_19_59-16597450199510106227  hack22-dataflow                Streaming  2022-07-15 17:19:59  Running  us-central1
2022-07-15_08_21_06-16100316668033096407  ps-to-text-hack22-email-topic  Streaming  2022-07-15 15:21:06  Running  us-central1


In [52]:
#Replace with the 'id' value from previous cell
JOB_ID = '2022-07-15_08_21_06-16100316668033096407' #@param:string

In [53]:
#Stop the pipeline:
! gcloud dataflow jobs cancel $JOB_ID --region=$REGION 

Cancelled job [2022-07-15_08_21_06-16100316668033096407]


###### DELETE - BQ DATASET:

In [None]:
! bq rm -r=true -f=true $DATASET_NAME

In [None]:
BREAK HERE!!!!

# UTILS

In [None]:
! gsutil cp [your-nb]_deployment.ipynb $BUCKET_URI

In [None]:
# BUCKET_NAME = "[your-bucket]"
# BUCKET_URI = f"gs://{BUCKET_NAME}"
# BUCKET_URI
! gsutil cp -r functions $BUCKET_URI

In [None]:
! gsutil ls -r $BUCKET_URI

In [None]:
from zipfile import ZipFile
import os

def get_all_file_paths(directory):
  
    # initializing empty file paths list
    file_paths = []
  
    # crawling through directory and subdirectories
    for root, directories, files in os.walk(directory):
        for filename in files:
            # join the two strings in order to form the full filepath.
            filepath = os.path.join(root, filename)
            file_paths.append(filepath)
  
    # returning all file paths
    return file_paths  

def zip_files(directory):
    # path to folder which needs to be zipped
    # directory = './functions'
  
    # calling function to get all file paths in the directory
    file_paths = get_all_file_paths(directory)
  
    # printing the list of all files to be zipped
    print('Following files will be zipped:')
    for file_name in file_paths:
        print(file_name)
  
    # writing files to a zipfile
    with ZipFile('functions.zip','w') as zip:
        # writing each file one by one
        for file in file_paths:
            zip.write(file)
  
    print('All files zipped successfully!')
    
zip_files('./functions')

In [None]:
BUCKET_URI

In [None]:
! gsutil cp -r 'functions.zip' gs://[your-bucket]/functions

In [None]:
! gsutil ls -r gs://[your-bucket]/functions

# TESTING

In [None]:
#### TEST PUBLISH TO THE TOPIC

# # Test in the Pub/Sub UI and check if it worked
# # UI link = https://console.cloud.google.com/cloudpubsub/topic/detail/[your-topic]?project=[your-project-id]&tab=messages

# test_massage_1 = """
# {
#   "timestamp": "2022-07-14 12:11:35.22 ",
#   "predicted": 30.23,
#   "lower": 26.274,
#   "upper": 32.573,
#   "anomaly": "N"
# }
# """

# !bq query --use_legacy_sql=false 'SELECT * FROM `'"[your-project-id].hack22_dataset.forecast_view"'`'

# from google.cloud import pubsub_v1

# publisher = pubsub_v1.PublisherClient()
# time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# test_massage_2 = """
# {
#   "timestamp": \""""+time+"""\",
#   "predicted": 0.522726416,
#   "lower": -1.22227358,
#   "upper": 2.11172641,
#   "anomaly": "Y"
# }
# """
# data = test_massage_2.encode("utf-8")
# future = publisher.publish(TOPIC_URI, data)
# print(future.result())

# !bq query --use_legacy_sql=false 'SELECT * FROM `'"[your-project-id].hack22_dataset.forecast_view"'`'

# predicted = MODEL.forecast()[0]
# print(predicted)
# treshhold = 2
# upper = predicted+treshhold
# lower = predicted-treshhold
# time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# anomaly = "Y"

# test_massage_3 = """
# {
#   "timestamp": \""""+time+"""\",
#   "predicted": """+"{:.8f}".format(predicted)+""",
#   "lower": """+"{:.8f}".format(lower)+""",
#   "upper": """+"{:.8f}".format(upper)+""",
#   "anomaly": \""""+anomaly+"""\"
# }
# """

# print(test_massage_3)
# data = test_massage_3.encode("utf-8")
# future = publisher.publish(TOPIC_URI, data)

# !bq query --use_legacy_sql=false 'SELECT * FROM `'"[your-project-id].hack22_dataset.forecast_view"'`'

In [None]:
# LOAD AND TEST MODEL

# import os

# MODEL_LOCATION = f"{BUCKET_URI}/app_engine/models/model.pkl"
# model_file = 'model.pkl'
# ! gsutil cp -r $MODEL_LOCATION $model_file

# from joblib import load

# MODEL = load(model_file)
# MODEL.forecast()
# MODEL.forecast(steps=3)

# os.remove(model_file)

# NOTES

In [None]:
# from google.cloud import pubsub_v1

# publisher = pubsub_v1.PublisherClient()
# TOPIC_PATH = publisher.topic_path(PROJECT_ID, TOPIC_ID)
# print(TOPIC_PATH)
# # topic = publisher.create_topic(request={"name": TOPIC_PATH})
# topic = publisher.create_topic(TOPIC_PATH)
# print(f"Created topic: {topic.name}")

# # publisher.delete_topic(TOPIC_PATH)
# # print(f"Topic deleted: {topic.name}")

In [None]:
# NOTES:
# bq cp myDataset.myTable myDataset.myTableCopy
# bq extract --compression=GZIP --destination_format=CSV --field_delimiter=tab --print_header=false myDataset.myTable gs://my-bucket/myFile.csv.gzip
# bq head --max_rows=10 --start_row=50 --selected_fields=field1,field3 myDataset.myTable
# bq insert --ignore_unknown_values --template_suffix=_insert myDataset.myTable /tmp/myData.json
# bq ls myDataset
# !bq query --use_legacy_sql=false 'SELECT * FROM `'"[your-project-id].hack22_dataset.forecast_view"'`'

In [None]:
# RESTARTING THE PIPELINE
# There is no possiblity of restarting existing pipeline as it is not supported. To restart simply run the job run cell again

# NOTES:
# Create DataFlow Pub/Sub to BQ pipeline:
# !gcloud dataflow jobs run hack22-dataflow     --gcs-location gs://dataflow-templates/latest/PubSub_to_BigQuery     --region $REGION     --staging-location gs://[your-bucket]/temp     --parameters inputTopic=projects/[your-project-id]/topics/hack22-topic,outputTableSpec=[your-project-id]:hack22_dataset.forecast_view,outputDeadletterTable=[your-project-id]:hack22_dataset.forecast_view

In [None]:
## Service Account

# **If you don't know your service account**, try to get your service account using `gcloud` command by executing the second cell below.

In [None]:
# APP_LOCATION = "{}/app_engine".format(BUCKET_URI)

In [None]:
# SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
# if (
#     SERVICE_ACCOUNT == ""
#     or SERVICE_ACCOUNT is None
#     or SERVICE_ACCOUNT == "[your-service-account]"
# ):
#     # Get your service account from gcloud

#     shell_output = !gcloud auth list 2>/dev/null
#     SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

#     print("Service Account:", SERVICE_ACCOUNT)

In [None]:
#### Set service account access for Vertex AI Pipelines

# Run the following commands to grant your service account access to read and write pipeline artifacts in the bucket that you created in the previous step -- you only need to run these once per service account.

In [None]:
# ! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

# ! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI