In [1]:

import os
from google.cloud import storage, bigquery
from google.auth import default
from dotenv import load_dotenv
from google.cloud.exceptions import NotFound
from google.cloud import aiplatform
import subprocess


In [2]:
# Load environment variables (including GOOGLE_APPLICATION_CREDENTIALS) from secrets.env
load_dotenv(dotenv_path="../credentials/secrets.env")

cred_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

# Validate the credentials path exactly once; the diagnostic message and the
# client-initialization branch below are both driven by this single result.
# isfile() is used so that a directory path is not mistaken for a key file.
if not cred_path:
    print("GOOGLE_APPLICATION_CREDENTIALS is not set.")
    creds_available = False
elif not os.path.isfile(cred_path):
    print(f"File does not exist at path: {cred_path}")
    creds_available = False
else:
    print("GOOGLE_APPLICATION_CREDENTIALS loaded and file exists.")
    creds_available = True

if not creds_available:
    print(
        "GOOGLE_APPLICATION_CREDENTIALS is not set or the file does not exist.\n"
        "Please ensure secrets.env contains a valid path to your service account JSON file."
    )
    # None marks the clients as unavailable for the cells that check for it.
    # NOTE: project_id is only bound on the success path below, so any cell
    # that reads it must first confirm the clients are not None.
    storage_client = None
    bq_client = None
else:
    print("GOOGLE_APPLICATION_CREDENTIALS loaded from .env:")
    print(cred_path)

    # Initialize GCP clients using ADC (Application Default Credentials)
    storage_client = storage.Client()
    bq_client = bigquery.Client()

    # Confirm authentication
    creds, project_id = default()
    # Not every credential type exposes service_account_email (for example
    # user credentials), so fall back to None instead of raising AttributeError.
    member_email = getattr(creds, "service_account_email", None)
    identity_label = member_email if member_email else "unknown (credentials have no service_account_email)"
    print(f"Authenticated as: {identity_label}")
    print(f"GCP Project ID: {project_id}")

# GCP configuration
REGION = "us-east1"
print(f"GCP region set to: {REGION}")


GOOGLE_APPLICATION_CREDENTIALS loaded and file exists.
GOOGLE_APPLICATION_CREDENTIALS loaded from .env:
C:/Users/iauge/Documents/Drexel MSDS/DSCI 591/DSCI591-FACTS/credentials/dsci-591-capstone-8d1bd9498612.json
Authenticated as: 13742792432-compute@developer.gserviceaccount.com
GCP Project ID: dsci-591-capstone
GCP region set to: us-east1


In [3]:
# Re-create the GCP clients bound to the detected project ID, if one is known.
# This only runs when the credential-loading cell actually produced clients:
# when it left them as None, project_id was never bound, and rebuilding the
# clients here would both raise NameError and discard the None sentinel that
# the later cells check before touching GCP.
if storage_client is not None and bq_client is not None:
    # A falsy project_id (None or "") is passed as None so the client library
    # falls back to the project inferred from the environment.
    storage_client = storage.Client(project=project_id or None)
    bq_client = bigquery.Client(project=project_id or None)
else:
    print("GCP clients are not initialized. Skipping project-scoped client setup.")

In [4]:
# Create a sample bucket if it does not exist.
# After this cell runs successfully, `bucket_name`, `bucketDef`, and `bucket`
# are all bound whether the bucket was just created or already existed, so the
# cell behaves the same on a first run and on a re-run.
if storage_client is None:
    print("storage_client is not initialized. Cannot create bucket.")
elif not project_id:
    # Without a project ID the f-string below would produce "None-data-pipeline".
    print("project_id is not set. Cannot derive a bucket name.")
else:
    bucket_name = f"{project_id}-data-pipeline"
    bucketDef = storage_client.bucket(bucket_name)

    # lookup_bucket returns None (rather than raising) when the bucket is absent.
    bucket = storage_client.lookup_bucket(bucket_name)
    if bucket is None:
        bucket = storage_client.create_bucket(bucketDef, project=project_id, location=REGION)
        print(f"Bucket created: {bucket.name}")
    else:
        print(f"Bucket {bucket_name} already exists.")


Bucket created: dsci-591-capstone-data-pipeline


In [5]:
# Ensure the sample BigQuery dataset exists, creating it in REGION when the
# lookup reports NotFound. Nothing is attempted without a BigQuery client.
if bq_client is None:
    print("bq_client is not initialized. Cannot create dataset.")
else:
    dataset_ref = f"{project_id}.sample_dataset"
    try:
        bq_client.get_dataset(dataset_ref)
    except NotFound:
        # The dataset is missing: build its definition and create it.
        dataset = bigquery.Dataset(dataset_ref)
        dataset.location = REGION
        bq_client.create_dataset(dataset, exists_ok=True)
        print(f"BigQuery dataset created: {dataset.dataset_id}")
    else:
        # The lookup succeeded, so there is nothing to create.
        print(f"BigQuery dataset {dataset_ref} already exists.")

BigQuery dataset created: sample_dataset


In [6]:
# Point the Vertex AI SDK at the project and region configured earlier in the notebook.
vertex_init_kwargs = dict(project=project_id, location=REGION)
aiplatform.init(**vertex_init_kwargs)

print("Vertex AI initialized.")

Vertex AI initialized.
