In [None]:
#@title Copyright 2024 Google LLC. { display-mode: "form" }
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
%%file requirements.txt
google-auth
google-cloud-storage
functions-framework
# main.py imports `requests` directly, so it is listed explicitly rather than
# relying on another package to pull it in.
requests

Writing requirements.txt


In [None]:
%%capture
!pip install -r requirements.txt

In [None]:
%%writefile main.py
from google.cloud import storage
from google.cloud.storage.fileio import BlobWriter
from markupsafe import escape
import base64
import functions_framework
import google.auth
import io
import json
import requests
import urllib

import os

# Application Default Credentials and their project, resolved once at import
# time and used by download_file() to build the storage client.
credentials, project_id = google.auth.default()

# Address sent to the download server as the `email` query parameter.
# Set the DOWNLOAD_EMAIL environment variable to override it; when the variable
# is unset or empty the previous hard-coded value is used.
email = os.environ.get('DOWNLOAD_EMAIL') or 'mdewitt@google.com'

def _append_query_param(url, name, value):
  """Returns `url` with `name=value` added to its query string.

  Unlike plain string concatenation this keeps the URL well-formed when it
  already has a query string (the parameter is joined with '&') or a fragment
  (the parameter is placed before the '#').

  Args:
    url: The URL to extend.
    name: Query parameter name.
    value: Query parameter value; percent-escaped except for '@'.

  Returns:
    The extended URL as a string.
  """
  from urllib.parse import quote, urlsplit, urlunsplit

  parts = urlsplit(url)
  # '@' stays literal so that an e-mail address on a URL without a query string
  # produces exactly the same URL as the former `url + "?email=..."`.
  param = f"{quote(name)}={quote(value, safe='@')}"
  query = f"{parts.query}&{param}" if parts.query else param
  return urlunsplit(parts._replace(query=query))


@functions_framework.cloud_event
def fetch_data_to_gcs(cloud_event):
  """Cloud event entry point: copies one remote file into Cloud Storage.

  The event's `data['message']['data']` field must hold a base64-encoded,
  UTF-8 JSON object with the keys:
    source_url:  URL to download from (the module-level `email` is added as
                 an `email` query parameter).
    dest_bucket: Name of the destination bucket.
    dest_path:   Object name inside the bucket.

  Args:
    cloud_event: The triggering event; only `cloud_event.data` is read.

  Returns:
    Whatever download_file() returns.

  Raises:
    KeyError: If the event or the decoded message lacks an expected key.
  """
  event_data = cloud_event.data['message']['data']
  print(f"Working with event: {event_data}")
  decoded_str = base64.b64decode(event_data).decode('utf-8')
  message = json.loads(decoded_str)

  source_url = _append_query_param(message['source_url'], 'email', email)
  bucket_name = message['dest_bucket']
  dest_path = message['dest_path']
  print(f"Downloading from {source_url} to gs://{bucket_name}/{dest_path}")

  return download_file(source_url, bucket_name, dest_path)

def download_file(source_url, bucket_name, dest_path, timeout=(30, 300)):
  """Streams `source_url` into gs://bucket_name/dest_path if it is not there yet.

  Args:
    source_url: URL to download.
    bucket_name: Name of the destination bucket.
    dest_path: Object name inside the bucket.
    timeout: Passed to `requests.get`; a `(connect, read)` tuple in seconds.
      Without it a stalled server would block the call indefinitely.

  Returns:
    "OK", both when the object was written and when it already existed.

  Raises:
    requests.HTTPError: If the server answers with an error status.
    requests.Timeout: If connecting or reading exceeds `timeout`.
  """
  # Configure the storage client.
  client = storage.Client(credentials=credentials, project=project_id)
  bucket = client.get_bucket(bucket_name)
  blob = bucket.blob(dest_path)
  if blob.exists(client=client):
    print("Blob already exists.")
    return "OK"

  # Write a chunk at a time.
  chunk_size = 40 * 1024 * 1024
  total_size = 0
  with requests.get(source_url, stream=True, timeout=timeout) as r:
    r.raise_for_status()
    # The writer is created only once the response is known to be good, so a
    # failed request never leaves an unclosed writer behind.
    writer = BlobWriter(blob=blob, chunk_size=chunk_size)
    for chunk in r.iter_content(chunk_size=chunk_size):
      total_size += writer.write(chunk)
    # close() is reached only after the whole body has been streamed.
    # NOTE(review): on a mid-stream error the writer is left unclosed, as
    # before - confirm how BlobWriter treats an abandoned upload.
    writer.close()
  print(f"Wrote {total_size} bytes")
  return "OK"

Overwriting main.py


In [None]:
import base64
import importlib
import json

from cloudevents.http import CloudEvent
from flask import Request
from google.colab import auth

PROJECT_ID = "mdewitt-earthengine"
# Authenticate BEFORE importing main: main.py calls google.auth.default() at
# import time, so on a fresh kernel the import needs credentials to exist.
auth.authenticate_user(project_id=PROJECT_ID)

import main
importlib.reload(main) # Force reload of the main file.

# Minimal CloudEvent attributes; the handler only reads the event's data.
attributes = {
    "type": "com.example.sampletype1",
    "source": "https://example.com/event-producer",
}

# The message main.fetch_data_to_gcs expects once the payload is decoded.
test_dict = {
    'source_url': 'http://www.dgi.inpe.br/api/download/TIFF/CBERS4A/2024_07/CBERS_4A_MUX_RAW_2024_07_30.13_19_46_ETC2/202_142_0/2_BC_UTM_WGS84/CBERS_4A_MUX_20240730_202_142_L2_BAND5.tif',
    'dest_bucket': 'earthengine_test',
    'dest_path': '202/142/CBERS_4A_MUX_20240730_202_142_L2/CBERS_4A_MUX_20240730_202_142_L2_BAND5.tif'
}

data = {
    'message': {
        # A Pub/Sub event carries its payload as base64 *text*, so the encoded
        # bytes are decoded to str to mirror what the deployed function gets.
        'data': base64.b64encode(json.dumps(test_dict).encode('utf-8')).decode('utf-8')
  }
}
event = CloudEvent(attributes, data)

print(main.fetch_data_to_gcs(event))

Working with event: eyJzb3VyY2VfdXJsIjogImh0dHA6Ly93d3cuZGdpLmlucGUuYnIvYXBpL2Rvd25sb2FkL1RJRkYvQ0JFUlM0QS8yMDI0XzA5L0NCRVJTXzRBX1dQTV9SQVdfMjAyNF8wOV8wMS4xM181NV8wN19FVEMyLzIxNF8xMzdfMC80X0JDX1VUTV9XR1M4NC9DQkVSU180QV9XUE1fMjAyNDA5MDFfMjE0XzEzN19MNF9CQU5ENC50aWYiLCAiZGVzdF9idWNrZXQiOiAiZWFydGhlbmdpbmVfdGVzdCIsICJkZXN0X3BhdGgiOiAiMjE0LzEzNy80X0JDX1VUTV9XR1M4NC9DQkVSU180QV9XUE1fMjAyNDA5MDFfMjE0XzEzN19MNF9CQU5ENC50aWYifQ==
Downloading from http://www.dgi.inpe.br/api/download/TIFF/CBERS4A/2024_09/CBERS_4A_WPM_RAW_2024_09_01.13_55_07_ETC2/214_137_0/4_BC_UTM_WGS84/CBERS_4A_WPM_20240901_214_137_L4_BAND4.tif?email=mdewitt@google.com
Downloading to 214/137/4_BC_UTM_WGS84/CBERS_4A_WPM_20240901_214_137_L4_BAND4.tif
Blob already exists.
OK


In [None]:
!gcloud config set functions/region us-east1
!gcloud functions deploy fetch_data_to_gcs\
  --runtime python312\
  --trigger-topic=cbers-data-to-download\
  --project $PROJECT_ID\
  --timeout=540\
  --memory=1G\
  --gen2

Updated property [functions/region].
  [INFO] A new revision will be deployed serving with 100% traffic.
You can view your function in the Cloud Console here: https://console.cloud.google.com/functions/details/us-east1/fetch_data_to_gcs?project=mdewitt-earthengine

buildConfig:
  automaticUpdatePolicy: {}
  build: projects/303338009467/locations/us-east1/builds/48332120-dd9b-4fe1-bc3c-0a4da8fd4d55
  dockerRegistry: ARTIFACT_REGISTRY
  dockerRepository: projects/mdewitt-earthengine/locations/us-east1/repositories/gcf-artifacts
  entryPoint: fetch_data_to_gcs
  runtime: python312
  source:
    storageSource:
      bucket: gcf-v2-sources-303338009467-us-east1
      generation: '1725892486062117'
      object: fetch_data_to_gcs/function-source.zip
  sourceProvenance:
    resolvedStorageSource:
      bucket: gcf-v2-sources-303338009467-us-east1
      generation: '1725892486062117'
      object: fetch_data_to_gcs/function-source.zip
createTime: '2024-09-09T14:01:27.392784696Z'
environment: G