In [None]:
import ee
# Authenticate this session with Earth Engine, then initialise the client
# library against the 'ucsb-pt' project. Both calls must run before any
# other `ee.*` call in the notebook.
ee.Authenticate()
ee.Initialize(project='ucsb-pt')

In [5]:
from tqdm import tqdm

# India boundary asset, used below as the spatial filter for the embeddings.
india = ee.FeatureCollection("users/ptripathy/IND_Boundary")

# Annual satellite-embedding images that intersect India and fall inside
# calendar year 2024, ordered by system:index. The batch loop further down
# addresses images by their position in this ordering.
embeddings = (
    ee.ImageCollection('GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL')
    .filterBounds(india)
    .filterDate('2024-01-01', '2025-01-01')
    .sort('system:index')
)

# size() is evaluated server-side; getInfo() brings the count to the client
# as a plain Python int so it can bound the batch index range.
total_images = embeddings.size().getInfo()
print('Total images for India 2024:', total_images)

# Export function
def export_image(img, destination='gcs'):
    """Start an Earth Engine batch export for a single image.

    The task description (and output file name) is built from the image's
    own properties as ``<UTM_ZONE>_<system:index>``.

    Args:
        img: The ``ee.Image`` to export.
        destination: ``'gcs'`` (default) exports to the ``iguide-onetb``
            Cloud Storage bucket under the ``GoogleEmbeddings2024/`` prefix;
            ``'drive'`` exports to the ``GoogleEmbeddings2024`` Drive folder.

    Returns:
        The ``ee.batch.Task`` that was started, so callers can track it.
        Existing callers that ignore the return value are unaffected.

    Raises:
        ValueError: If ``destination`` is neither ``'gcs'`` nor ``'drive'``.
    """
    # Validate first, before any request is sent. Previously an unknown
    # destination skipped both branches and failed at task.start() with an
    # UnboundLocalError, after already paying for the getInfo() round trip.
    if destination not in ('gcs', 'drive'):
        raise ValueError(
            f"Unknown destination {destination!r}; expected 'gcs' or 'drive'"
        )

    # getInfo() is a blocking request; the name is needed as a client-side
    # str because it is concatenated into the file name prefix below.
    name = ee.String(img.get('UTM_ZONE')).cat('_').cat(ee.String(img.get('system:index'))).getInfo()

    # Settings shared by both export targets, kept in one place so the two
    # destinations cannot drift apart.
    export_kwargs = dict(
        image=img,
        description=name,
        region=img.geometry(),
        scale=30,
        crs=img.projection().crs(),  # keep the image's own CRS
        fileDimensions=5120,
        maxPixels=1e13,
    )

    if destination == 'gcs':
        task = ee.batch.Export.image.toCloudStorage(
            bucket='iguide-onetb',  # Replace with your GCS bucket name
            fileNamePrefix='GoogleEmbeddings2024/' + name,
            **export_kwargs,
        )
    else:  # destination == 'drive'
        task = ee.batch.Export.image.toDrive(
            folder='GoogleEmbeddings2024',
            **export_kwargs,
        )

    task.start()
    return task

# Export images in batches by index range (start_idx and end_idx are both
# inclusive positions in the sorted `embeddings` collection).
start_idx = 116
end_idx = 120

# Clamp the inclusive upper bound to the last valid index once, so the loop
# and the summary message below always agree.
last_idx = min(end_idx, total_images - 1)

image_list = embeddings.toList(embeddings.size())
for i in tqdm(range(start_idx, last_idx + 1)):
    # export_image only starts the batch task; it does not wait for it to finish.
    export_image(ee.Image(image_list.get(i)))

if start_idx <= last_idx:
    print(f'Exported batch: images {start_idx} to {last_idx}')
else:
    # Empty or out-of-range window: nothing was submitted, so do not report
    # an inverted range such as "images 300 to 210".
    print(f'No images exported: index range {start_idx}-{end_idx} is empty '
          f'for a collection of {total_images} images')

Total images for India 2024: 211


100%|██████████| 5/5 [01:06<00:00, 13.40s/it]

Exported batch: images 116 to 120





In [6]:
# Run this if you want to monitor the progress
import time

# Poll the Earth Engine task list until nothing is queued or running.
while True:
    tasks = ee.batch.Task.list()  # Refresh task list each loop

    # Query every task's status exactly once per poll. Calling
    # task.status() separately for each counter quadruples the number of
    # status calls, and lets a task that changes state between passes be
    # counted twice or not at all.
    # NOTE(review): Task.list() may already populate `task.state`, which
    # would avoid the per-task status() call entirely — confirm before use.
    states = [task.status()['state'] for task in tasks]

    pending = states.count('READY')
    running = states.count('RUNNING')
    completed = states.count('COMPLETED')
    failed = states.count('FAILED')

    print(f'Tasks - Pending: {pending}, Running: {running}, Completed: {completed}, Failed: {failed}')

    if pending == 0 and running == 0:
        print('All tasks finished!')
        break

    time.sleep(5)

Tasks - Pending: 4, Running: 3, Completed: 121, Failed: 12
Tasks - Pending: 4, Running: 3, Completed: 121, Failed: 12
Tasks - Pending: 4, Running: 3, Completed: 121, Failed: 12
Tasks - Pending: 3, Running: 3, Completed: 123, Failed: 12
Tasks - Pending: 2, Running: 3, Completed: 123, Failed: 12


KeyboardInterrupt: 