# Create Image Collection from Cloud Storage COGs

## Setup

In [None]:
# imports
import ee
import os
import re
import json
import pandas as pd
from pprint import pprint
from google.cloud import storage
from google.auth.transport.requests import AuthorizedSession

# set project paths
ee_project = 'akveg-map'
ee_bucket = 'akveg-data'

# connect to the Google project
ee.Authenticate(auth_mode='notebook') 
ee.Initialize(project=ee_project)
session = AuthorizedSession(ee.data.get_persistent_credentials().with_quota_project(ee_project))

In [None]:
# function for getting list of tif files at a certain GCS location
def get_tif_list(project_name, bucket_name, path):
    storage_client = storage.Client(project=project_name)
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=path)
    tif_list = [f"gs://{bucket_name}/{blob.name}" for blob in blobs if blob.name.endswith('.tif')]
    tif_list = pd.DataFrame(tif_list).rename(columns={0: 'tif'})
    return tif_list

## Raw Dynamic World Counts by MGRS Tile

Clean existing image collection if needed. Should run collection size > 0. Run the following commands in terminal.
```
COLLECTION="projects/akveg-map/assets/dynamic_world_metrics/s2_dw_monthly_counts_mgrs_v20250414b"
earthengine ls $COLLECTION | xargs -P 20 -I {} earthengine rm {}
```

Create new image collection if needed. Run the following commands in terminal.
```
earthengine set_project akveg-map
COLLECTION="projects/akveg-map/assets/dynamic_world_metrics/s2_dw_monthly_counts_mgrs_v20250414b"
earthengine create collection $COLLECTION
```

In [None]:
#TODO Fix date extraction from filename to work for different filename formats
def load_gcs_cogs_to_collection(cogs, project_folder, collection, month_part, tile_part):
    # Request body as a dictionary.
    for cog in cogs['tif']:
      fileOnly = os.path.split(cog)[1]
      # print(fileOnly)

      cogName = fileOnly[:-4]
      print(cogName)
      
      # parts = fileOnly.split('_')
      # Split on both underscore and period
      parts = re.split(r'[_\.]', fileOnly)
      print(parts)
      
      monthxx = f"{parts[month_part]}"[-2:]
      month = int(monthxx)
      print(month)

      tile = f"{parts[tile_part]}"
      print(tile)

      # dt = datetime.strptime(yyyymmdd_hhmmss, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)
      # dt = f"{dt:%Y-%m-%dT%H:%M:%SZ}"  # Format string for ISO 8601 + Z
      # print(dt)
        
      request = {
        'type': 'IMAGE',
        'gcs_location': {
          'uris': cog
        },
        'properties': {
        #   'source': 'https://code.earthengine.google.com/067b10ee56537817756a3177a9138aee',
            # 'yyyymmdd_hhmmss': yyyymmdd_hhmmss,
            'month': monthxx,
            'tile': tile,
        },
        # 'startTime': dt,#'2023-01-01T00:00:00.000000000Z',
        # 'endTime': dt,#'2024-01-01T00:00:00.000000000Z',
      }

      pprint(json.dumps(request))

      # A folder (or ImageCollection) name and the new asset name.
      asset_id = collection+'/'+cogName
      # print(project_folder)
      # print(asset_id)
        
      url = 'https://earthengine.googleapis.com/v1alpha/projects/{}/assets?assetId={}'
      print(url)
      print(url.format(project_folder, asset_id))

      response = session.post(
        url = url.format(project_folder, asset_id),
        data = json.dumps(request)
      )

      # pprint(json.loads(response.content))
    print('done')

In [None]:
# get a list of MGRS monthly tif files in GCS
gcs_mgrs_folder = 's2_dw_v1_metrics/s2_dw_monthly_counts_mgrs_v20250414b/'
df_mgrs = get_tif_list(ee_project, ee_bucket, gcs_mgrs_folder)
print(df_mgrs)

# call the function above
collection = 'dynamic_world_metrics/s2_dw_monthly_counts_mgrs_v20250414b'
load_gcs_cogs_to_collection(df_mgrs, ee_project, collection, 4, 5)

## Dynamic World Percentages (May-Sep) by AKALB 50km Tile

Clean existing image collection if needed. Should run collection size > 0. Run the following commands in terminal.
```
COLLECTION="projects/akveg-map/assets/dynamic_world_metrics/s2_dw_percentages_56789_v20250414"
earthengine ls $COLLECTION | xargs -P 20 -I {} earthengine rm {}
```

Create new image collection if needed. Run the following commands in terminal.
```
earthengine set_project akveg-map
COLLECTION="projects/akveg-map/assets/dynamic_world_metrics/s2_dw_percentages_56789_v20250414"
earthengine create collection $COLLECTION
```

In [11]:
#TODO Fix date extraction from filename to work for different filename formats
def load_gcs_cogs_to_collection(cogs, project_folder, collection, month_part, tile_part, version_counts_part):
    # Request body as a dictionary.
    for cog in cogs['tif']:
      fileOnly = os.path.split(cog)[1]
      # print(fileOnly)

      cogName = fileOnly[:-4]
      print(cogName)
      
      # parts = fileOnly.split('_')
      # Split on both underscore and period
      parts = re.split(r'[_\.]', fileOnly)
      print(parts)
      
      months = parts[month_part]
      # month = int(monthxx)
      print(months)

      tile = f"{parts[tile_part]}"
      print(tile)

      version_counts = f"{parts[version_counts_part]}"
      print(version_counts)

      # dt = datetime.strptime(yyyymmdd_hhmmss, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc)
      # dt = f"{dt:%Y-%m-%dT%H:%M:%SZ}"  # Format string for ISO 8601 + Z
      # print(dt)
        
      request = {
        'type': 'IMAGE',
        'gcs_location': {
          'uris': cog
        },
        'properties': {
        #   'source': 'https://code.earthengine.google.com/067b10ee56537817756a3177a9138aee',
            # 'yyyymmdd_hhmmss': yyyymmdd_hhmmss,
            'months': months,
            'tile': tile,
            'version_counts': version_counts
        },
        # 'startTime': dt,#'2023-01-01T00:00:00.000000000Z',
        # 'endTime': dt,#'2024-01-01T00:00:00.000000000Z',
      }

      pprint(json.dumps(request))

      # A folder (or ImageCollection) name and the new asset name.
      asset_id = collection+'/'+cogName
      # print(project_folder)
      # print(asset_id)
        
      url = 'https://earthengine.googleapis.com/v1alpha/projects/{}/assets?assetId={}'
      print(url)
      print(url.format(project_folder, asset_id))

      response = session.post(
        url = url.format(project_folder, asset_id),
        data = json.dumps(request)
      )

      # pprint(json.loads(response.content))
    print('done')

In [None]:
# get a list of AKALB percentage tif files in GCS
gcs_akalb_folder = 's2_dw_v1_metrics/s2_dw_pct_akalb_050_v20250414/'
df_akalb = get_tif_list(ee_project, ee_bucket, gcs_akalb_folder)
print(df_akalb)

# call the function above
collection = 'dynamic_world_metrics/s2_dw_percentages_56789_v20250414'
load_gcs_cogs_to_collection(df_akalb, ee_project, collection, 3, 4, 5)

s2_dw_pct_56789_AK050H01V31_v20250414
['s2', 'dw', 'pct', '56789', 'AK050H01V31', 'v20250414', 'tif']
56789
AK050H01V31
v20250414
('{"type": "IMAGE", "gcs_location": {"uris": '
 '"gs://akveg-data/s2_dw_v1_metrics/s2_dw_pct_akalb_050_v20250414/s2_dw_pct_56789_AK050H01V31_v20250414.tif"}, '
 '"properties": {"months": "56789", "tile": "AK050H01V31", "version_counts": '
 '"v20250414"}}')
https://earthengine.googleapis.com/v1alpha/projects/{}/assets?assetId={}
https://earthengine.googleapis.com/v1alpha/projects/akveg-map/assets?assetId=dynamic_world_metrics/s2_dw_percentages_56789_v20250414/s2_dw_pct_56789_AK050H01V31_v20250414
s2_dw_pct_56789_AK050H01V32_v20250414
['s2', 'dw', 'pct', '56789', 'AK050H01V32', 'v20250414', 'tif']
56789
AK050H01V32
v20250414
('{"type": "IMAGE", "gcs_location": {"uris": '
 '"gs://akveg-data/s2_dw_v1_metrics/s2_dw_pct_akalb_050_v20250414/s2_dw_pct_56789_AK050H01V32_v20250414.tif"}, '
 '"properties": {"months": "56789", "tile": "AK050H01V32", "version_counts": '
