# ASF Static RTC Files

> Module for accessing static files hosted by ASF. 

In [None]:
#| default_exp asf_static

# Module Imports

In [None]:
#| export
import logging
import os
import re
import tempfile

import asf_search
import ee
from fastcore.basics import patch
import gcsfs
from IPython.display import JSON
import pandas as pd

from sar_asf_to_gee.core import (
    FORMAT_GEE_DATETIME_STRING,
    create_gee_image_collection
)

# Setup

In [None]:
# Notebook-only setup (this cell is not exported): grab the root logger and
# lower its threshold to INFO so the progress messages logged below are shown.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# ASF Static OPERA Files

In [None]:
#| export
class SearchOpera:
    """Holds the configuration for an ASF search and its downstream transfers.

    The constructor only stores its arguments and decides where downloaded
    files will live; it performs no network access.
    """

    # Keys under which the transfer methods store their per-result outputs
    # inside each search result's ``properties`` dictionary.
    LOCAL_PROPNAME = 'local_paths'
    GCS_PATH_PROPNAME = 'gcs_path'
    GEE_ASSET_PROPNAME = 'gee_asset'

    def __init__(
        self,
        search_opts,  # Keyword arguments forwarded to asf_search
        gcs_bucket,  # GCS bucket
        gee_gcp_project,  # GCP project used by Earth Engine
        gee_image_collection=None,  # Name of the Earth Engine ImageCollection (optional)
        local_storage=None,  # Download directory; a temporary one is created when omitted
    ):
        self.search_opts = search_opts
        self.gcs_bucket = gcs_bucket
        self.gee_gcp_project = gee_gcp_project
        self.gee_image_collection = gee_image_collection

        # Search results are fetched lazily and cached here.
        self._search_results = None

        # Keep a handle on the TemporaryDirectory object (when one is needed)
        # so the directory stays alive for as long as this instance does.
        self.tempdir = None if local_storage else tempfile.TemporaryDirectory()
        if self.tempdir is None:
            self.local_storage = local_storage
        else:
            self.local_storage = self.tempdir.name
            logging.debug(f'created temporary directory: {self.tempdir.name}')

In [None]:
# Example query: OPERA Sentinel-1 RTC products intersecting a single point
# during the second half of December 2023, capped at 50 results.
search_opts = dict(
    dataset='OPERA-S1',
    processingLevel='RTC',
    intersectsWith='Point (-122.299 37.702)',  # San Francisco
    start='2023-12-14 00:00',
    end='2024-01-01 00:00',
    maxResults=50,
)

# Example instance used by the demonstration cells that follow.
obj = SearchOpera(
    search_opts=search_opts,
    gcs_bucket='asf-static-data-staging',
    gee_gcp_project='sar-asf-to-gee',
    gee_image_collection='example-opera-s1-rtc',
    local_storage='temp_downloads',
)

Create a method to return the count of search results.

In [None]:
#| export
@patch
def search_count(self: SearchOpera):
    """Return the number of records matching the search options.

    The number is whatever `asf_search.search_count` reports for
    `self.search_opts`; duplicates are not removed.
    """
    options = self.search_opts
    total = asf_search.search_count(**options)
    return total

In [None]:
obj.search_count()

Create a method to return the search results.

In [None]:
#| export
@patch
def search(
    self:SearchOpera,
):
    """Return the ASF search results for `self.search_opts`.

    The query is issued on the first call only; the results object is cached
    on the instance and the same object is returned by later calls, so
    annotations that other methods add to each result's `properties` persist.
    """
    # Test against None rather than truthiness: a search that legitimately
    # returns no results yields an empty (falsy) container, which would
    # otherwise cause ASF to be queried again on every call.
    if self._search_results is None:
        self._search_results = asf_search.search(**self.search_opts)
    return self._search_results

In [None]:
obj.search()

Return search results as a dataframe.

In [None]:
#| export
@patch
def as_dataframe(
    self:SearchOpera,
):
    """Return the search results as a pandas dataframe (w/o duplicates).

    Each row is the `properties` dictionary of one search result. When the
    same product has been processed more than once, only the row with the
    latest `processingDate` is kept. An empty dataframe is returned when the
    search produced no results.
    """
    df = pd.DataFrame.from_records([r.properties for r in self.search()])
    if df.empty:
        # With no records the frame has no columns, so sorting on
        # 'processingDate' below would raise a KeyError.
        return df

    # Columns that together identify one product; rows agreeing on all of
    # them are treated as re-processings of the same product.
    identity_columns = [
        'groupID',
        'beamMode',
        'processingLevel',
        'startTime',
        'stopTime',
    ]
    # For datasets that have been processed more than once, retain the last result.
    df = (df.sort_values(by=['processingDate'])
            .drop_duplicates(subset=identity_columns, keep='last')
    )
    return df

In [None]:
obj.as_dataframe()

In [None]:
#| export
@patch
def scene_list(
    self:SearchOpera,
):
    return self.as_dataframe()['sceneName'].to_list()

In [None]:
obj.scene_list()

Transfer files locally.

In [None]:
#| export
def get_urls(r):
    """Map each polarization band (and the mask) of a search result to its GeoTIFF URL.

    `r` is a search result whose `properties` dictionary provides
    'polarization', 'url' and, optionally, 'additionalUrls'. Every URL ending
    in `<band>.tif`, where `<band>` is one of the polarization names or
    'mask', is returned under the key `<band>`. URLs that match nothing
    (metadata, checksums, ...) are ignored.

    NOTE: this cell must be exported because the exported `to_local` method
    calls this function.
    """
    # Assumes 'polarization' is a sequence of band names (e.g. ['VV', 'VH'])
    # -- TODO confirm for datasets other than OPERA-S1.
    band_names = [re.escape(p) for p in r.properties['polarization']] + ['mask']
    # The suffix is a raw string: '\.' inside a regular string literal is an
    # invalid escape sequence.
    pattern = re.compile('(' + '|'.join(band_names) + r')\.tif$')

    # Tolerate results that carry no additional URLs (key missing or None).
    extra_urls = r.properties.get('additionalUrls') or []

    tif_dict = {}
    for url in [r.properties['url'], *extra_urls]:
        m = pattern.search(url)
        if m:
            tif_dict[m.group(1)] = url
    return tif_dict

In [None]:
#| export
@patch
def to_local(
    self:SearchOpera,
):
    "Transfer static ASF results to local system, unzip, and update the job dictionary."    
    logging.info(f'Starting asf_static.to_local()')

    for r in self.search():
        logging.info(f'  Processing {r.properties["fileID"]}')
        r.properties['url_set'] = get_urls(r)
        r.properties[self.LOCAL_PROPNAME] = {}
        for url_key, url_value in r.properties['url_set'].items():
            filename = f'{r.properties["fileID"]}_{url_key}.tif'
            asf_search.download_url(
                url=url_value,
                path=self.local_storage,
                filename=filename
            )
            r.properties[self.LOCAL_PROPNAME][url_key] = os.path.join(self.local_storage, filename)
        # display(JSON(r.properties))
    logging.info(f'Finished asf_static.to_local()')

In [None]:
from pprint import pprint
obj.to_local()

Display the local paths of the downloaded files.

In [None]:
# Show, per search result, the band -> local file path mapping recorded by
# to_local(). The class constant is used instead of a hard-coded key so this
# cell keeps working if the property name changes.
for r in obj.search():
    display(JSON(r.properties[obj.LOCAL_PROPNAME]))

## Transfer to Google Cloud Storage

Create an instance method for transferring results from a local computer to Google Cloud Storage.

In [None]:
#| export
@patch
def to_gcs(
    self:SearchOpera,
):
    logging.info('Starting to_gcs()')

    fs = gcsfs.GCSFileSystem(token='google_default')

    if not fs.exists(self.gcs_bucket):
        print('Bucket does not exist!!!')
        fs.mkdir(self.gcs_bucket)

    for r in self.search():
        logging.info(f'  Transferring {r.properties["fileID"]}')
        r.properties[self.GCS_PATH_PROPNAME] = {}
        
        for key, local_filepath in r.properties[self.LOCAL_PROPNAME].items():
            path_split = os.path.split(local_filepath)
            filename = path_split[-1]
            gcs_path = f'{self.gcs_bucket}/{filename}'
            
            if fs.exists(gcs_path):
                logging.info(f'  GCS file already exists:\n    {gcs_path}')
            else:
                logging.info(f'  Starting to transfer file to GCS:\n    {gcs_path}')
                # Transfer the local file to GCS.
                print('filename', filename)
                print('gcs_path', gcs_path)
                fs.put_file(
                    lpath=filepath,
                    rpath=gcs_path
                )    
                logging.info(f'  Transferred file to GCS: {gcs_path}')
            r.properties[self.GCS_PATH_PROPNAME][key] = gcs_path

In [None]:
obj.to_gcs()

Display the GCS paths.

In [None]:
for r in obj.search():
    display(JSON(r.properties[obj.GCS_PATH_PROPNAME]))

## Create a GEE Asset

In [None]:
#| export
@patch
def create_gee_asset(
    self:SearchOpera,
):
    "Create an Earth Engine asset."
    logging.info(f'Starting create_gee_asset()')
    
    ee.Initialize(project=self.gee_gcp_project)
    
    create_gee_image_collection(self.gee_gcp_project, self.gee_image_collection)

    for r in self.search():
        logging.info(f'  Creating GEE assets for {r.properties["fileID"]}')
        r.properties[self.GEE_ASSET_PROPNAME] = {}

        display(JSON({'r.meta': r.meta, 'r.properties': r.properties}))
        start_time = r.properties['startTime']
        end_time = r.properties['stopTime']
        description = (f"{r.properties['platform']}"
                       f" - {r.properties['processingLevel']}"
                       f" - {r.properties['beamModeType']}")
        # id = f"{self.job_dict['job_id']}"
        id = r.properties["fileID"]
        
        for band, gcs_path in r.properties[self.GCS_PATH_PROPNAME].items():
            print('band', band)
            print('gcs_path', gcs_path)

            request = {
                'type': 'IMAGE',
                'bands': {  # TODO: Update this once multi-band COG assets are supported
                    'id': band
                },
                'gcs_location': {
                    'uris': [f'gs://{gcs_path}']
                },
                'properties': {
                    'source':  r.properties['url'],
                    'band': band  # TODO: Remove this once multi-band COG assets are supported
                },
                'startTime': start_time,  #.strftime(FORMAT_GEE_DATETIME_STRING),
                'endTime': end_time,  #.strftime(FORMAT_GEE_DATETIME_STRING),
                'description': description
            }

            path_parts = [
                'projects',
                self.gee_gcp_project,
                'assets',
                self.gee_image_collection,
                # TODO: Remove the band suffix once multi-band COG assets are supported
                f'{id}_{band}'.replace(".", "_") 
            ]
            assetname = os.path.join(*[x for x in path_parts if x is not None])


            logging.debug(f'request = {request}')
            logging.debug(f'assetname = {assetname}')
            try:
                ee.data.createAsset(
                    value=request,
                    path=assetname
                )  
                logging.info(f'Finished creating a GEE asset:\n    {assetname}.')
            except ee.EEException as e:
                print(f'e = {e}')
                if "does not exist or doesn't allow this operation" in str(e):
                    raise(e)
                else:
                    raise(e)  # TODO: Add logic to parse the EEException message.
                    logging.info('GEE asset already exists. Skipping.')

In [None]:
obj.create_gee_asset()

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()