# SBG Preprocess Application Notebook

This notebook is a wrapper around the pre-process code available at https://github.com/sister-jpl/sister-preprocess. The repo is not "installable", so we had to clone it into this environment. While the original preprocess command has some assumptions about files and run configs built in, we are removing those in favor of the application notebook staging.

In [15]:
import os
import netCDF4
import pathlib

import json
# stage_in packages
from unity_sds_client.resources.collection import Collection

# stage_out packages
from datetime import datetime, timezone
from unity_sds_client.resources.dataset import Dataset
from unity_sds_client.resources.data_file import DataFile

# SISTER methods
import glob
import sys
import hytools as ht
from hytools.io import parse_envi_header
import numpy as np
from PIL import Image
from sister.sensors import emit


## Inputs and Configurations

In the original pre-process, inputs are supplied by a run_config file. This consists of 2 entries (a raw_data file and a CRID). In reality, the system needs 3 inputs: an observation file, a radiance file, and the configurable CRID.

In the Unity system, the data files required will be staged in for the application, and the CRID is a config item that is passed in. To make this work in Unity, we will also pass in an "output collection", which is needed if we want to "persist" the output products in the data catalog.

In [16]:
# The defaults used here generally reflect a local or Jupyter environment; they are replaced with "runtime" values when run in the system.
# NOTE(review): the trailing "# type: stage-in" / "# type: stage-out" tags look like markers read by the
# application-packaging tooling - keep them intact; confirm against the Unity documentation.
# STAC catalog describing the staged-in input files (read with Collection.from_stac below).
input_stac_collection_file = '/unity/ads/input_collections/EMIT_L1B/catalog.json' # type: stage-in
# Directory that receives the output products and the stage-out STAC catalog.
output_stac_catalog_dir    = '/unity/ads/outputs/SBG-L1B-PRE/process_results'                    # type: stage-out

# pre-process variables
# Identifier used to create the output Collection in the stage-out cell.
output_collection="L1B_processed"
# Passed to emit.nc_to_envi and used to locate the renamed output files.
crid = "001"
# Scratch directory handed to emit.nc_to_envi.
temp_directory = "/unity/ads/temp/nb_l1b_preprocess"
# NOTE(review): not referenced by any other cell visible in this notebook.
sensor = "EMIT"


# Import Files from STAC Item Collection

Load filenames from the stage_in STAC item collection file

In [17]:
# Parse the stage-in STAC catalog and ask it for the locations of the data files it references.
inp_collection = Collection.from_stac(input_stac_collection_file)
data_filenames = inp_collection.data_locations()

# Bare expression so the notebook displays the list of staged file paths.
data_filenames

['/unity/ads/input_collections/EMIT_L1B/./EMIT_L1B_RAD_001_20231206T160939_2334011_006.nc',
 '/unity/ads/input_collections/EMIT_L1B/./EMIT_L1B_OBS_001_20231206T160939_2334011_006.nc']

## Get the data files from the STAC files
STAC makes no guarantee about key names, so we need to inspect the file names themselves to find the files we are expecting.

In [18]:
# Pick the radiance and observation granules out of the staged files.
# The files are identified by the product token in their file name
# (e.g. EMIT_L1B_RAD_..., EMIT_L1B_OBS_...).
# Reset both names first so a re-run cannot silently reuse a stale value
# left in the kernel by an earlier execution.
radiance_file = None
observation_file = None
for f in data_filenames:
    # Match on the file name only, so a directory that happens to contain
    # "RAD" or "OBS" cannot cause a false match.
    staged_name = os.path.basename(f)
    if "RAD" in staged_name:
        radiance_file = f
    elif "OBS" in staged_name:
        observation_file = f

# Fail here with an explicit message instead of a NameError/TypeError later on.
if radiance_file is None or observation_file is None:
    raise FileNotFoundError(
        "Expected one RAD and one OBS file in %s, found: %s"
        % (input_stac_collection_file, data_filenames)
    )

print("OBS:" + observation_file)
print("RAD:" + radiance_file)

OBS:/unity/ads/input_collections/EMIT_L1B/./EMIT_L1B_OBS_001_20231206T160939_2334011_006.nc
RAD:/unity/ads/input_collections/EMIT_L1B/./EMIT_L1B_RAD_001_20231206T160939_2334011_006.nc


In [19]:
# Make sure the output and temp locations exist before anything writes to them.
# This is really only required when running through the notebook; with
# exist_ok=True the call does nothing for directories that are already there.
for required_dir in (output_stac_catalog_dir, temp_directory):
    pathlib.Path(required_dir).mkdir(parents=True, exist_ok=True)


## Misc. function required by the preprocess command

In [20]:
def generate_quicklook(input_file,output_dir):
    """Write a three-band PNG quicklook of an image.

    The image is opened with HyTools, three bands are selected by wavelength,
    each band is stretched between its 5th and 95th percentile, scaled to
    0-255 and saved as ``<output_dir>/<base_name>.png``. Pixels equal to the
    image's no-data value are rendered as 0.

    Args:
        input_file: Path of the image, passed to ``HyTools.read_file``.
        output_dir: Directory the PNG is written to.

    Returns:
        None. The PNG file is the only output.
    """
    img = ht.HyTools()
    img.read_file(input_file)
    image_file =f"{output_dir}/{img.base_name}.png"

    # Only the first band differs between sensors: DESIS uses 660, everything else 1660.
    # NOTE(review): presumably because DESIS has no coverage at 1660 - confirm.
    first_wave = 660 if 'DESIS' in img.base_name else 1660
    band1 = img.get_wave(first_wave)
    band2 = img.get_wave(850)
    band3 = img.get_wave(560)

    # Cast to float BEFORE masking: NaN cannot be stored in an integer array,
    # so masking first raises ValueError when the bands are integer typed.
    rgb = np.stack([band1,band2,band3]).astype(float)
    rgb[rgb == img.no_data] = np.nan
    rgb = np.moveaxis(rgb,0,-1)

    # Per-band 5-95 percentile stretch, ignoring no-data pixels.
    bottom = np.nanpercentile(rgb,5,axis = (0,1))
    top = np.nanpercentile(rgb,95,axis = (0,1))
    rgb = np.clip(rgb,bottom,top)

    low = np.nanmin(rgb,axis=(0,1))
    span = np.nanmax(rgb,axis=(0,1)) - low
    # A flat (or entirely no-data) band has no positive range; divide by 1
    # instead so the band comes out as 0 rather than 0/0.
    span = np.where(span > 0, span, 1.0)
    rgb = (rgb - low)/span

    # Casting NaN to uint8 is undefined, so map no-data pixels to 0 explicitly.
    rgb = np.where(np.isnan(rgb), 0.0, rgb)
    rgb = (rgb*255).astype(np.uint8)

    im = Image.fromarray(rgb)
    im.save(image_file)

def generate_metadata(header_file,output_dir):
    """Write a ``.met.json`` metadata sidecar derived from an ENVI header.

    Reads the header with ``parse_envi_header``, copies selected fields into
    a dictionary, adds two fields taken from the underscore-separated file
    name, and dumps the result to ``<output_dir>/<base_name>.met.json``.

    Args:
        header_file: Path to the ENVI header file.
        output_dir: Directory the JSON file is written to.
    """
    header = parse_envi_header(header_file)
    # Strip the last four characters of the file name (".hdr" for the
    # headers this notebook passes in).
    base_name = os.path.basename(header_file)[:-4]
    name_tokens = base_name.split('_')

    metadata = {
        'sensor': header['sensor type'].upper(),
        'start_time': header['start acquisition time'].upper(),
        'end_time': header['end acquisition time'].upper(),
        'description': header['description'].capitalize(),
    }

    # Turn the bracketed, comma-separated corner string into a flat list of floats.
    without_brackets = header['bounding box'].translate(str.maketrans('', '', '[]'))
    corner_values = [float(token) for token in without_brackets.split(',')]

    # Group consecutive values into [first, second] pairs.
    metadata['bounding_box'] = [
        [corner_values[i], corner_values[i + 1]]
        for i in range(0, len(corner_values) - 1, 2)
    ]
    # NOTE(review): for names shaped like SISTER_EMIT_L1B_RDN_<datetime>_<crid>
    # token 4 is the datetime, not the product (token 3) - confirm the intended index.
    metadata['product'] = name_tokens[4]
    metadata['processing_level'] = name_tokens[2]

    config_file = f'{output_dir}/{base_name}.met.json'
    with open(config_file, 'w') as outfile:
        json.dump(metadata, outfile, indent=3)

# Preprocess Command


In [21]:
# This is the code we're actually interested in: the SISTER EMIT conversion.
# Per the output recorded for this cell, it writes the SISTER_*.bin products
# into output_stac_catalog_dir and keeps intermediates (data_gcs, loc_gcs)
# in temp_directory.
emit.nc_to_envi(
    radiance_file,
    output_stac_catalog_dir,
    temp_directory,
    obs_file=observation_file,
    export_loc=True,
    crid=crid,
)

Exporting EMIT L1B_RDN dataset
Projecting data to UTM Zone 14 North at 60m resolution
Creating output file that is 2015P x 1973L.
Processing /unity/ads/temp/nb_l1b_preprocess/data_gcs [1/1] : 0Using internal nodata values (e.g. -9999) for image /unity/ads/temp/nb_l1b_preprocess/data_gcs.
Copying nodata values from source /unity/ads/temp/nb_l1b_preprocess/data_gcs to destination /unity/ads/outputs/SBG-L1B-PRE/process_results/SISTER_EMIT_L1B_RDN_20231206T160939_001.bin.
...10...20...30...40...50...60...70...80...90...100 - done.
Exporting EMIT location dataset
Projecting location datacube to UTM Zone 14 North at 60m resolution
Creating output file that is 2015P x 1973L.
Processing /unity/ads/temp/nb_l1b_preprocess/loc_gcs [1/1] : 0Using internal nodata values (e.g. -9999) for image /unity/ads/temp/nb_l1b_preprocess/loc_gcs.
Copying nodata values from source /unity/ads/temp/nb_l1b_preprocess/loc_gcs to destination /unity/ads/outputs/SBG-L1B-PRE/process_results/SISTER_EMIT_L1B_RDN_20231206

In [22]:
# Post-processing that follows nc_to_envi (kept in its own cell).
# Write a .met.json sidecar from the ENVI header that sits next to each binary.
for bin_path in sorted(glob.glob(output_stac_catalog_dir+"/SISTER*.bin")):
    # Swap only the extension; str.replace would also rewrite a ".bin"
    # occurring anywhere else in the path.
    generate_metadata(os.path.splitext(bin_path)[0] + '.hdr',
                      output_stac_catalog_dir)

#Update crid
for product_path in glob.glob(output_stac_catalog_dir+"/SISTER*"):
    # Substitute the placeholder in the file name only, never in the
    # directory part of the path.
    folder, filename = os.path.split(product_path)
    new_filename = filename.replace('CRID', crid)
    if new_filename != filename:
        os.rename(product_path, os.path.join(folder, new_filename))

# glob returns matches in arbitrary order; sort so the choice is reproducible
# and fail with a clear message when nothing was produced.
rdn_candidates = sorted(glob.glob(output_stac_catalog_dir+"/*%s.bin" % crid))
if not rdn_candidates:
    raise FileNotFoundError(
        "No '*%s.bin' radiance product found in %s" % (crid, output_stac_catalog_dir)
    )
rdn_file = rdn_candidates[0]
generate_quicklook(rdn_file,output_stac_catalog_dir)

# Unity does not generate these, so we comment them out for now
# if os.path.exists(run_config_json):
#     shutil.copyfile(run_config_json,
#                 output+'/%s.runconfig.json' % os.path.basename(rdn_file)[:-4])
# #added this check because i don't think this script is creating a run.log... not sure where this comes from
# if os.path.exists('run.log'):
#     shutil.copyfile('run.log',
#                 output+'/%s.log' % os.path.basename(rdn_file)[:-4])
# done being "interested in"

# Create stage-out item catalog

In [23]:
import json
# stage_in packages
from unity_sds_client.resources.collection import Collection

# stage_out packages
from datetime import datetime, timezone
from unity_sds_client.resources.dataset import Dataset
from unity_sds_client.resources.data_file import DataFile

# Create a collection
out_collection = Collection(output_collection)

# The pattern matches the radiance binary and any *_LOC / *_OBS companions.
# Sort so the pick is reproducible, and fail clearly when nothing was produced.
data_files = sorted(glob.glob(output_stac_catalog_dir+"/SISTER*RDN*.bin"))
if not data_files:
    raise FileNotFoundError(
        "No SISTER*RDN*.bin product found in %s" % output_stac_catalog_dir
    )
# hack to get the radiance file: drop a companion suffix if one was picked
data_file = os.path.basename(data_files[0]).replace("_LOC","").replace("_OBS","")
name=os.path.splitext(data_file)[0]

# Get some metadata from met.json file
with open(os.path.join(output_stac_catalog_dir, name+".met.json")) as metadata:
    md_dict = json.load(metadata)
start_time = md_dict['start_time']
end_time = md_dict['end_time']

# Create a Dataset for the collection
dataset = Dataset(
    name=name,
    collection_id=out_collection.collection_id,
    start_time=start_time,
    end_time=end_time,
    # Timezone-aware "now"; same ISO string as the deprecated
    # datetime.utcnow().replace(tzinfo=timezone.utc).
    creation_time=datetime.now(timezone.utc).isoformat(),
)

# File name of the STAC item that is registered explicitly further down.
stac_item_name = name + ".json"

# Add output file(s) to the dataset
for product_path in sorted(glob.glob(output_stac_catalog_dir+"/SISTER*")):
    # A STAC item left over from an earlier run also matches SISTER*; skip it
    # here so it is not added twice (it is added explicitly below).
    if os.path.basename(product_path) == stac_item_name:
        continue
    # DataFile arguments: type, location, roles = [], title = "", description = ""
    if product_path.endswith(".bin"):
        dataset.add_data_file(DataFile("binary",product_path, ["data"]))
    elif product_path.endswith(".png"):
        dataset.add_data_file(DataFile("image/png",product_path, ["browse"]))
    else:
        # Everything else in the output directory is registered as metadata.
        dataset.add_data_file(DataFile(None,product_path, ["metadata"]))

# Add the STAC file we are creating.
# The future metadata file needs to be added to the STAC as well;
# this will eventually be moved into the to_stac() function.
dataset.add_data_file(DataFile("text/json",os.path.join(output_stac_catalog_dir, stac_item_name), ["metadata"]))

# Add the dataset to the collection
# NOTE(review): this appends to the private _datasets list instead of calling
# add_dataset (left commented out below); the reason is not visible here - confirm before switching.
#out_collection.add_dataset(dataset)
out_collection._datasets.append(dataset)

Collection.to_stac(out_collection, output_stac_catalog_dir)