In [None]:
import os
import pathlib

# Application packages
#from tabulate import tabulate
import netCDF4

# stage_in packages
from unity_sds_client.resources.collection import Collection

# stage_out packages
from datetime import datetime, timezone
from unity_sds_client.resources.dataset import Dataset
from unity_sds_client.resources.data_file import DataFile

import subprocess

In [None]:
# Notebook parameters: the STAC item collection file describing the staged-in inputs,
# and the directory that receives the stage-out STAC catalog and output products.
# NOTE(review): the trailing `# type: stage-in` / `# type: stage-out` tags look like
# annotations read by external tooling -- keep them byte-identical; confirm with the
# Unity application-package documentation.
input_stac_collection_file = '/unity/ads/users/jfahlen/20230620t084426/catalog_local.json' # type: stage-in
output_stac_catalog_dir    = '/unity/ads/users/jfahlen/20230620t084426/'                    # type: stage-out

In [None]:
# Shared path prefix for every product this run writes into the stage-out directory.
local_output_location = os.path.join(output_stac_catalog_dir, 'emit20230620t084426')

# Matched-filter image and its header file, both derived from the shared prefix.
output_mf_filename = f'{local_output_location}_ch4_mf'
output_mf_hdr_filename = f'{local_output_location}_ch4_mf.hdr'

# Collection under which the products will eventually be catalogued in the Unity environment.
output_collection = "example-emit_ghg_jay"

# Echo the resolved output paths, one per line.
print(output_mf_filename, output_mf_hdr_filename, sep='\n')

# Import Files from STAC Item Collection

Load the data filenames from the stage-in STAC item collection file.

In [None]:
# Resolve the locations of the staged-in data files from the STAC item collection.
inp_collection = Collection.from_stac(input_stac_collection_file)
data_filenames = inp_collection.data_locations()

# os.mkdir() does not expand '~': the original call targeted a literal '~/output/'
# relative to the working directory (and failed on re-run once the directory existed).
# Expand the home directory explicitly and make creation idempotent.
dataset_dir = os.path.expanduser('~/output/')
os.makedirs(dataset_dir, exist_ok=True)

# Download the CH4 dataset file into the home-directory output folder.
# An argument list avoids the shell entirely; check=True raises CalledProcessError
# if the download fails instead of silently continuing without the file.
cmd = [
    'aws', 's3', 'cp',
    's3://emit-dev-unity-data/emit_ghg_bucket/dataset_ch4_full.hdf5',
    dataset_dir,
]
subprocess.run(cmd, check=True)

In [None]:
# Run ghg_process.py as a child process on the staged-in granule files.
# NOTE(review): the module is imported but not referenced in this cell; the import is
# kept in case it is relied on as an availability check or for import side effects.
import ghg_process


def _select_granule(filenames, product_tag):
    """Return the first entry of `filenames` containing both `product_tag` and '.nc'.

    Raises:
        IndexError: if no entry matches. The exception type matches what the former
            `[...][0]` lookup raised, but the message names the missing product.
    """
    for candidate in filenames:
        if product_tag in candidate and '.nc' in candidate:
            return candidate
    raise IndexError(f"no '.nc' file matching {product_tag!r} among the staged-in data files")


l1b_rad_filename = _select_granule(data_filenames, 'L1B_RAD_001')
l1b_obs_filename = _select_granule(data_filenames, 'L1B_OBS_001')
l2a_mask_filename = _select_granule(data_filenames, 'L2A_MASK_001')

# The command is passed as an argument list (no shell), so paths containing spaces or
# shell metacharacters reach the script intact.
# NOTE(review): the radiance file is passed twice, 'junk' fills one positional slot, and
# the mask file is reused for several arguments -- order preserved from the original;
# confirm against the ghg_process.py command-line interface.
run_cmd = [
    'python', 'ghg_process.py',
    l1b_rad_filename, l1b_obs_filename, l1b_rad_filename, 'junk',
    l2a_mask_filename, l2a_mask_filename, local_output_location,
    '--state_subs', l2a_mask_filename,
    '--overwrite',
]

# check=True raises CalledProcessError on a non-zero exit, so a failed run stops the
# notebook here instead of going on to catalogue outputs that were never written.
subprocess.run(run_cmd, check=True)

# Create stage-out item catalog

In [None]:
# Build the stage-out collection, describe the generated products as a Dataset, and
# write the resulting STAC catalog into the stage-out directory.
out_collection = Collection(output_collection)

# One timezone-aware UTC timestamp shared by all three time fields.
# datetime.utcnow() is deprecated, and calling it three times produced timestamps
# that differed by a few microseconds.
timestamp_utc = datetime.now(timezone.utc).isoformat()

# Create a Dataset for the collection
dataset = Dataset(
    name=output_mf_filename + '_dataset',
    collection_id=out_collection.collection_id,
    start_time=timestamp_utc,
    end_time=timestamp_utc,
    creation_time=timestamp_utc,
)

# Add output file(s) to the dataset
dataset.add_data_file(DataFile("ENVI", output_mf_filename, ["data"]))
dataset.add_data_file(DataFile("ENVI_hdr", output_mf_hdr_filename, ["data"]))

# The metadata JSON is expected to be generated by the to_stac call below; it is
# registered here so the STAC output references it.
dataset.add_data_file(DataFile("json", output_mf_filename + '.json', ["metadata"]))

# Add the dataset to the collection.
# NOTE(review): this appends to the private `_datasets` list; the public
# `out_collection.add_dataset(dataset)` call was commented out in the original --
# confirm whether it can be restored.
out_collection._datasets.append(dataset)

Collection.to_stac(out_collection, output_stac_catalog_dir)

In [None]:
# Print the Dataset object built for the stage-out catalog as a quick visual check.
print(dataset)