# Testing the lambda handler

In [27]:
%load_ext autoreload
%autoreload 2
from lambda_function import lambda_handler, open_icechunk_repo

import earthaccess
import os
import xarray as xr
import zarr

import warnings
warnings.filterwarnings("ignore") 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Environment configuration applied before exercising the handler.
# NOTE(review): presumably read by lambda_function at call time — confirm.
# Values are assigned unconditionally so that DRY_RUN is always "true" here.
os.environ.update(
    {
        "SECRET_ARN": "arn:aws:secretsmanager:us-west-2:444055461661:secret:mursst_lambda_edl_credentials-9dKy1C",
        "DRY_RUN": "true",
    }
)

In [63]:
# Uncomment to invoke the handler locally with an empty event dict.
# lambda_handler({})

# Testing the store

In [37]:
# Location of the icechunk store and the space/time window used by the
# comparison cells that follow.
bucket = "nasa-eodc-public"
store_name = "MUR-JPL-L4-GLOB-v4.1-virtual-v1-p2"
lat_slice = slice(47.6, 47.7)
lon_slice = slice(-122.4, -122.3)
time_range = ["2024-06-02", "2024-06-30"]

# Log in via earthaccess, then request S3 credentials for the PODAAC DAAC.
earthaccess.login()
ea_creds = earthaccess.get_s3_credentials(daac="PODAAC")

### Get a value from the icechunk store

In [58]:
# Open the repository, take a read-only session on `main`, and load the
# session's store as a Zarr v3 dataset without consolidated metadata.
repo = open_icechunk_repo(bucket, store_name, ea_creds)
session = repo.readonly_session(branch="main")
xds = xr.open_zarr(
    session.store,
    zarr_version=3,
    chunks={},
    consolidated=False,
)

In [39]:
# Cut the test window out of the icechunk-backed dataset, print the shape of
# analysed_sst, and display its mean as the cell result.
subset = xds.sel(
    lat=lat_slice,
    lon=lon_slice,
    time=slice(*time_range),
)
print(subset["analysed_sst"].shape)
subset["analysed_sst"].mean().values

(29, 11, 11)


array(285.61836207)

### Get the same value from the original data

In [None]:
# Search for the source granules over the same date range, with a 09:00:00
# time appended to both the start and end dates.
results = earthaccess.search_data(
    short_name="MUR-JPL-L4-GLOB-v4.1",
    temporal=(f"{time_range[0]} 09:00:00", f"{time_range[1]} 09:00:00"),
)

# Keep the first direct-access link of every granule.
direct_access_links = []
for granule in results:
    direct_access_links.append(granule.data_links(access="direct")[0])

# Open the remote files and combine them into a single dataset.
fileset = earthaccess.open(direct_access_links, provider="POCLOUD")
og_ds = xr.open_mfdataset(fileset)

In [49]:
# Apply the same window to the original dataset, print the shape of
# analysed_sst, and display its mean for comparison with the icechunk value.
og_subset = og_ds.sel(
    lat=lat_slice,
    lon=lon_slice,
    time=slice(*time_range),
)
print(og_subset["analysed_sst"].shape)
og_subset["analysed_sst"].mean().values

(29, 11, 11)


array(285.61836207)

# Deleting data

If necessary, you can remove data either by truncating the arrays directly (Option 1) or by resetting the branch to an earlier commit (Option 2).

### Option 1: Resize the arrays.

In [31]:
# Truncate every listed array along its first axis. Nothing is persisted until
# the writable session is committed in a separate step.
session = repo.writable_session(branch="main")
store = session.store
variables = ["analysed_sst", "analysis_error", "sea_ice_fraction", "mask", "time"]

# New length of the first axis: indices 0..resize-1 are kept and every index
# >= resize is dropped.
resize = 364

# The root group is the same for every variable, so open it once.
group = zarr.open_group(store)
for var in variables:
    array = group[var]

    # Replace only the leading dimension and keep the others unchanged.
    # NOTE(review): assumes time is the first dimension of each array — confirm.
    new_shape = [resize, *array.shape[1:]]
    array.resize(new_shape)

  super().__init__(**codec_config)


In [38]:
# DANGER!
# Uncommenting the line below commits the pending changes held by `session`.
# The id shown in this cell's output comes from a run where it was executed.
# session.commit("Removed data for time > 2025-05-31")

'44B9XPA2C0QH6FVT5AF0'

### Option 2: Reset to a previous commit.

First, list the commits on `main` with their IDs:

In [64]:
# Pair each commit message on `main` with its id, in the order ancestry() yields them.
[(commit.message, commit.id) for commit in repo.ancestry(branch="main")]

[('Committed data for 2025-05-31 09:00:00 09:00:00 to 2025-06-28 09:00:00.',
  '7PZK6744FDHXHKPYVX8G'),
 ('Removed data for time > 2025-05-31', '44B9XPA2C0QH6FVT5AF0'),
 ('Commit data 2024-10-01 to 2025-06-04', 'GPRDBT2XK9ZERYSQ7EA0'),
 ('Commit data 2024-09-13 to 2024-09-30', 'CP5PHVT9V88VPZTZ0E00'),
 ('Committed data for 2024-09-12 09:00:00 to 2024-09-12 09:00:00 using 20240912090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',
  'W7Z0Y2FAGZ8WFPMJYZTG'),
 ('Committed data for 2024-09-11 09:00:00 to 2024-09-11 09:00:00 using 20240911090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',
  'QRB8HZE1WEK9AA1FWH00'),
 ('Commit data 2024-09-05 to 2024-09-10', '3R6SDVDMWP0SVB6KW0ZG'),
 ('Committed data for 2024-09-04 09:00:00 to 2024-09-04 09:00:00 using 20240904090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',
  'MSX9YMGN8EPP3S2Z8K50'),
 ('Committed data for 2024-09-01 09:00:00 to 2024-09-03 09:00:00 using 20240903090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1',
  'SE5QYNGDA0KSQCKTHGQ0')

In [None]:
# Replace "commit id" with one of the ids listed above, then uncomment to
# reset `main` to that commit.
# NOTE(review): presumably later commits stop being part of the branch history — confirm before running.
# repo.reset_branch("main", "commit id")