# Appendix

This notebook contains additional code for filtering soundings around a tower site. This task is not part of the ARSET training, but may be of interest. Performing this search with the default parameters takes about 2 hours, so the output is pre-computed and stored in the file "us_me2_oco3_dates.json".

## Define functions

In [None]:
import numpy as np

def compress_indices(indices: list[int]) -> list[list[int]]:
    """
    Collapse a collection of indices into runs of consecutive values.

    Duplicates are dropped and the values are sorted before grouping, so the
    input does not have to arrive sorted or unique.

    Args:
        indices: List of integer indices.

    Returns:
        One entry per run, in ascending order: [first, last] (both inclusive)
        for a run of two or more consecutive indices, or [index] for an
        isolated index. An empty input gives an empty list.
    """
    if not indices:
        return []

    unique_sorted = sorted(set(indices))

    # Every run begins life as a single-element list and is widened in place
    # to [first, last] as soon as a consecutive value shows up.
    runs: list[list[int]] = [[unique_sorted[0]]]

    for value in unique_sorted[1:]:
        current = runs[-1]
        if value != current[-1] + 1:
            # Gap: the previous run is finished, open a new one.
            runs.append([value])
        elif len(current) == 1:
            # Second member of the run: turn [first] into [first, last].
            current.append(value)
        else:
            # Run already has both ends: just move the upper end.
            current[1] = value

    return runs

def extract_indices(g, v, ndcs):
    """
    Gather the elements of ``g[v].data`` selected by a compressed index list.

    Args:
        g: Object indexable by ``v``; ``g[v]`` must expose a sliceable
            ``data`` attribute.
        v: Key of the variable to read from ``g``.
        ndcs: Compressed indices in the format produced by
            ``compress_indices``: each item is either [index] or
            [start, end] with ``end`` inclusive.

    Returns:
        The selected slices concatenated with ``np.concatenate`` in the order
        given by ``ndcs``, or an empty NumPy array when ``ndcs`` is empty.
    """
    def _slices():
        # One slice per compressed item; g[v].data is looked up per item, so
        # nothing is read at all when ndcs is empty.
        for item in ndcs:
            if len(item) == 1:
                first = last = item[0]
            else:
                first, last = item
            # Stored upper bound is inclusive, Python slices are exclusive.
            yield g[v].data[first:last + 1]

    chunks = list(_slices())
    if not chunks:
        return np.array([])
    return np.concatenate(chunks)

## Inputs

In [None]:
from datetime import datetime, timedelta

dataset = "OCO3_L2_Lite_SIF.11r"

# Search window: from the start of the OCO-3 mission to the end of the Ameriflux dataset
# (at least for the US-Me-2 site)
start_date = datetime(2019, 8, 6)
end_date = datetime(2022, 12, 31)

# (latitude, longitude) of the tower site
tower_site = (44.4526, -121.5589)
# Half-width of the lat/lon box around the tower site used for spatial averaging of SIF
tolerance = 0.25
lat_min, lat_max = tower_site[0] - tolerance, tower_site[0] + tolerance
lon_min, lon_max = tower_site[1] - tolerance, tower_site[1] + tolerance

# One entry per day in the time range, both end points included
n_days = (end_date - start_date).days + 1
dates: list[datetime] = [start_date + timedelta(days=offset) for offset in range(n_days)]

## Search

Please be aware that due to the large time range, this process will take about 2 hours.

In [None]:
import json
import numpy as np
import os
import sys
from tqdm.notebook import tqdm

sys.path.append(os.path.abspath("../src"))

from pysif import GesDiscDownloader

dl = GesDiscDownloader()

# Output structure: {"dates": [{"date": "YYYY-MM-DD", "indices": [[start, end] | [index], ...]}, ...]}
ndx_obj = {"dates": []}

for dt in tqdm(dates, desc="Studying dates"):
    try:
        # Use the dataset chosen in the Inputs cell so that this search and the averaging cell
        # below always read the same product (the stored indices are only valid for that product).
        granule = dl.get_granule_by_date(dataset, dt)
    except FileNotFoundError:
        # Presumably raised when there is no granule for this day; skip the day.
        continue
    lat = np.array(granule["Latitude"].data[:])
    lon = np.array(granule["Longitude"].data[:])

    # Boolean mask of the soundings strictly inside the lat/lon box around the tower site.
    in_box = (lon > lon_min) & (lon < lon_max) & (lat > lat_min) & (lat < lat_max)
    # Positions of the selected soundings; .tolist() gives plain Python ints, which is what
    # compress_indices expects and what json can serialize.
    tower_ndx: list[int] = np.flatnonzero(in_box).tolist()
    if tower_ndx:
        comp_indices = compress_indices(tower_ndx)
        ndx_obj["dates"].append({"date": dt.strftime("%Y-%m-%d"), "indices": comp_indices})

with open("us_me2_oco3_dates.json", "w") as fp:
    json.dump(ndx_obj, fp, indent=4)

In [None]:
import json
import os
import sys

import numpy as np
from tqdm.notebook import tqdm

# This cell only needs the pre-computed "us_me2_oco3_dates.json", so it must also work when the
# long-running search cell was skipped. Set up everything it uses here instead of relying on
# names that only exist after the search cell has run.
src_dir = os.path.abspath("../src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

from pysif import GesDiscDownloader

# Reuse the downloader from the search cell when it exists; otherwise create one.
if "dl" not in globals():
    dl = GesDiscDownloader()

with open("us_me2_oco3_dates.json") as fp:
    obj = json.load(fp)

# One record per day: {"date": "YYYY-MM-DD", "sif": <mean of the selected soundings>}
daily_avg_sif: list[dict[str, object]] = []
for day in tqdm(obj["dates"], desc="Computing averages"):
    # Dates were written with strftime("%Y-%m-%d"), so parse them with the same format.
    date = datetime.strptime(day["date"], "%Y-%m-%d")
    indices = day["indices"]
    granule = dl.get_granule_by_date(dataset, date)
    sif = extract_indices(granule, "Daily_SIF_757nm", indices)
    qual_flag = extract_indices(granule, "Quality_Flag", indices)
    # Keep only soundings whose quality flag is below 2; the rest become NaN so that
    # nanmean ignores them.
    filtsif = np.where(qual_flag < 2, sif, np.nan)
    # NOTE: if no sounding passes the filter, nanmean returns NaN (with a RuntimeWarning) and
    # json.dump writes it as the non-standard token NaN.
    daily_avg_sif.append({"date": day["date"], "sif": float(np.nanmean(filtsif))})

with open("us_me2_oco3_sif.json", "w") as fp:
    json.dump({"dates": daily_avg_sif}, fp, indent=4)