In [None]:
import os
import numpy as np
import xarray as xr
import earthaccess
from datetime import datetime, timedelta
import pickle
import time
import random
from tqdm import tqdm

# Earthdata login (persist=True is forwarded to earthaccess.login)
auth = earthaccess.login(persist=True)

# Run configuration
bbox = (-83.62, 41.34, -82, 42.27)  # (lon_min, lat_min, lon_max, lat_max)
res = 0.01                          # grid cell size, same units as bbox
start_date = datetime(2024, 4, 14)  # first day to composite
end_date = datetime(2025, 5, 23)    # last day to composite (inclusive)
window_size = 7                     # number of days in each rolling window
decay = 0.8                         # weight multiplier per day of granule age

# Working directories: downloads, per-granule cache, final outputs
for required_dir in ("../Data/", "../Cache/", "../Images/"):
    os.makedirs(required_dir, exist_ok=True)

# Output grid: bin edges first, then the midpoint of each pair of edges
lat_bins = np.arange(bbox[1], bbox[3] + res, res)
lon_bins = np.arange(bbox[0], bbox[2] + res, res)
lat_centers = (lat_bins[:-1] + lat_bins[1:]) / 2
lon_centers = (lon_bins[:-1] + lon_bins[1:]) / 2
nlat = lat_centers.size
nlon = lon_centers.size

# --- Retry Helpers ---

def safe_search(short_name, temporal, bounding_box, max_retries=500):
    """Run ``earthaccess.search_data`` and retry whenever it raises.

    Args:
        short_name: Collection short name forwarded to the search.
        temporal: Temporal range forwarded to the search.
        bounding_box: Bounding box forwarded to the search.
        max_retries: Number of failed attempts after which the search is
            abandoned.

    Returns:
        The search results, or an empty list once ``max_retries`` attempts
        have failed.
    """
    query = dict(short_name=short_name, temporal=temporal, bounding_box=bounding_box)
    failures = 0
    while True:
        try:
            return earthaccess.search_data(**query)
        except Exception as err:
            failures += 1
            if failures < max_retries:
                # Pause 5-8 s (with jitter) before the next attempt.
                pause = 5 + random.uniform(0, 3)
                print(f"Search error: {err}. Retrying in {pause:.1f} seconds...")
                time.sleep(pause)
                continue
            print(f"Search failed after {max_retries} retries: {err}")
            return []

def safe_download(results, directory="../Data/", max_retries=5):
    """Run ``earthaccess.download`` and retry whenever it raises.

    Args:
        results: Search results to download.
        directory: Destination passed to ``earthaccess.download``.
        max_retries: Number of failed attempts after which the download is
            abandoned.

    Returns:
        Whatever ``earthaccess.download`` returns, or an empty list once
        ``max_retries`` attempts have failed.
    """
    failures = 0
    while True:
        try:
            return earthaccess.download(results, directory)
        except Exception as err:
            failures += 1
            if failures < max_retries:
                # Pause 5-8 s (with jitter) before the next attempt.
                pause = 5 + random.uniform(0, 3)
                print(f"Download error: {err}. Retrying in {pause:.1f} seconds...")
                time.sleep(pause)
                continue
            print(f"Download failed after {max_retries} retries: {err}")
            return []

# Wavelengths from a reference file
print("Retrieving wavelength list from a reference file...")
search_ref = safe_search(
    short_name="PACE_OCI_L2_AOP",
    temporal=("2024-06-01", "2024-06-05"),
    bounding_box=bbox,
)
if not search_ref:
    raise RuntimeError("No reference files found to retrieve wavelengths.")

# Only one granule is needed for the band list, so download just the first
# search hit instead of every granule in the reference period.
ref_paths = safe_download(search_ref[:1], "../Data/")
if not ref_paths:
    # safe_download returns [] after exhausting its retries; fail with a clear
    # message instead of an IndexError on the empty list.
    raise RuntimeError("Reference file download failed; cannot retrieve wavelengths.")
ref_file = ref_paths[0]

# Read the band centres into memory and close the file handle right away.
with xr.open_dataset(ref_file, group="sensor_band_parameters") as band_params:
    wave_all = band_params["wavelength_3d"].values
num_channels = len(wave_all)
print(f"Found {num_channels} channels.")

# Prepare main array: one (lat, lon, channel) composite per day, NaN = no data
total_days = (end_date - start_date).days + 1
ndarray_all = np.full((total_days, nlat, nlon, num_channels), np.nan, dtype=np.float32)

# Process day by day
for day_idx in range(total_days):
    current_date = start_date + timedelta(days=day_idx)
    window_start = current_date - timedelta(days=window_size - 1)
    window_end = current_date
    print(f"\nProcessing {current_date.date()} (window {window_start.date()} to {window_end.date()})")

    # Search for data in window (with retry)
    results = safe_search(
        short_name="PACE_OCI_L2_AOP",
        temporal=(window_start.strftime("%Y-%m-%d"), window_end.strftime("%Y-%m-%d")),
        bounding_box=bbox,
    )

    if not results:
        print("No data found for this window. Skipping to next date.")
        continue

    # Download files (with retry)
    paths = safe_download(results, "../Data/")
    if not paths:
        print("No files downloaded for this window. Skipping to next date.")
        continue

    # Weighted-sum and total-weight accumulators for this day's composite
    sum_all = np.zeros((num_channels, nlat, nlon))
    weight_all = np.zeros((num_channels, nlat, nlon))

    for path in paths:
        base = os.path.basename(path)

        # The second dot-separated field of the file name starts with YYYYMMDD.
        try:
            file_date = datetime.strptime(base.split(".")[1][:8], "%Y%m%d")
        except (IndexError, ValueError):
            # One oddly named file must not abort a run whose results are
            # only written to disk at the very end.
            print(f"Skipping {base}: cannot parse a date from the file name")
            continue

        # Older granules count less: weight = decay ** age_in_days.
        # Granules dated after the window end get zero weight.
        delta_days = (window_end - file_date).days
        weight = decay ** delta_days if delta_days >= 0 else 0

        cache_file = f"../Cache/{base}.npz"

        if not os.path.exists(cache_file):
            print(f"Processing {base} (not in cache)")
            try:
                # Context managers release the file handles; the .nc file is
                # deleted from disk a few iterations later.
                with xr.open_dataset(path, group="navigation_data") as nav:
                    lat = nav["latitude"].values
                    lon = nav["longitude"].values

                # The bounding-box test and the grid lookup depend only on the
                # geolocation, so compute them once instead of per channel.
                in_box = (
                    (lat >= bbox[1]) & (lat <= bbox[3]) &
                    (lon >= bbox[0]) & (lon <= bbox[2])
                )
                # NOTE: searchsorted defaults to side="left", so a coordinate
                # exactly on a bin edge maps to the lower bin (index -1 at the
                # first edge; such samples are dropped during accumulation).
                box_lat_idx = np.searchsorted(lat_bins, lat[in_box]) - 1
                box_lon_idx = np.searchsorted(lon_bins, lon[in_box]) - 1

                lat_parts = []
                lon_parts = []
                ch_parts = []
                val_parts = []

                with xr.open_dataset(path, group="geophysical_data") as geo:
                    rrs_ds = geo["Rrs"].assign_coords(wavelength_3d=wave_all)

                    for ch_idx, wl in tqdm(enumerate(wave_all), total=len(wave_all),
                                           desc=f"Channels in {base}", leave=False):
                        band = rrs_ds.sel(wavelength_3d=wl, method="nearest").values
                        box_vals = band[in_box]
                        finite = np.isfinite(box_vals)

                        lat_parts.append(box_lat_idx[finite])
                        lon_parts.append(box_lon_idx[finite])
                        ch_parts.append(np.full(np.count_nonzero(finite), ch_idx, dtype=np.int16))
                        val_parts.append(box_vals[finite])

                # Write to a temporary name and rename afterwards, so an
                # interrupted run never leaves a truncated cache file that a
                # later run would pick up as valid.
                tmp_file = f"{cache_file}.tmp.npz"
                np.savez_compressed(tmp_file,
                                    lat_idx=np.concatenate(lat_parts).astype(np.int16),
                                    lon_idx=np.concatenate(lon_parts).astype(np.int16),
                                    ch_idx=np.concatenate(ch_parts).astype(np.int16),
                                    val=np.concatenate(val_parts).astype(np.float32))
                os.replace(tmp_file, cache_file)
                print(f"Cached data to {cache_file}")

            except Exception as e:
                print(f"Failed to process {path}: {e}")
                continue
        else:
            print(f"Using cached data for {base}")

        # Load from cache (the context manager closes the underlying zip file)
        try:
            with np.load(cache_file) as cached:
                lat_idx = cached["lat_idx"]
                lon_idx = cached["lon_idx"]
                ch_idx = cached["ch_idx"]
                val = cached["val"]
        except Exception as e:
            print(f"Failed to read cache {cache_file}: {e}")
            continue

        # Keep only samples that landed on the output grid, then accumulate.
        # np.add.at adds unbuffered, so repeated cells are summed correctly.
        on_grid = (
            (lat_idx >= 0) & (lat_idx < nlat) &
            (lon_idx >= 0) & (lon_idx < nlon)
        )
        cells = (ch_idx[on_grid], lat_idx[on_grid], lon_idx[on_grid])
        np.add.at(sum_all, cells, val[on_grid].astype(np.float64) * weight)
        np.add.at(weight_all, cells, weight)

    # Finalize average; cells that received no weight become NaN
    with np.errstate(invalid="ignore", divide="ignore"):
        avg_all = sum_all / weight_all
        avg_all[weight_all == 0] = np.nan

    # (channel, lat, lon) -> (lat, lon, channel)
    ndarray_all[day_idx] = np.transpose(avg_all, (1, 2, 0))

    # Delete old files: granules dated one day before this window's start
    delete_date = (current_date - timedelta(days=window_size)).strftime('%Y%m%d')
    for fname in os.listdir("../Data/"):
        if delete_date in fname and fname.endswith(".nc"):
            try:
                os.remove(os.path.join("../Data/", fname))
                print(f"Deleted old file: {fname}")
            except Exception as e:
                print(f"Could not delete {fname}: {e}")

# Persist the 4D composite array
np.save("../Images/composite_data.npy", ndarray_all)
print("\nSaved full 4D composite data array to '../Images/composite_data.npy'.")

# Persist the axis metadata that goes with the array
metadata = dict(wavelengths=wave_all, lat=lat_centers, lon=lon_centers)
with open("../Images/composite_metadata.pkl", "wb") as meta_file:
    pickle.dump(metadata, meta_file)
print("Saved metadata (wavelengths, lat, lon) to '../Images/composite_metadata.pkl'.")


Retrieving wavelength list from a reference file...


QUEUEING TASKS | : 100%|████████████████████████| 9/9 [00:00<00:00, 3208.56it/s]
PROCESSING TASKS | : 100%|█████████████████████| 9/9 [00:00<00:00, 25420.02it/s]
COLLECTING RESULTS | : 100%|████████████████████| 9/9 [00:00<00:00, 9035.12it/s]


Found 172 channels.

Processing 2024-04-14 (window 2024-04-08 to 2024-04-14)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 5.7 seconds...
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 7.5 seconds...
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 7.0 seconds...


QUEUEING TASKS | : 100%|██████████████████████| 11/11 [00:00<00:00, 1023.45it/s]
PROCESSING TASKS | : 100%|██████████████████████| 11/11 [00:40<00:00,  3.70s/it]
COLLECTING RESULTS | : 100%|█████████████████| 11/11 [00:00<00:00, 24450.10it/s]


Using cached data for PACE_OCI.20240408T181826.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240409T171508.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240409T185328.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240410T175010.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T164652.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T182512.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T172154.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T190014.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc

Processing 2024-04-15 (window 2024-04-09 to 2024-04-15)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 7.7 seconds...
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 5.6 seconds...
Search error: {"errors":["An Internal Error has oc

QUEUEING TASKS | : 100%|██████████████████████| 12/12 [00:00<00:00, 4055.41it/s]
PROCESSING TASKS | : 100%|██████████████████████| 12/12 [00:09<00:00,  1.26it/s]
COLLECTING RESULTS | : 100%|█████████████████| 12/12 [00:00<00:00, 59074.70it/s]


Using cached data for PACE_OCI.20240409T171508.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240409T185328.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240410T175010.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T164652.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T182512.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T172154.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T190014.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240408T181826.L2.OC_AOP.V3_0.nc

Processing 2024-04-16 (window 2024-04-10 to 2024-04-16)


QUEUEING TASKS | : 100%|██████████████████████| 11/11 [00:00<00:00, 3751.92it/s]
PROCESSING TASKS | : 100%|██████████████████████| 11/11 [00:05<00:00,  2.12it/s]
COLLECTING RESULTS | : 100%|█████████████████| 11/11 [00:00<00:00, 79684.53it/s]


Using cached data for PACE_OCI.20240410T175010.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T164652.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T182512.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T172154.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T190014.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240409T185328.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240409T171508.L2.OC_AOP.V3_0.nc

Processing 2024-04-17 (window 2024-04-11 to 2024-04-17)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 7.6 seconds...
Search error: {"e

QUEUEING TASKS | : 100%|██████████████████████| 12/12 [00:00<00:00, 3122.50it/s]
PROCESSING TASKS | : 100%|██████████████████████| 12/12 [00:09<00:00,  1.30it/s]
COLLECTING RESULTS | : 100%|████████████████| 12/12 [00:00<00:00, 103563.06it/s]


Using cached data for PACE_OCI.20240411T164652.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240411T182512.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T172154.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T190014.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240410T175010.L2.OC_AOP.V3_0.nc

Processing 2024-04-18 (window 2024-04-12 to 2024-04-18)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 5.6 seconds...


QUEUEING TASKS | : 100%|██████████████████████| 11/11 [00:00<00:00, 3355.69it/s]
PROCESSING TASKS | : 100%|██████████████████████| 11/11 [00:04<00:00,  2.30it/s]
COLLECTING RESULTS | : 100%|█████████████████| 11/11 [00:00<00:00, 42096.12it/s]


Using cached data for PACE_OCI.20240412T172154.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240412T190014.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240411T164652.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240411T182512.L2.OC_AOP.V3_0.nc

Processing 2024-04-19 (window 2024-04-13 to 2024-04-19)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 5.1 seconds...
Search error: {"e

QUEUEING TASKS | : 100%|██████████████████████| 10/10 [00:00<00:00, 3236.35it/s]
PROCESSING TASKS | : 100%|██████████████████████| 10/10 [00:04<00:00,  2.22it/s]
COLLECTING RESULTS | : 100%|█████████████████| 10/10 [00:00<00:00, 79287.41it/s]


Using cached data for PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240412T190014.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240412T172154.L2.OC_AOP.V3_0.nc

Processing 2024-04-20 (window 2024-04-14 to 2024-04-20)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 6.0 seconds...
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 6.4 seco

QUEUEING TASKS | : 100%|██████████████████████| 11/11 [00:00<00:00, 3375.33it/s]
PROCESSING TASKS | : 100%|██████████████████████| 11/11 [00:06<00:00,  1.63it/s]
COLLECTING RESULTS | : 100%|█████████████████| 11/11 [00:00<00:00, 87381.33it/s]


Using cached data for PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240413T175656.L2.OC_AOP.V3_0.nc

Processing 2024-04-21 (window 2024-04-15 to 2024-04-21)


QUEUEING TASKS | : 100%|██████████████████████| 10/10 [00:00<00:00, 3267.10it/s]
PROCESSING TASKS | : 100%|██████████████████████| 10/10 [00:04<00:00,  2.35it/s]
COLLECTING RESULTS | : 100%|█████████████████| 10/10 [00:00<00:00, 65741.44it/s]


Using cached data for PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240414T165338.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240414T183158.L2.OC_AOP.V3_0.nc

Processing 2024-04-22 (window 2024-04-16 to 2024-04-22)


QUEUEING TASKS | : 100%|████████████████████████| 9/9 [00:00<00:00, 6827.41it/s]
PROCESSING TASKS | : 100%|████████████████████████| 9/9 [00:04<00:00,  1.94it/s]
COLLECTING RESULTS | : 100%|███████████████████| 9/9 [00:00<00:00, 73298.52it/s]


Using cached data for PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240422T181658.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240415T190657.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240415T172837.L2.OC_AOP.V3_0.nc

Processing 2024-04-23 (window 2024-04-17 to 2024-04-23)


QUEUEING TASKS | : 100%|██████████████████████| 10/10 [00:00<00:00, 3538.30it/s]
PROCESSING TASKS | : 100%|██████████████████████| 10/10 [00:06<00:00,  1.52it/s]
COLLECTING RESULTS | : 100%|█████████████████| 10/10 [00:00<00:00, 44620.26it/s]


Using cached data for PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240422T181658.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T171338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T185158.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240416T180338.L2.OC_AOP.V3_0.nc

Processing 2024-04-24 (window 2024-04-18 to 2024-04-24)
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 5.8 seconds...
Search error: {"errors":["An Internal Error has occurred."]}. Retrying in 7.1 seconds...


QUEUEING TASKS | : 100%|████████████████████████| 9/9 [00:00<00:00, 6587.91it/s]
PROCESSING TASKS | : 100%|████████████████████████| 9/9 [00:04<00:00,  1.83it/s]
COLLECTING RESULTS | : 100%|███████████████████| 9/9 [00:00<00:00, 47127.01it/s]


Using cached data for PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240422T181658.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T171338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T185158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240424T174837.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240417T183839.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240417T170019.L2.OC_AOP.V3_0.nc

Processing 2024-04-25 (window 2024-04-19 to 2024-04-25)


QUEUEING TASKS | : 100%|████████████████████████| 9/9 [00:00<00:00, 1580.30it/s]
PROCESSING TASKS | : 100%|████████████████████████| 9/9 [00:04<00:00,  1.95it/s]
COLLECTING RESULTS | : 100%|███████████████████| 9/9 [00:00<00:00, 70295.60it/s]


Using cached data for PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240422T181658.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T171338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T185158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240424T174837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240425T182336.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240418T173520.L2.OC_AOP.V3_0.nc

Processing 2024-04-26 (window 2024-04-20 to 2024-04-26)


QUEUEING TASKS | : 100%|██████████████████████| 10/10 [00:00<00:00, 2290.47it/s]
PROCESSING TASKS | : 100%|██████████████████████| 10/10 [00:08<00:00,  1.19it/s]
COLLECTING RESULTS | : 100%|█████████████████| 10/10 [00:00<00:00, 35971.73it/s]


Using cached data for PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240422T181658.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T171338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T185158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240424T174837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240425T182336.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240426T172015.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240426T185835.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240419T181021.L2.OC_AOP.V3_0.nc

Processing 2024-04-27 (window 2024-04-21 to 2024-04-27)


QUEUEING TASKS | : 100%|████████████████████████| 9/9 [00:00<00:00, 3030.32it/s]
PROCESSING TASKS | : 100%|████████████████████████| 9/9 [00:03<00:00,  2.40it/s]
COLLECTING RESULTS | : 100%|███████████████████| 9/9 [00:00<00:00, 72593.72it/s]


Using cached data for PACE_OCI.20240421T174202.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240422T181658.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T171338.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240423T185158.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240424T174837.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240425T182336.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240426T172015.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240426T185835.L2.OC_AOP.V3_0.nc
Using cached data for PACE_OCI.20240427T175513.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240420T184521.L2.OC_AOP.V3_0.nc
Deleted old file: PACE_OCI.20240420T170701.L2.OC_AOP.V3_0.nc

Processing 2024-04-28 (window 2024-04-22 to 2024-04-28)


QUEUEING TASKS | : 100%|██████████████████████| 10/10 [00:00<00:00, 1945.95it/s]
PROCESSING TASKS | :  90%|████████████████████▋  | 9/10 [00:05<00:00,  1.57it/s]

In [None]:
# Look up which band index has a wavelength of exactly 450 in the first granule.
# The comparison is exact, so the result is empty when no band equals 450.
band_params = xr.open_dataset(paths[0], group="sensor_band_parameters")
wave = band_params["wavelength_3d"].data
indices = np.nonzero(wave == 450)
indices[0]

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os

def _normalize_rrs(arr, vmin=0, vmax=0.03):
    """Linearly rescale *arr* from [vmin, vmax] to [0, 1], clipping the rest."""
    return np.clip((arr - vmin) / (vmax - vmin), 0, 1)


def _outer_extent(lon, lat):
    """Return imshow's [left, right, bottom, top] from cell-centre coordinates.

    imshow's ``extent`` describes the outer edges of the image, so the centre
    range is widened by half a cell on each side. Assumes evenly spaced
    centres (the spacing is taken from the first two entries).
    """
    half_lon = 0.5 * abs(lon[1] - lon[0]) if len(lon) > 1 else 0.0
    half_lat = 0.5 * abs(lat[1] - lat[0]) if len(lat) > 1 else 0.0
    return [
        lon.min() - half_lon, lon.max() + half_lon,
        lat.min() - half_lat, lat.max() + half_lat,
    ]


def generate_day_images(n, r_idx=113, g_idx=84, b_idx=42, *,
                        data_path="../Images/composite_data.npy",
                        meta_path="../Images/composite_metadata.pkl",
                        out_dir="../Images/"):
    """
    Generate individual true-color images for the first n days in the composite data.

    Pixels with no data (NaN) are drawn black.

    Args:
        n (int): Number of days to generate images for.
        r_idx (int): Index of the wavelength to use for red channel.
        g_idx (int): Index of the wavelength to use for green channel.
        b_idx (int): Index of the wavelength to use for blue channel.
        data_path (str): Path of the saved 4D composite array (.npy).
        meta_path (str): Path of the pickled metadata with "lat" and "lon".
        out_dir (str): Directory the PNG files are written to.

    Returns:
        The loaded composite array, or None when either input file is missing.
    """
    if not os.path.exists(data_path) or not os.path.exists(meta_path):
        print("Required files not found. Run the composite script first.")
        return None

    data = np.load(data_path)
    print("data.shape", data.shape)
    # NOTE: pickle.load can execute arbitrary code; only point meta_path at a
    # file written by the composite script, never at untrusted input.
    with open(meta_path, "rb") as f:
        meta = pickle.load(f)

    extent = _outer_extent(np.asarray(meta["lon"]), np.asarray(meta["lat"]))
    rgb_bands = [r_idx, g_idx, b_idx]

    n = min(n, data.shape[0])  # Ensure n doesn't exceed available data

    for day_idx in range(n):
        daily_data = data[day_idx]  # shape (h, w, c)

        if np.isnan(daily_data).all():
            print(f"Day {day_idx + 1} has no valid data. Skipping.")
            continue

        # Pick the three display bands at once -> (h, w, 3), scale to [0, 1],
        # then turn missing pixels into black so imshow never receives NaN.
        rgb = _normalize_rrs(daily_data[:, :, rgb_bands])
        rgb = np.nan_to_num(rgb, nan=0.0)

        # Plot
        plt.figure(figsize=(10, 6))
        plt.imshow(rgb, origin="lower", extent=extent)
        plt.title(f"True-Color Image - Day {day_idx + 1}")
        plt.xlabel("Longitude")
        plt.ylabel("Latitude")
        plt.tight_layout()

        out_path = os.path.join(out_dir, f"dayyy_{day_idx + 1:03d}.png")
        plt.savefig(out_path)
        plt.close()
        print(f"Saved image: {out_path}")

    return data


In [None]:
# Render images for the first 5 days and keep the returned composite array
# (None if the composite files are missing) for inspection in later cells.
data = generate_day_images(5)

In [None]:
# All channel values at day index 3, latitude row 45, longitude column 100
data[3, 45, 100]

In [None]:
# NOTE(review): a bare `break` outside a loop is a SyntaxError, so this cell
# always fails. Presumably it is a deliberate barrier that stops "Run All"
# before the script in the next cell — confirm before removing.
break

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import earthaccess
from datetime import datetime, timedelta

# Earthdata login (persist=True is forwarded to earthaccess.login)
auth = earthaccess.login(persist=True)

# Run configuration
selected_wavelengths = [645, 555, 450]  # R, G, B
bbox = (-83.62, 41.34, -82, 42.27)      # (lon_min, lat_min, lon_max, lat_max)
res = 0.01                              # grid resolution in degrees
start_date = datetime(2024, 5, 17)      # first composite end date
end_date = datetime(2025, 5, 23)        # last composite end date (inclusive)

# Working directories for rendered images and downloaded granules
for required_dir in ("../Images/", "../Data/"):
    os.makedirs(required_dir, exist_ok=True)

# Output grid: bin edges first, then the midpoint of each pair of edges
lat_bins = np.arange(bbox[1], bbox[3] + res, res)
lon_bins = np.arange(bbox[0], bbox[2] + res, res)
lat_centers = (lat_bins[:-1] + lat_bins[1:]) / 2
lon_centers = (lon_bins[:-1] + lon_bins[1:]) / 2
nlat = lat_centers.size
nlon = lon_centers.size

# Normalize reflectance for display (Rrs units are ~0–0.03).
# Defined once here rather than being redefined on every loop iteration.
def normalize(arr, vmin=0, vmax=0.03):
    """Linearly rescale *arr* from [vmin, vmax] to [0, 1], clipping the rest."""
    return np.clip((arr - vmin) / (vmax - vmin), 0, 1)


# Iterate through each day; every day ends a 5-day window (the day itself plus
# the four days before it).
for day_offset in range((end_date - start_date).days + 1):
    current_date = start_date + timedelta(days=day_offset)
    window_start = current_date - timedelta(days=4)
    window_end = current_date
    print(f"Processing window: {window_start.date()} to {window_end.date()}")

    results = earthaccess.search_data(
        short_name="PACE_OCI_L2_AOP",
        temporal=(window_start.strftime("%Y-%m-%d"), window_end.strftime("%Y-%m-%d")),
        bounding_box=bbox,
    )

    if not results:
        print(f"No data for {window_end.strftime('%Y-%m-%d')}. Skipping.")
        continue

    paths = earthaccess.download(results, "../Data/")

    if not paths:
        print(f"No files downloaded for {window_end.strftime('%Y-%m-%d')}. Skipping.")
        continue

    # Initialize sum and count arrays (one plane per display band)
    sum_rgb = np.zeros((3, nlat, nlon))
    count_rgb = np.zeros((3, nlat, nlon))

    # Band centres are read from the first granule and reused for the rest.
    try:
        with xr.open_dataset(paths[0], group="sensor_band_parameters") as band_params:
            wave = band_params["wavelength_3d"].values
    except Exception as e:
        print(f"Failed to read wavelength data: {e}")
        continue

    # Process each file
    for path in paths:
        print(f"Processing {path}")
        try:
            # Context managers release the file handles; the .nc files are
            # deleted from disk by the clean-up step of a later iteration.
            with xr.open_dataset(path, group="navigation_data") as nav:
                lat = nav["latitude"].values
                lon = nav["longitude"].values

            # Geolocation is shared by all bands: locate the pixels inside the
            # bounding box and their grid cells once per file.
            in_box = (
                (lat >= bbox[1]) & (lat <= bbox[3]) &
                (lon >= bbox[0]) & (lon <= bbox[2])
            )
            lat_idx = np.searchsorted(lat_bins, lat[in_box]) - 1
            lon_idx = np.searchsorted(lon_bins, lon[in_box]) - 1
            on_grid = (
                (lat_idx >= 0) & (lat_idx < nlat) &
                (lon_idx >= 0) & (lon_idx < nlon)
            )
            lat_idx = lat_idx[on_grid]
            lon_idx = lon_idx[on_grid]

            with xr.open_dataset(path, group="geophysical_data") as geo:
                rrs_ds = geo["Rrs"].assign_coords(wavelength_3d=wave)

                for band_no, wl in enumerate(selected_wavelengths):
                    band = rrs_ds.sel(wavelength_3d=wl, method="nearest").values
                    vals = band[in_box][on_grid]
                    finite = np.isfinite(vals)
                    cells = (lat_idx[finite], lon_idx[finite])
                    # np.add.at adds unbuffered, so pixels that share a grid
                    # cell are all counted (plain fancy-index += would not).
                    np.add.at(sum_rgb[band_no], cells, vals[finite])
                    np.add.at(count_rgb[band_no], cells, 1)
        except Exception as e:
            print(f"Failed to process {path}: {e}")

    # Compute mean reflectance; cells without samples become 0 (black)
    with np.errstate(invalid='ignore', divide='ignore'):
        mean_rgb = sum_rgb / count_rgb
        mean_rgb = np.nan_to_num(mean_rgb, nan=0.0)

    r = normalize(mean_rgb[0])
    g = normalize(mean_rgb[1])
    b = normalize(mean_rgb[2])
    rgb = np.stack([r, g, b], axis=-1)

    # Save true color image
    plt.figure(figsize=(10, 6))
    plt.imshow(rgb, origin="lower", extent=[bbox[0], bbox[2], bbox[1], bbox[3]])
    plt.title(f"5-Day Composite Ending {window_end.strftime('%Y-%m-%d')}")
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.tight_layout()
    out_path = f"../Images/{window_end.strftime('%Y%m%d')}.png"
    plt.savefig(out_path)
    plt.close()
    print(f"Saved image: {out_path}")

    # Clean up only the earliest date in window (the next window starts a day later)
    delete_date = window_start.strftime('%Y%m%d')
    for fname in os.listdir("../Data/"):
        if delete_date in fname and fname.endswith(".nc"):
            try:
                os.remove(os.path.join("../Data/", fname))
                print(f"Deleted old file: {fname}")
            except Exception as e:
                print(f"Could not delete {fname}: {e}")
