In [None]:
import collections
import os
import pprint

import ee
import numpy as np
import pandas as pd

from IPython.display import Image, display
import ipyplot


In [None]:
# Connect to Earth Engine through the high-volume endpoint
ee.Initialize(project='ee-cmorton', opt_url='https://earthengine-highvolume.googleapis.com')

# Per-scene statistics CSVs are read from ./stats (created on first run)
stats_ws = os.path.join(os.getcwd(), 'stats')
os.makedirs(stats_ws, exist_ok=True)
    

In [None]:
# WRS2 tiles to leave out of the study-area tile list
wrs2_skip_list = [
    'p010r027', 'p010r030'
]

# WRS2 tiles that intersect the bounding box below, in descending name order.
# The skip list holds WRS2 tile names, so it is matched against 'wrs2_tile'
# (the same property that is aggregated on the next line). The filter used to
# test 'mgrs_tile', which presumably never holds a WRS2 name, so the skip list
# had no effect - TODO confirm against the asset's properties.
wrs2_list = sorted(
    ee.FeatureCollection('projects/openet/assets/features/wrs2/custom')
    .filterBounds(ee.Geometry.BBox(-124, 26, -68, 50))
    .filter(ee.Filter.inList('wrs2_tile', wrs2_skip_list).Not())
    .aggregate_histogram('wrs2_tile').keys().getInfo(),
    reverse=True
)
print(len(wrs2_list))


# NOTE(review): judging by the name these are the coastal/ocean tiles; the list
# is not referenced anywhere else in the visible notebook.
# An earlier, shorter version of this list used to be assigned right before
# this one and was immediately overwritten; only the current list is kept.
ocean_wrs2_list = [
    'p048r027', 'p047r031', 'p047r030', 'p047r029', 'p046r033',
    'p045r034', 'p044r035', 'p043r036', 'p041r037', 'p040r038',
    'p025r040', 'p024r040', 'p024r027', 'p023r040',
    'p023r027', 'p022r040', 'p021r040', 'p020r029',
    'p017r041', 'p016r038', 'p015r040', 'p015r037',
    'p013r033', 'p012r032', 'p011r031', 'p011r030',
]

In [None]:
# Land mask: start from the logical inverse of the OpenET water_mask asset
land_mask = ee.Image('projects/openet/assets/features/water_mask').Not()
# Apply the NLCD/NALCMS water mask: anywhere NALCMS is class 18 (water, per the
# original note) or has no data (unmask(18)), force the land mask to 0
land_mask = land_mask.where(ee.Image("USGS/NLCD_RELEASES/2020_REL/NALCMS").unmask(18).eq(18), 0)
# land_mask = land_mask.And(ee.Image("USGS/NLCD_RELEASES/2020_REL/NALCMS").unmask(18).neq(18))
# # land_mask = ee.Image('projects/openet/assets/meteorology/conus404/ancillary/land_mask')

# SSEBop ET fraction collection; a later cell reads its 'wrs2_tile' property
# to build the tile list
# etf_coll_id = 'projects/openet/assets/ssebop/conus/gridmet/landsat/c02'
etf_coll_id = 'projects/usgs-gee-nhm-ssebop/assets/ssebop/landsat/c02'
# etf_coll_id = 'projects/openet/assets/intercomparison/ssebop/landsat/c02/v0p2p6'
band_name = 'et_fraction'

# Surface reflectance bands to use as (red, green, blue) for each Landsat
# collection prefix (the part of the scene ID before the first underscore)
rgb_bands = {
    'LT04': ['SR_B3', 'SR_B2', 'SR_B1'],
    'LT05': ['SR_B3', 'SR_B2', 'SR_B1'],
    'LE07': ['SR_B3', 'SR_B2', 'SR_B1'],
    'LC08': ['SR_B4', 'SR_B3', 'SR_B2'],
    'LC09': ['SR_B4', 'SR_B3', 'SR_B2'],
}

# Palette for the fmask overlay, one color per class value 0..fmask_max:
# 0 - white, 1 - no fill (green), 2 - shadow (dark blue), 3 - snow (light blue), 4 - cloud (light gray), 5 - water (purple), 6 - ocean mask
# Value 6 (fmask_max) is painted by the review cells wherever land_mask is 0
fmask_palette = "ffffff, 9effa1, blue, 00aff2, dddddd, purple, bfbfbf"
fmask_max = 6


In [None]:
# Replace the tile list with the WRS2 tiles that actually occur in the
# SSEBop collection (2020 through 2023), sorted in descending order
ssebop_tile_histogram = (
    ee.ImageCollection(etf_coll_id)
    .filterDate('2020-01-01', '2024-01-01')
    .aggregate_histogram('wrs2_tile')
)
wrs2_list = sorted(ssebop_tile_histogram.keys().getInfo(), reverse=True)
# One extra tile is always appended after the sorted list
wrs2_list.append('p018r028')
# pprint.pprint(wrs2_list)

In [None]:
def fmask(landsat_img, include_saturated=False):
    """Build a single-band "Fmask"-style class image from a Landsat C02 L2 image.

    The classes are derived from the QA_PIXEL bit flags. They are applied in
    the order listed, so a later class overwrites an earlier one wherever
    several flags are set on the same pixel:

        1 - SR_B4 is masked (no data)
        6 - saturated in any RGB band (only if include_saturated is True)
        5 - water (QA_PIXEL bit 7)
        2 - cloud shadow (bit 4)
        3 - snow (bit 5)
        4 - cloud (bit 3), dilated cloud (bit 1) or cirrus (bit 2)

    Pixels that end up with none of these classes (value 0) are masked out,
    which lets the result be blended on top of a true color image.

    Args:
        landsat_img: Image providing 'QA_PIXEL' and 'SR_B4' bands (and, when
            include_saturated is True, 'QA_RADSAT' plus a 'SPACECRAFT_ID'
            property).
        include_saturated: When True, also flag pixels whose QA_RADSAT bits
            for the RGB bands are set. Defaults to False, which reproduces
            the behaviour of the original one-argument function.

    Returns:
        The class image, masked where the class is 0, with one band 'fmask'.
    """
    qa_img = landsat_img.select('QA_PIXEL')

    def qa_bit(bit):
        # Non-zero where the given single-bit QA_PIXEL flag is set
        return qa_img.rightShift(bit).bitwiseAnd(1).neq(0)

    # The fill (bit 0) and clear (bit 6) flags and the two-bit confidence
    # fields (bits 8-15) are not used by this classification.
    dilate_mask = qa_bit(1)
    cirrus_mask = qa_bit(2)
    cloud_mask = qa_bit(3)
    shadow_mask = qa_bit(4)
    snow_mask = qa_bit(5)
    water_mask = qa_bit(7)

    # Start from an all-zero image and flag pixels with no SR_B4 data
    fmask_img = qa_img.multiply(0).where(landsat_img.select(['SR_B4']).mask().eq(0), 1)

    if include_saturated:
        # Flag as saturated if any of the RGB bands are saturated
        #   (change .gt(0) to .gt(7) to require all three RGB bands).
        # The shift lines the three RGB bits up at bit 0: the RGB bands are
        # B1-B3 on Landsat 4/5/7 and B2-B4 on Landsat 8/9.
        bitshift = ee.Dictionary({'LANDSAT_4': 0, 'LANDSAT_5': 0, 'LANDSAT_7': 0, 'LANDSAT_8': 1, 'LANDSAT_9': 1})
        saturated_mask = (
            landsat_img.select('QA_RADSAT')
            .rightShift(ee.Number(bitshift.get(ee.String(landsat_img.get('SPACECRAFT_ID'))))).bitwiseAnd(7)
            .gt(0)
        )
        fmask_img = fmask_img.where(saturated_mask, 6)

    fmask_img = (
        fmask_img
        .where(water_mask, 5)
        .where(shadow_mask, 2)
        .where(snow_mask, 3)
        .where(cloud_mask.Or(dilate_mask).Or(cirrus_mask), 4)
    )

    return fmask_img.updateMask(fmask_img.neq(0)).rename(['fmask'])


In [None]:
### Review thumbnails for scenes that are already on the scene skip list
# For each selected scene this cell shows the true color image, the true color
# image with the fmask overlay, and the same overlay for the scenes one WRS2
# row above and below.

# NOTE(review): these two thresholds are not referenced anywhere else in this cell
count_threshold_pct_min = 0
count_threshold_pct_max = 101
# count_threshold = 1

# Inclusive range of years whose stats CSV files are read
start_year = 1984
end_year = 2024
# NOTE(review): 'years' is not referenced below; the CSV loop rebuilds the range itself
years = list(range(start_year, end_year + 1))

# Maximum number of scenes to display before the review loop stops
print_count = 100
# Thumbnail 'dimensions' requested from Earth Engine and the displayed image width
image_size = 750
# image_size = 900
# image_size = 1024

# Read in the scene skip list
# (the CSV must provide SCENE_ID and REASON columns, both are used below)
scene_skip_url = '../v2p1.csv'
# scene_skip_url = 'https://raw.githubusercontent.com/cgmorton/scene-skip-list/main/v2p1.csv'
scene_skip_df = pd.read_csv(scene_skip_url)
scene_skip_list = list(scene_skip_df['SCENE_ID'].values)
print(f'Skip list images: {len(scene_skip_list)}')

# scene_cloudscore_url = '../v2p1_cloudscore.csv'
# # scene_cloudscore_url = 'https://raw.githubusercontent.com/cgmorton/scene-skip-list/main/v2p1_cloudscore.csv'
# scene_cloudscore_list = list(pd.read_csv(scene_cloudscore_url)['SCENE_ID'].values)
# print(f'Skip cloudscore images: {len(scene_cloudscore_list)}')


# Stats CSV columns printed next to each scene in the review loop
red_band = 'SR_RED'
green_band = 'SR_GREEN'
blue_band = 'SR_BLUE'


# Read every per-tile, per-year stats CSV (<stats_ws>/<year>/<tile>_<year>.csv)
# into one dataframe. Missing, unreadable and empty files are skipped.
print('Reading image stats CSV files')
stats_df_list = []
for wrs2_tile in wrs2_list:
    # if int(wrs2_tile[1:4]) not in range(10, 25):
    #     continue

    for year in range(start_year, end_year + 1):
        wrs2_stats_path = os.path.join(stats_ws, f'{year}', f'{wrs2_tile}_{year}.csv')
        if not os.path.isfile(wrs2_stats_path):
            # print(f'  {wrs2_tile}_{year} - Missing stats CSV, skipping')
            continue
        try:
            wrs2_stats_df = pd.read_csv(wrs2_stats_path, index_col=False)
        except Exception as e:
            # Report why the file was rejected instead of discarding the error
            print(f'  {wrs2_tile}_{year} - Error reading CSV, skipping ({e})')
            continue
        if wrs2_stats_df.empty:
            continue
        # Scene IDs look like LXSS_PPPRRR_YYYYMMDD: characters 5-10 hold the
        # WRS2 path/row and characters 12-19 hold the acquisition date
        scene_id_str = wrs2_stats_df['SCENE_ID'].str
        wrs2_stats_df['DATE'] = scene_id_str.slice(12, 20)
        wrs2_stats_df['WRS2'] = 'p' + scene_id_str.slice(5, 8) + 'r' + scene_id_str.slice(8, 11)
        stats_df_list.append(wrs2_stats_df)

# pd.concat() raises an unhelpful "No objects to concatenate" on an empty
# list, so fail with a message that points at the actual problem instead
if not stats_df_list:
    raise ValueError(
        f'No non-empty image stats CSV files were found in {stats_ws} '
        f'for {start_year}-{end_year}'
    )
stats_df = pd.concat(stats_df_list)


# Per-scene pixel count ratios, all relative to TOTAL_PIXELS
total_pixels = stats_df['TOTAL_PIXELS']

# The ACCA and SHADOW ratios are currently not computed
for ratio_prefix in ('SNOW', 'WATER'):
    stats_df[f'{ratio_prefix}_COUNT_RATIO'] = stats_df[f'{ratio_prefix}_PIXELS'] / total_pixels

# Pixel classes that count as "masked"
# (WATER_PIXELS and SATURATED_PIXELS are intentionally left out)
masked_pixel_columns = [
    'CLOUD_PIXELS', 'CIRRUS_PIXELS', 'DILATE_PIXELS',
    'SHADOW_PIXELS',
    'SNOW_PIXELS',
    'ACCA_PIXELS',
]
masked_pixels = stats_df[masked_pixel_columns[0]]
for masked_column in masked_pixel_columns[1:]:
    masked_pixels = masked_pixels + stats_df[masked_column]
stats_df['MASKED_PIXELS'] = masked_pixels

stats_df['CLOUD_COUNT_RATIO'] = stats_df['MASKED_PIXELS'] / total_pixels
# stats_df['CLOUD_COUNT_RATIO'] = stats_df['UNMASKED_PIXELS'] / stats_df['TOTAL_PIXELS']

# Number of scenes (rows) that were read
print(f'  {len(stats_df)}')


# Get the subset of target skipped scenes to review
subset_df = stats_df[stats_df['SCENE_ID'].isin(scene_skip_list)].copy()
# subset_df = stats_df[~stats_df['SCENE_ID'].isin(scene_skip_list)].copy()
# subset_df = stats_df[stats_df['SCENE_ID'].isin(scene_cloudscore_list)]

# Only look at Landsat 8 and 9 for this review
#subset_df = subset_df[subset_df['SCENE_ID'].str.slice(0,4).isin(['LC08', 'LC09'])]

# Only look at scenes whose skip list REASON contains "Shadow"
# na=False treats a blank REASON as "no match"; without it the mask contains
# NaN for those rows and pandas refuses to use it for boolean indexing.
reason_mask = scene_skip_df['REASON'].str.contains('Shadow', na=False)
# reason_mask = scene_skip_df['REASON'].str.contains('Snow', na=False)
# reason_mask = scene_skip_df['REASON'].str.contains('Missing', na=False)
# reason_mask = ~scene_skip_df['REASON'].str.contains('Snow', na=False)
subset_df = subset_df[subset_df['SCENE_ID'].isin(scene_skip_df.loc[reason_mask, 'SCENE_ID'].values)].copy()

# Filter to western scenes in the summer
# subset_df = subset_df[subset_df['DATE'].str.slice(4,6).astype(int).isin([4, 5, 6, 7, 8, 9, 10])]
# subset_df = subset_df[subset_df['WRS2'].str.slice(1,4).astype(int).isin(range(20, 40))].copy()
# subset_df = subset_df[subset_df['WRS2'].str.slice(5,8).astype(int).isin(range(30, 50))].copy()
# print(f'  {len(subset_df.count(axis=1))}')

# subset_df = subset_df[subset_df['DATE'].str.slice(4,6).astype(int).isin([10])]
# subset_df = subset_df[subset_df['WRS2'].str.slice(1,4).astype(int).isin([29])].copy()
# subset_df = subset_df[subset_df['WRS2'].str.slice(5,8).astype(int).isin([37])].copy()

# subset_df = subset_df[subset_df['SNOW_COUNT_RATIO'] < 0.1]
# print(f'  {len(subset_df.count(axis=1))}')

# subset_df = subset_df[~subset_df['WRS2'].isin(['p021r040'])].copy()
# subset_df = subset_df[subset_df['WATER_COUNT_RATIO'] > 0.2]
# print(f'  {len(subset_df.count(axis=1))}')

# Add the high CLOUD_COVER_LAND scenes to the skip list but don't remove from the dataframe
# Do this after pulling the scene skip list subset above
# NOTE(review): while subset_df is built from scenes that are already in
# scene_skip_list (the active selection above), this only appends duplicates;
# it adds new scenes only with the "~isin(scene_skip_list)" alternative.
# Confirm whether stats_df was intended here.
scene_skip_list.extend(subset_df[subset_df['CLOUD_COVER_LAND'] >= 71]['SCENE_ID'].values)



def _neighbor_scene_id(scene_id, row_offset):
    """Return the ID of the scene row_offset WRS2 rows away (same path and date).

    Only the path/row field (characters 5-10 of the ID) is rewritten. A plain
    str.replace() of the path/row digits replaces every occurrence, which
    corrupts the date whenever the same six digits also appear in it
    (e.g. 'LC08_020030_20200301').
    """
    wrs2_path = int(scene_id[5:8])
    wrs2_row = int(scene_id[8:11])
    return f'{scene_id[:5]}{wrs2_path:03d}{wrs2_row + row_offset:03d}{scene_id[11:]}'


def _cloud_cover_land(scene_id):
    """Return CLOUD_COVER_LAND for the scene from stats_df, or None if it has no stats row."""
    scene_stats_df = stats_df.loc[stats_df['SCENE_ID'] == scene_id]
    if len(scene_stats_df):
        return float(scene_stats_df.iloc[0]['CLOUD_COVER_LAND'])
    return None


def _true_color_img(img, landsat_type):
    """Scale the RGB surface reflectance bands and set non-land pixels to a flat 0.25."""
    sr_img = img.select(rgb_bands[landsat_type]).multiply([0.0000275]).add([-0.2])
    return sr_img.where(land_mask.unmask().eq(0), 0.25)


def _fmask_thumb_url(img, landsat_type):
    """Thumbnail URL of the true color image with the fmask classes blended on top."""
    region = img.geometry().bounds(1, 'EPSG:4326')
    return (
        _true_color_img(img, landsat_type).visualize(min=0, max=0.3, gamma=1.25)
        .blend(
            fmask(img).where(land_mask.unmask().eq(0), fmask_max)
            .visualize(bands='fmask', min=0, max=fmask_max, palette=fmask_palette)
        )
        .getThumbURL({'region': region, 'dimensions': image_size})
    )


def _neighbor_thumb_url(scene_id, landsat_type):
    """Fmask thumbnail URL for a neighboring scene, or None if it cannot be built.

    The neighbor may simply not exist in the collection, so request failures
    are expected here. Only Exception is caught so that interrupting the
    kernel (KeyboardInterrupt) still stops the loop.
    """
    try:
        return _fmask_thumb_url(ee.Image(f'LANDSAT/{landsat_type}/C02/T1_L2/{scene_id}'), landsat_type)
    except Exception:
        return None


# Set copy of the skip list for constant-time "(skipped)" lookups in the loop
skip_scene_ids = set(scene_skip_list)

new_skip_scenes = []
new_skip_count = 0

for i, row in subset_df.iterrows():

    scene_id = row["SCENE_ID"].upper()

    # Scenes one WRS2 row above/below on the same path and date
    above_scene_id = _neighbor_scene_id(scene_id, -1)
    below_scene_id = _neighbor_scene_id(scene_id, 1)
    above_cloud_pct = _cloud_cover_land(above_scene_id)
    below_cloud_pct = _cloud_cover_land(below_scene_id)

    # # Only show scenes that have above & below both skipped or None
    # if (((above_scene_id not in skip_scene_ids) and (above_cloud_pct is not None)) or
    #     ((below_scene_id not in skip_scene_ids) and (below_cloud_pct is not None))):
    #     continue

    landsat_type = scene_id.split('_')[0].upper()
    landsat_img = ee.Image(f'LANDSAT/{landsat_type}/C02/T1_L2/{scene_id}')
    landsat_region = landsat_img.geometry().bounds(1, 'EPSG:4326')

    # Landsat true color image
    landsat_url = (
        _true_color_img(landsat_img, landsat_type)
        .getThumbURL({'min': 0.0, 'max': 0.30, 'gamma': 1.25, 'region': landsat_region, 'dimensions': image_size})
    )

    # Landsat true color with Fmask
    fmask_url = _fmask_thumb_url(landsat_img, landsat_type)

    print('#'*80)
    print(
        f'  {scene_id}  {row["TOTAL_PIXELS"]:>10d}  {row["UNMASKED_PIXELS"]:>10d}'
        f'  ({row["CLOUD_COUNT_RATIO"]:>0.2f}) ({row["SNOW_COUNT_RATIO"]:>0.2f}) {row["CLOUD_COVER_LAND"]}'
        f'  {row[red_band]:0.2f}  {row[green_band]:0.2f}  {row[blue_band]:0.2f}'
    )
    # print(landsat_url)
    # print(fmask_url)
    ipyplot.plot_images([landsat_url, fmask_url], img_width=image_size)

    # Show the images above and below the target wrs2
    above_url = _neighbor_thumb_url(above_scene_id, landsat_type)
    below_url = _neighbor_thumb_url(below_scene_id, landsat_type)

    above_skipped = ' (skipped)' if above_scene_id in skip_scene_ids else ''
    below_skipped = ' (skipped)' if below_scene_id in skip_scene_ids else ''

    if above_url and below_url:
        print(f'{below_scene_id} ({below_cloud_pct}){below_skipped}  {above_scene_id} ({above_cloud_pct}){above_skipped}')
        ipyplot.plot_images([below_url, above_url], img_width=image_size)
    elif above_url:
        print(f'{above_scene_id} ({above_cloud_pct}){above_skipped}')
        ipyplot.plot_images([above_url], img_width=image_size)
    elif below_url:
        print(f'{below_scene_id} ({below_cloud_pct}){below_skipped}')
        ipyplot.plot_images([below_url], img_width=image_size)

    # Stop once print_count scenes have been displayed
    new_skip_scenes.append(scene_id)
    new_skip_count += 1
    if new_skip_count >= print_count:
        break

print('\nDone')


In [None]:
# ### Display old PTJPL images with water pixels that need to be rebuilt
# start_year = 2015
# end_year = 2024
# years = list(range(start_year, end_year + 1))

# print_count = 100
# image_size = 700


# print('Getting PTJPL scene list')
# ptjpl_coll_id = 'projects/openet/assets/ptjpl/conus/gridmet/landsat/c02'
# ptjpl_coll = (
#     ee.ImageCollection(ptjpl_coll_id).filterDate('2015-09-01', f'{end_year+1}-01-01')
#     .filterMetadata('model_version', 'not_equals', '0.4.1')
# )
# ptjpl_scenes = set(ptjpl_coll.aggregate_array('system:index').getInfo())
# print(len(ptjpl_scenes))


# print('Reading image stats CSV files')
# stats_df_list = []
# for wrs2_tile in wrs2_list:
#     # if int(wrs2_tile[1:4]) not in range(10, 25):
#     #     continue
        
#     for year in range(start_year, end_year + 1):
#         wrs2_stats_path = os.path.join(stats_ws, f'{year}', f'{wrs2_tile}_{year}.csv')
#         if not os.path.isfile(wrs2_stats_path):
#             # print(f'  {wrs2_tile}_{year} - Missing stats CSV, skipping')
#             continue
#         try:
#             wrs2_stats_df = pd.read_csv(wrs2_stats_path, index_col=False)
#         except Exception as e:
#             print(f'  {wrs2_tile}_{year} - Error reading CSV, skipping')
#             continue
#         if wrs2_stats_df.empty:
#             continue
#         wrs2_stats_df['DATE'] = wrs2_stats_df['SCENE_ID'].str.slice(12, 20)
#         wrs2_stats_df['WRS2'] = 'p' + wrs2_stats_df['SCENE_ID'].str.slice(5, 8) + 'r' + wrs2_stats_df['SCENE_ID'].str.slice(8, 11)
#         stats_df_list.append(wrs2_stats_df)

# stats_df = pd.concat(stats_df_list)
# print(f'  {len(stats_df.count(axis=1))}')


# # Compute the ratios
# # stats_df['ACCA_COUNT_RATIO'] = stats_df['ACCA_PIXELS'] / stats_df['TOTAL_PIXELS']
# stats_df['SNOW_COUNT_RATIO'] = stats_df['SNOW_PIXELS'] / stats_df['TOTAL_PIXELS']
# # stats_df['SHADOW_COUNT_RATIO'] = stats_df['SHADOW_PIXELS'] / stats_df['TOTAL_PIXELS']
# stats_df['WATER_COUNT_RATIO'] = stats_df['WATER_PIXELS'] / stats_df['TOTAL_PIXELS']
# stats_df['MASKED_PIXELS'] = (
#     stats_df['CLOUD_PIXELS'] + stats_df['CIRRUS_PIXELS'] + stats_df['DILATE_PIXELS']
#     + stats_df['SHADOW_PIXELS']
#     + stats_df['SNOW_PIXELS']
#     # + stats_df['WATER_PIXELS']
#     + stats_df['ACCA_PIXELS']
#     # + stats_df['SATURATED_PIXELS']
# )
# stats_df['CLOUD_COUNT_RATIO'] = stats_df['MASKED_PIXELS'] / stats_df['TOTAL_PIXELS']
# # stats_df['CLOUD_COUNT_RATIO'] = stats_df['UNMASKED_PIXELS'] / stats_df['TOTAL_PIXELS']

# # Filter to existing PTJPL scenes
# subset_df = stats_df[stats_df["SCENE_ID"].str.lower().isin(ptjpl_scenes)].copy()

# # Filter to scenes with lots of water pixels
# subset_df = subset_df[subset_df['WATER_COUNT_RATIO'] > 0.01].copy()
# subset_df.sort_values('WATER_COUNT_RATIO', ascending=False, inplace=True)
# # subset_df = subset_df[~subset_df['WRS2'].isin(['p021r040'])]
# print(f'  {len(subset_df.count(axis=1))}')


# for i, row in subset_df.iterrows():
#     # TODO: 
#     if row["SCENE_ID"].lower() not in ptjpl_scenes:
#         continue

#     scene_id = row["SCENE_ID"].upper()

#     wrs2_path = int(scene_id[5:8])
#     wrs2_row = int(scene_id[8:11])
#     wrs2_tgt = f'{wrs2_path:03d}{wrs2_row:03d}'
#     wrs2_above = f'{wrs2_path:03d}{wrs2_row-1:03d}'
#     wrs2_below = f'{wrs2_path:03d}{wrs2_row+1:03d}'    
    
#     landsat_type = scene_id.split('_')[0].upper()
#     landsat_img = ee.Image(f'LANDSAT/{landsat_type}/C02/T1_L2/{scene_id}')
#     landsat_region = landsat_img.geometry().bounds(1, 'EPSG:4326')
#     landsat_sr_img = landsat_img.select(rgb_bands[landsat_type]).multiply([0.0000275]).add([-0.2])

#     # Landsat true color image
#     landsat_url = (
#         landsat_sr_img.where(land_mask.unmask().eq(0), 0.25)
#         .getThumbURL({'min': 0.0, 'max': 0.30, 'gamma': 1.25, 'region': landsat_region, 'dimensions': image_size})
#     )

#     # Landsat true color with Fmask
#     fmask_url = (
#         landsat_sr_img.where(land_mask.unmask().eq(0), 0.25).visualize(min=0, max=0.3, gamma=1.25)
#         .blend(fmask(landsat_img).where(land_mask.unmask().eq(0), fmask_max).visualize(bands='fmask', min=0, max=fmask_max, palette=fmask_palette))
#         .getThumbURL({'region': landsat_region, 'dimensions': image_size})
#     )

#     # PTJPL scene
#     ptjpl_img = (
#         ee.Image(f'{ptjpl_coll_id}/{row["SCENE_ID"].lower()}')
#         .divide(ee.Image(f'projects/openet/assets/reference_et/conus/gridmet/daily/v1/{scene_id[12:20]}').select(['eto']).resample('bilinear'))
#     )
#     # viridis = ['#440154', '#433982', '#30678D', '#218F8B', '#36B677', '#8ED542', '#FDE725']
#     et_palette = ['DEC29B', 'E6CDA1', 'EDD9A6', 'F5E4A9', 'FFF4AD', 'C3E683', '6BCC5C', '3BB369', '20998F', '1C8691', '16678A', '114982', '0B2C7A']
#     ptjpl_url = (
#         ptjpl_img.divide(1000).where(land_mask.unmask().eq(0), 0.25).visualize(bands='et', min=0, max=1.25, palette=et_palette)
#         .getThumbURL({'region': landsat_region, 'dimensions': image_size})
#     )

#     print('#'*80)
#     print(
#         f'  {scene_id}  {row["TOTAL_PIXELS"]:>10d}  {row["UNMASKED_PIXELS"]:>10d}'
#         f'  ({row["CLOUD_COUNT_RATIO"]:>0.2f}) ({row["SNOW_COUNT_RATIO"]:>0.2f}) {row["CLOUD_COVER_LAND"]}'
#     )
#     print(landsat_url)
#     print(fmask_url)
#     print(ptjpl_url)
#     ipyplot.plot_images([landsat_url, fmask_url, ptjpl_url], img_width=image_size)

# print('\nDone')


In [None]:
# ### Remove existing PTJPL scenes with water pixels so they can be rerun with 0.4.1
# start_year = 2015
# end_year = 2023

# print('Getting PTJPL scene list')
# ptjpl_coll_id = 'projects/openet/assets/ptjpl/conus/gridmet/landsat/c02'
# ptjpl_coll = (
#     ee.ImageCollection(ptjpl_coll_id).filterDate('2015-09-01', f'{end_year+1}-01-01')
#     .filterMetadata('model_version', 'not_equals', '0.4.1')
# )
# ptjpl_scenes = set(ptjpl_coll.aggregate_array('system:index').getInfo())
# print(len(ptjpl_scenes))


# print('Reading image stats CSV files')
# stats_df_list = []
# for wrs2_tile in wrs2_list:
#     # if int(wrs2_tile[1:4]) not in range(10, 25):
#     #     continue
        
#     for year in range(start_year, end_year + 1):
#         wrs2_stats_path = os.path.join(stats_ws, f'{year}', f'{wrs2_tile}_{year}.csv')
#         if not os.path.isfile(wrs2_stats_path):
#             # print(f'  {wrs2_tile}_{year} - Missing stats CSV, skipping')
#             continue
#         try:
#             wrs2_stats_df = pd.read_csv(wrs2_stats_path, index_col=False)
#         except Exception as e:
#             print(f'  {wrs2_tile}_{year} - Error reading CSV, skipping')
#             continue
#         if wrs2_stats_df.empty:
#             continue
#         wrs2_stats_df['DATE'] = wrs2_stats_df['SCENE_ID'].str.slice(12, 20)
#         wrs2_stats_df['WRS2'] = 'p' + wrs2_stats_df['SCENE_ID'].str.slice(5, 8) + 'r' + wrs2_stats_df['SCENE_ID'].str.slice(8, 11)
#         stats_df_list.append(wrs2_stats_df)

# stats_df = pd.concat(stats_df_list)
# print(f'  {len(stats_df.count(axis=1))}')


# # Filter to existing PTJPL scenes
# subset_df = stats_df[stats_df["SCENE_ID"].str.lower().isin(ptjpl_scenes)].copy()

# # Compute the ratios
# subset_df['WATER_COUNT_RATIO'] = subset_df['WATER_PIXELS'] / subset_df['TOTAL_PIXELS']

# # Filter to scenes with lots of water pixels
# subset_df = subset_df[subset_df['WATER_COUNT_RATIO'] > 0.001].copy()
# print(f'  {len(subset_df.count(axis=1))}')


# # Remove the PTJPL images
# for i, row in subset_df.iterrows():
#     if row["SCENE_ID"].lower() not in ptjpl_scenes:
#         continue
#     print(row["SCENE_ID"])
#     ee.data.deleteAsset(f'{ptjpl_coll_id}/{row["SCENE_ID"].lower()}')

# print('\nDone')