In [1]:
import numpy as np
import pandas as pd
import ee
import geemap
import os
import glob

In [2]:
# Authenticate with Earth Engine, then initialise the client library against
# the "ee-algae" Cloud project.
ee.Authenticate()
ee.Initialize(project = "ee-algae")

In [3]:
# Load the CCI lake point features and buffer each one by 5 km.
# NOTE(review): the buffer is meant to approximate the ERA5-Land pixel size —
# confirm that a 5 km radius is the intended footprint.
buffer_radius_m = 5 * 1000  # distance handed to ee.Feature.buffer
cci_lakes = (
    ee.FeatureCollection("projects/ee-sarice/assets/CCI_Lakes")
    .map(lambda feature: feature.buffer(buffer_radius_m))
)

In [4]:
# CCI lake ids: collect the "CCI ID" property of every lake feature and bring
# the result to the client with getInfo().
cci_lake_ids = cci_lakes.aggregate_array("CCI ID").getInfo()

In [5]:
# Years to process: 2003-2023 inclusive (range() excludes the stop value).
# NOTE(review): the download loop uses Aqua (MYD09GA), whose Earth Engine
# record begins on 2002-07-04 (Terra/MOD09GA begins earlier, in 2000), so 2003
# is the first complete calendar year.
years = list(range(2003, 2024))

In [6]:
# Directory that receives the per-year "<year>_aqua_cloud.csv" files.
out_dir = "/mnt/Data_2tb/laketemp_bias/modis_cloud/raw"

In [7]:
# function from https://spatialthoughts.com/2021/08/19/qa-bands-bitmasks-gee/
def bitwiseExtract(inputBit, fromBit, toBit):
    """Return the value stored in bits ``fromBit``..``toBit`` of ``inputBit``.

    Args:
        inputBit: Object exposing ``rightShift`` and ``bitwiseAnd`` (in this
            notebook, a QA band selected from an ``ee.Image``).
        fromBit: Index of the lowest bit to keep (0 = least significant).
        toBit: Index of the highest bit to keep, inclusive.

    Returns:
        ``inputBit`` shifted right by ``fromBit`` and AND-ed with a mask made
        of ``toBit - fromBit + 1`` low bits.
    """
    # Width of the requested bit range (both ends inclusive).
    bitCount = ee.Number(toBit).subtract(fromBit).add(1)
    # (1 << bitCount) - 1 has exactly bitCount low bits set.
    lowBitsMask = ee.Number(1).leftShift(bitCount).subtract(1)
    # Move the range down to bit 0, then drop everything above it.
    shifted = inputBit.rightShift(fromBit)
    return shifted.bitwiseAnd(lowBitsMask)

In [8]:
# def extract_cloud_qc_flag(img):
#     # extract bits 0-1 (cloud state) from the "state_1km" QA band
#     # Bits 0-1: 0: clear, 1: cloudy, 2: mixed, 3: not set, assumed clear
#     qc_img = bitwiseExtract(img.select("state_1km"), 0, 1).rename("qcflag")
#     # remove the bit bands
#     return qc_img

def extract_cloud_mask(img):
    """Turn one MODIS surface-reflectance image into a ("total", "cloud") pair.

    Args:
        img: Image providing the "sur_refl_b01" and "state_1km" bands.

    Returns:
        A two-band image:
        - "total": ``sur_refl_b01 * 0 + 1``, i.e. the value 1 derived from the
          red reflectance band.
        - "cloud": 1 where bits 0-1 of "state_1km" equal 1 (cloudy), else 0.
    """
    # Bits 0-1 of state_1km: 0: clear, 1: cloudy, 2: mixed, 3: not set, assumed clear.
    # Only state 1 (cloudy) is flagged; "mixed" is not counted as cloud.
    cloud_state = bitwiseExtract(img.select("state_1km"), 0, 1)
    is_cloudy = cloud_state.eq(1).rename("cloud")

    # Constant-one band, so that a later SUM yields a pixel count.
    pixel_ones = img.select("sur_refl_b01").multiply(0).add(1).rename("total")

    return pixel_ones.addBands(is_cloudy)

In [9]:
# One request per year: stack every Aqua image of the year into a single
# multi-band image and sum its "total"/"cloud" bands inside each buffered lake.
for yr in years:
    # Calendar-year window handed to filterDate.
    start_date = f"{yr}-01-01"
    end_date = f"{yr+1}-01-01"

    # Load MODIS Aqua surface reflectance (Terra / MOD09GA is not processed here)
    aqua = ee.ImageCollection("MODIS/061/MYD09GA").filterDate(start_date, end_date)

    # extract cloud mask
    aqua_cloud_mask = aqua.map(extract_cloud_mask)

    # Sum the per-image "total" and "cloud" bands over each lake buffer and
    # write the table for this year to CSV.
    stacked_masks = aqua_cloud_mask.toBands()
    out_csv = f"{out_dir}/{yr}_aqua_cloud.csv"
    geemap.zonal_statistics(
        stacked_masks,
        cci_lakes,
        out_csv,
        statistics_type="SUM",
        scale=1000,
    )

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-algae/tables/531fa8d8bf71c484e30cead5094a93a7-e00718a6f6a9d48656be2d6a6b9d0cea:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2003_aqua_cloud.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-algae/tables/6ee1d8a51b7a7fcd10014e98817d8b91-72b8e7eb0db410b2a33afbc946c02d76:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2004_aqua_cloud.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-algae/tables/03bf50ae6760b1bbf49476ece517c8b3-e7426d34b47ac16fd5433b413a50b062:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2005_aqua_cloud.csv
Computing statistics ...
Generating URL ...
Downloading data from https://eartheng

In [5]:
# Water temperature table: the first CSV column becomes the index and is
# parsed as dates.
water_temp_df = pd.read_csv(
    "/nas/cee-hydro/laketemp_bias/era5land/water_temp.csv",
    index_col=0,
    parse_dates=True,
)

In [6]:
# keep the non-cloudy observations: entries where cloud_cover_df is True become NaN
temp_df = water_temp_df.mask(cloud_cover_df, np.nan)

# Spot check: July 2020 for the column labelled "62"
july_2020 = pd.date_range("2020-07-01", "2020-07-31")
temp_df.loc[july_2020, "62"]

2020-07-01          NaN
2020-07-02          NaN
2020-07-03          NaN
2020-07-04    10.918571
2020-07-05          NaN
2020-07-06          NaN
2020-07-07          NaN
2020-07-08          NaN
2020-07-09          NaN
2020-07-10    10.666950
2020-07-11    11.385794
2020-07-12          NaN
2020-07-13    12.507469
2020-07-14    13.122165
2020-07-15          NaN
2020-07-16    13.381817
2020-07-17    14.801996
2020-07-18    14.255700
2020-07-19          NaN
2020-07-20          NaN
2020-07-21          NaN
2020-07-22          NaN
2020-07-23          NaN
2020-07-24          NaN
2020-07-25          NaN
2020-07-26    14.268321
2020-07-27    13.085286
2020-07-28    14.051697
2020-07-29    13.128550
2020-07-30          NaN
2020-07-31          NaN
Name: 62, dtype: float64

In [7]:
# Replace entries flagged in cloud_cover_df with NaN and save the masked table
# as a CSV file.
water_temp_df_cloud = water_temp_df.mask(cloud_cover_df, np.nan)
masked_csv_path = "/nas/cee-hydro/laketemp_bias/era5land/water_temp_cloud.csv"
water_temp_df_cloud.to_csv(masked_csv_path)

In [9]:
# Display the masked table as the cell output.
water_temp_df_cloud

Unnamed: 0,799,3114,7889,2516,12262,1519,3053,1203,3350,3607,...,300000430,278,293,300000771,378,473,309,141,212,170
2000-01-01,,,,,,,,,,,...,,,,,,,,,,
2000-01-02,,,,,,,,,,,...,,,,,,,,,,
2000-01-03,,,,,,,,,,,...,,,,,,,,,,
2000-01-04,,,,,,,,,,,...,,,,,,,,,,
2000-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,15.396758,,,,,,,4.061035,3.459507,21.349592,...,-0.000342,,,,,,,,,
2023-12-28,,,6.700112,,,7.396017,,4.053041,3.504207,21.951714,...,-0.000342,6.988318,,,2.999573,,0.752726,,-0.000342,
2023-12-29,,3.617657,,4.023334,,6.898298,,3.977074,4.106709,22.446399,...,-0.000342,,,,2.998324,,0.720556,,-0.000342,
2023-12-30,15.608841,,5.829605,,,,,3.873746,,22.942908,...,,7.067787,,,,,,,-0.000342,
