In [1]:
import numpy as np
import pandas as pd
import ee
import geemap
import os
import glob

In [2]:
# Authenticate against Google Earth Engine, then initialize the client
# for the "ee-sarice" cloud project that also hosts the lake asset used below.
ee.Authenticate()
ee.Initialize(project = "ee-sarice")

In [3]:
# Load the CCI lake points and turn every point into a 10 km buffer
# (10 km was chosen to match the ERA5-Land pixel length).
cci_lakes = (
    ee.FeatureCollection("projects/ee-sarice/assets/CCI_Lakes")
    .map(lambda lake: lake.buffer(10 * 1000))
)

In [4]:
# CCI lake ids
# Gather the "CCI ID" property of every buffered lake feature and fetch the
# resulting list to the client via getInfo().
cci_lake_ids = cci_lakes.aggregate_array("CCI ID").getInfo()

In [5]:
# Years to export: 2000 through 2023 inclusive (the range end is exclusive).
years = [*range(2000, 2024)]

In [6]:
# Directory that receives the raw per-year pixel-count CSVs exported below.
out_dir = "/mnt/Data_2tb/laketemp_bias/modis_cloud/raw"

In [None]:
# Export, for every year, the per-lake daily pixel counts of MODIS daytime LST.
#
# Two CSVs are written per year:
#   * counts from the UNMASKED collection -> every pixel inside the buffer
#     is counted (i.e. the TOTAL number of pixels);
#   * counts from the masked collection   -> only pixels that carry an LST
#     value are counted (i.e. the VALID number of pixels).
#
# NOTE: the file names are swapped relative to their content
# ("*_valid_pixels.csv" holds the total counts, "*_total_pixels.csv" holds the
# valid counts). The swap is kept on purpose because the downstream cell that
# reads these files compensates for it; renaming here alone would invert the
# cloud-cover computation.

# The source collection does not depend on the year, so build it once.
lst_day = ee.ImageCollection("MODIS/061/MOD11A1").select("LST_Day_1km")

for yr in years:
    # Load MODIS LST for this year and turn every pixel that has a value
    # into the constant 1 (pixels without a value stay masked).
    modis_lst = lst_day.filterDate(f"{yr}-01-01", f"{yr+1}-01-01")
    modis_lst = modis_lst.map(lambda x: x.multiply(0).add(1))
    # Same images with masked pixels filled by 0 so that COUNT sees every pixel.
    modis_lst_unmask = modis_lst.map(lambda x: x.unmask(0))

    exports = [
        (modis_lst_unmask, f"{out_dir}/{yr}_valid_pixels.csv"),  # should be total_pixels.csv
        (modis_lst, f"{out_dir}/{yr}_total_pixels.csv"),  # should be valid_pixels.csv
    ]
    for collection, out_csv in exports:
        # Make the long-running cell resumable: skip years already on disk.
        if os.path.exists(out_csv):
            continue

        # Download under a temporary name (still ending in ".csv", which
        # geemap uses to pick the export format) and rename afterwards, so an
        # interrupted download is never mistaken for a finished file.
        root, ext = os.path.splitext(out_csv)
        tmp_csv = f"{root}.partial{ext}"
        geemap.zonal_statistics(collection.toBands(),
                                cci_lakes,
                                tmp_csv,
                                statistics_type='COUNT',
                                scale = 1000,
                               )
        # NOTE(review): some geemap versions appear to print download errors
        # instead of raising — confirm; promote only if a file was produced.
        if os.path.exists(tmp_csv):
            os.replace(tmp_csv, out_csv)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/74b6da35f961bd30df2232fca5988bcf-e78fa538f82209f326ab66a5a977b140:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2000_valid_pixels.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/d9682d7197f7daad052f8d7751a8b3e1-22a963d6e85644cf8265e42ca6a405a4:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2000_total_pixels.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/233573e9ede7f39ead9bffbce51b46dc-29fcdcdcba43e82da1bd4a392435dbb2:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2001_valid_pixels.csv
Computing statistics ...
Generating URL ...
Downloading data from https:/

# Calculate cloud cover and export to one time series

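Attention: I made a mistake while exporting the CSVs from MODIS — the file names are swapped. `*_valid_pixels.csv` actually contains the total pixel counts, and `*_total_pixels.csv` actually contains the valid pixel counts.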

In [3]:
# Directory holding the raw per-year pixel-count CSVs that are read below.
out_dir = "/mnt/Data_2tb/laketemp_bias/modis_cloud/raw"

In [4]:
# Build one daily cloud-cover time series (rows: dates, columns: CCI lake ids)
# from the per-year pixel-count CSVs.
#
# cloud cover = 1 - valid_pixels / total_pixels
#
# The raw files were exported with swapped names, so "*_valid_pixels.csv" is
# read as the total counts and "*_total_pixels.csv" as the valid counts.

# Number of trailing non-band columns dropped from every raw CSV.
# NOTE(review): presumably the lake feature attributes appended by the
# zonal-statistics export — confirm against a raw file.
n_trailing_cols = 10

# Collect the yearly frames and concatenate once at the end; growing a
# DataFrame with concat inside the loop copies all previous years each time.
yearly_cloud_cover = []

for yr in range(2000, 2024):
    # fix the mistake (file names are swapped, see above)
    df_total_pixels = (
        pd.read_csv(f"{out_dir}/{yr}_valid_pixels.csv")
        .set_index("CCI ID")
        .iloc[:, :-n_trailing_cols]
    )
    df_valid_pixels = (
        pd.read_csv(f"{out_dir}/{yr}_total_pixels.csv")
        .set_index("CCI ID")
        .iloc[:, :-n_trailing_cols]
    )
    # calculate cloud cover
    df_cloud_cover = 1 - df_valid_pixels / df_total_pixels
    # Band names look like "<YYYY_MM_DD>_LST_Day_1km"; keep only the date part.
    df_cloud_cover.columns = pd.to_datetime(
        [col.replace("_LST_Day_1km", "") for col in df_cloud_cover.columns],
        format = "%Y_%m_%d",
    )
    # Transpose so that dates become the row index and lakes the columns.
    yearly_cloud_cover.append(df_cloud_cover.T)

cloud_cover_df = pd.concat(yearly_cloud_cover, axis = 0)
cloud_cover_df.columns.name = ""
cloud_cover_df.index.name = "date"

cloud_cover_df

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2000_valid_pixels.csv'

In [4]:
# Persist the combined cloud-cover time series as a single CSV.
cloud_cover_df.to_csv("/mnt/Data_2tb/laketemp_bias/modis_cloud/cci_cloud_cover.csv")

In [2]:
# Example: daily cloud cover of lake 62 during July 2020.
cloud_cover_df = pd.read_csv(
    "/mnt/Data_2tb/laketemp_bias/modis_cloud/cci_cloud_cover.csv",
    parse_dates = True,
    index_col = 0,
)
july_2020 = pd.date_range("2020-07-01", "2020-07-31")
cloud_cover_df.loc[july_2020, "62"]

2020-07-01    1.000000
2020-07-02    0.963576
2020-07-03    1.000000
2020-07-04    0.370861
2020-07-05    1.000000
2020-07-06    1.000000
2020-07-07    0.960265
2020-07-08    1.000000
2020-07-09    1.000000
2020-07-10    0.000000
2020-07-11    0.046358
2020-07-12    1.000000
2020-07-13    0.152318
2020-07-14    0.000000
2020-07-15    0.536424
2020-07-16    0.096026
2020-07-17    0.000000
2020-07-18    0.112583
2020-07-19    1.000000
2020-07-20    1.000000
2020-07-21    0.715232
2020-07-22    0.417219
2020-07-23    1.000000
2020-07-24    1.000000
2020-07-25    1.000000
2020-07-26    0.029801
2020-07-27    0.000000
2020-07-28    0.046358
2020-07-29    0.000000
2020-07-30    1.000000
2020-07-31    1.000000
Name: 62, dtype: float64

# Create water temperature with 25% cloud gap

In [3]:
# Load both inputs with the first column as a parsed datetime index.
water_temp_df = pd.read_csv(
    "/mnt/Data_2tb/laketemp_bias/era5land/water_temp.csv",
    parse_dates=True,
    index_col=0,
)
cloud_cover_df = pd.read_csv(
    "/mnt/Data_2tb/laketemp_bias/modis_cloud/cci_cloud_cover.csv",
    parse_dates=True,
    index_col=0,
)

In [4]:
# Blank out (NaN) every water temperature whose cloud cover exceeds 25 %,
# then show lake 62 for July 2020 as a sanity check.
too_cloudy = cloud_cover_df > 0.25
temp_df = water_temp_df.mask(too_cloudy, np.nan)
july_2020 = pd.date_range("2020-07-01", "2020-07-31")
temp_df.loc[july_2020, "62"]

2020-07-01          NaN
2020-07-02          NaN
2020-07-03          NaN
2020-07-04          NaN
2020-07-05          NaN
2020-07-06          NaN
2020-07-07          NaN
2020-07-08          NaN
2020-07-09          NaN
2020-07-10    10.666950
2020-07-11    11.385794
2020-07-12          NaN
2020-07-13    12.507469
2020-07-14    13.122165
2020-07-15          NaN
2020-07-16    13.381817
2020-07-17    14.801996
2020-07-18    14.255700
2020-07-19          NaN
2020-07-20          NaN
2020-07-21          NaN
2020-07-22          NaN
2020-07-23          NaN
2020-07-24          NaN
2020-07-25          NaN
2020-07-26    14.268321
2020-07-27    13.085286
2020-07-28    14.051697
2020-07-29    13.128550
2020-07-30          NaN
2020-07-31          NaN
Name: 62, dtype: float64

In [5]:
# Apply the 25 % cloud-cover gap to the full water-temperature table and
# write the gapped series to disk.
cloud_gap_mask = cloud_cover_df > 0.25
water_temp_df_cloudgapped_25 = water_temp_df.mask(cloud_gap_mask, np.nan)
water_temp_df_cloudgapped_25.to_csv(
    "/mnt/Data_2tb/laketemp_bias/era5land/water_temp_cloud_25.csv"
)