In [1]:
import numpy as np
import pandas as pd
import ee
import geemap
import os
import glob

In [2]:
# Authenticate with Earth Engine first, then initialise the client against the
# "ee-sarice" Cloud project. The order matters: Initialize needs the credentials
# that Authenticate obtains.
ee.Authenticate()
ee.Initialize(project = "ee-sarice")

In [3]:
# Load the CCI lake points from the project's Earth Engine asset. These features
# are the zones over which MODIS LST is summarised further down.
cci_lakes = ee.FeatureCollection("projects/ee-sarice/assets/CCI_Lakes")

In [4]:
# Collect the "CCI ID" property of every lake feature and bring it client-side
# as a plain Python list (getInfo() is a blocking round trip to the server).
cci_lake_ids = cci_lakes.aggregate_array("CCI ID").getInfo()

In [5]:
# Calendar years to process: 2000 through 2023 inclusive (the range end is exclusive).
years = [*range(2000, 2024)]

In [6]:
# Folder that receives one raw zonal-statistics CSV per year ("<year>_terra_lst.csv").
out_dir = "/mnt/Data_2tb/laketemp_bias/modis_cloud/raw"

In [7]:
# Download one CSV per year holding the mean MODIS Terra daytime LST of every CCI lake.
#
# Each export is a slow Earth Engine round trip, so the loop is resumable:
#   * years whose CSV already exists are skipped (delete the file to force a re-download);
#   * each export is written under a temporary name and only renamed to its final
#     name once the call has returned, so an interrupted run never leaves a
#     truncated file that looks complete.
os.makedirs(out_dir, exist_ok=True)

failed_years = []
for yr in years:
    out_csv = f"{out_dir}/{yr}_terra_lst.csv"
    if os.path.exists(out_csv):
        continue

    # One calendar year of daily Terra LST images (filterDate's end date is exclusive).
    modis_lst = (
        ee.ImageCollection("MODIS/061/MOD11A1")
        .select("LST_Day_1km")
        .filterDate(f"{yr}-01-01", f"{yr+1}-01-01")
    )

    # toBands() stacks the daily images into a single multi-band image, so the
    # CSV ends up with one "<date>_LST_Day_1km" column per day and one row per lake.
    tmp_csv = f"{out_dir}/{yr}_terra_lst_partial.csv"
    geemap.zonal_statistics(modis_lst.toBands(),
                            cci_lakes,
                            tmp_csv,
                            statistics_type='MEAN',
                            scale = 1000,
                           )

    # Publish the file under its final name only after the export has finished.
    if os.path.exists(tmp_csv):
        os.replace(tmp_csv, out_csv)
    else:
        failed_years.append(yr)

# Fail loudly instead of letting a missing year surface later as a read error.
if failed_years:
    raise RuntimeError(f"No CSV was produced for year(s): {failed_years}")

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/66b796e9be761ad0f39a93319781f449-69180352e8fae48f4d9586dea403196b:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2000_terra_lst.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/4114d9242722acfadd5f68d90190e736-27506213c06a3b454e68436ad8af9e80:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2001_terra_lst.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/c53b920bc582851703c6966e1239d120-015dc83382a4738121b58c74dd7f6d93:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/modis_cloud/raw/2002_terra_lst.csv
Computing statistics ...
Generating URL ...
Downloading data from https://eartheng

# Merge to one dataframe

In [8]:
import numpy as np
import pandas as pd
import os
import glob

In [2]:
# Read the raw per-year CSVs from the NAS. The download loop wrote them to
# "/mnt/Data_2tb/laketemp_bias/modis_cloud/raw" instead.
# NOTE(review): presumably the files were copied to the NAS by hand; confirm
# that both locations hold the same data.
out_dir = "/nas/cee-hydro/laketemp_bias/modis_cloud/raw"

In [3]:
# Build a boolean cloud mask (rows = dates, columns = CCI lake ids) from the raw
# yearly LST tables. A lake-day with no LST value is treated as cloud covered.

# Number of trailing non-LST columns dropped from every raw CSV.
# NOTE(review): 10 is carried over unchanged from the original slice; confirm it
# against the CSV header if the lake asset's properties ever change.
n_trailing_cols = 10

# Collect the yearly masks and concatenate once at the end. Growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on every iteration,
# and seeding it with an empty frame can degrade the resulting dtypes.
yearly_masks = []
for yr in range(2000, 2024):
    # read the raw file: one row per lake, one column per daily LST band
    df_lst = (
        pd.read_csv(f"{out_dir}/{yr}_terra_lst.csv")
        .set_index("CCI ID")
        .iloc[:, :-n_trailing_cols]
    )
    # a missing LST value is interpreted as cloud cover
    df_cloud_cover = df_lst.isna()
    # band columns are named "<YYYY_MM_DD>_LST_Day_1km"; keep only the date part
    df_cloud_cover.columns = pd.to_datetime(
        [col.replace("_LST_Day_1km", "") for col in df_cloud_cover.columns],
        format = "%Y_%m_%d",
    )
    # transpose so that dates index the rows and lakes the columns
    df_cloud_cover = df_cloud_cover.T
    df_cloud_cover.columns.name = ""
    df_cloud_cover.index.name = "date"
    yearly_masks.append(df_cloud_cover)

cloud_cover_df = pd.concat(yearly_masks, axis = 0)

# set column names to str so they line up with lake-id headers read from CSV
cloud_cover_df.columns = cloud_cover_df.columns.astype(str)
# True means cloudy, False means not cloudy
cloud_cover_df

Unnamed: 0_level_0,799,3114,7889,2516,12262,1519,3053,1203,3350,3607,...,278,293,300000771,378,317,473,309,141,212,170
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-02-24,True,True,True,True,True,True,True,True,True,True,...,False,True,False,True,True,True,True,True,True,True
2000-02-25,True,True,True,True,False,False,True,True,True,False,...,False,True,True,True,True,True,False,True,True,True
2000-02-26,False,True,True,True,True,True,True,False,False,True,...,True,True,True,True,True,True,True,True,True,True
2000-02-27,True,True,True,True,True,True,True,False,False,True,...,True,True,True,True,True,True,True,True,False,True
2000-02-28,False,False,True,True,True,False,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,False,True,True,True,True,True,True,False,False,False,...,True,True,True,True,True,True,True,True,True,True
2023-12-28,True,True,False,True,True,False,True,False,False,False,...,False,True,True,False,True,True,False,True,False,True
2023-12-29,True,False,True,False,True,False,True,False,False,False,...,True,True,True,False,True,True,False,True,False,True
2023-12-30,False,True,False,True,True,True,True,False,True,False,...,False,True,True,True,True,True,True,True,False,True


# Mask out cloudy days

In [5]:
# Water-temperature table: the first CSV column is parsed as dates and used as
# the index; the remaining columns are keyed by lake id.
water_temp_df = pd.read_csv(
    "/nas/cee-hydro/laketemp_bias/era5land/water_temp.csv",
    parse_dates=True,
    index_col=0,
)

In [6]:
# Blank out (NaN) every lake-day flagged as cloudy so that only the
# non-cloudy observations remain.
temp_df = water_temp_df.mask(cloud_cover_df, other=np.nan)

# Spot check: lake "62" over July 2020.
july_2020 = pd.date_range(start="2020-07-01", end="2020-07-31")
temp_df.loc[july_2020, "62"]

2020-07-01          NaN
2020-07-02          NaN
2020-07-03          NaN
2020-07-04    10.918571
2020-07-05          NaN
2020-07-06          NaN
2020-07-07          NaN
2020-07-08          NaN
2020-07-09          NaN
2020-07-10    10.666950
2020-07-11    11.385794
2020-07-12          NaN
2020-07-13    12.507469
2020-07-14    13.122165
2020-07-15          NaN
2020-07-16    13.381817
2020-07-17    14.801996
2020-07-18    14.255700
2020-07-19          NaN
2020-07-20          NaN
2020-07-21          NaN
2020-07-22          NaN
2020-07-23          NaN
2020-07-24          NaN
2020-07-25          NaN
2020-07-26    14.268321
2020-07-27    13.085286
2020-07-28    14.051697
2020-07-29    13.128550
2020-07-30          NaN
2020-07-31          NaN
Name: 62, dtype: float64

In [7]:
# Apply the cloud mask to the full water-temperature table and persist it.
# NOTE(review): the displayed result is also NaN on dates that cloud_cover_df
# does not contain (e.g. before 2000-02-24), not only on days flagged cloudy;
# confirm that this is intended.
water_temp_df_cloud = water_temp_df.mask(cloud_cover_df, other=np.nan)
water_temp_df_cloud.to_csv(
    "/nas/cee-hydro/laketemp_bias/era5land/water_temp_cloud.csv"
)

In [9]:
# Preview the cloud-masked water temperatures (rows = dates, columns = lake ids).
water_temp_df_cloud

Unnamed: 0,799,3114,7889,2516,12262,1519,3053,1203,3350,3607,...,300000430,278,293,300000771,378,473,309,141,212,170
2000-01-01,,,,,,,,,,,...,,,,,,,,,,
2000-01-02,,,,,,,,,,,...,,,,,,,,,,
2000-01-03,,,,,,,,,,,...,,,,,,,,,,
2000-01-04,,,,,,,,,,,...,,,,,,,,,,
2000-01-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,15.396758,,,,,,,4.061035,3.459507,21.349592,...,-0.000342,,,,,,,,,
2023-12-28,,,6.700112,,,7.396017,,4.053041,3.504207,21.951714,...,-0.000342,6.988318,,,2.999573,,0.752726,,-0.000342,
2023-12-29,,3.617657,,4.023334,,6.898298,,3.977074,4.106709,22.446399,...,-0.000342,,,,2.998324,,0.720556,,-0.000342,
2023-12-30,15.608841,,5.829605,,,,,3.873746,,22.942908,...,,7.067787,,,,,,,-0.000342,
