In [1]:
import numpy as np
import pandas as pd
import os
import glob

In [2]:
# Analysis years, 2003 through 2023 inclusive (the end of the range is exclusive).
# NOTE(review): 2002-07-04 is the first day of the Aqua MODIS record (Terra's
# record begins earlier, in 2000), so 2003 is the first full calendar year in
# which both satellites are available — confirm against the data provider.
years = [*range(2003, 2024)]

In [3]:
# All inputs and outputs live under one project directory on the NAS.
# Alternative local location of the raw MODIS files:
#   "/mnt/Data_2tb/laketemp_bias/modis_cloud/raw"
_project_root = "/nas/cee-hydro/laketemp_bias"

# Directory holding the per-year "<year>_terra_cloud.csv" / "<year>_aqua_cloud.csv" files.
raw_dir = f"{_project_root}/modis_cloud/raw"
# Input: water temperature table.
water_temp_path = f"{_project_root}/era5land/water_temp.csv"
# Output: water temperature table after cloud masking.
water_temp_cloud_path = f"{_project_root}/era5land/water_temp_cloud.csv"

# 1. Calculate cloud cover fraction from both Aqua and Terra
- Calculate the average cloud cover fraction
- If the Aqua value is NaN, use the Terra value, and vice versa

In [4]:
def _cloud_cover_fraction(csv_path):
    """Read one satellite-year CSV and return its daily cloud cover fraction.

    The file is indexed by its "CCI ID" column and its last 10 columns are
    dropped (presumably non-daily summary/metadata columns — TODO confirm).
    Columns whose name contains "_cloud" are divided by the matching columns
    whose name contains "_total"; the two sets are matched on the column name
    with that suffix removed.

    Parameters
    ----------
    csv_path : str
        Path of a "<year>_<satellite>_cloud.csv" file.

    Returns
    -------
    pandas.DataFrame
        Cloud count / total count, transposed so that rows are the date
        strings (index name "date") and columns are the CCI IDs
        (columns name "").
    """
    counts = pd.read_csv(csv_path, low_memory=False).set_index("CCI ID").iloc[:, :-10]

    def _columns_with(suffix):
        # Keep the columns carrying `suffix` and strip it so that the cloud
        # and total frames share identical labels for the division below.
        subset = counts.loc[:, [col for col in counts.columns if suffix in col]]
        subset.columns = [col.replace(suffix, "") for col in subset.columns]
        return subset

    # Transpose: dates become the rows, CCI IDs become the columns.
    fraction = (_columns_with("_cloud") / _columns_with("_total")).T
    fraction.columns.name = ""
    fraction.index.name = "date"
    return fraction


# One merged frame per year, concatenated once after the loop; calling
# pd.concat inside the loop would re-copy every previous year on each pass.
yearly_cloud_cover = []

for yr in years:
    terra_cloud_cover_fraction = _cloud_cover_fraction(f"{raw_dir}/{yr}_terra_cloud.csv")
    aqua_cloud_cover_fraction = _cloud_cover_fraction(f"{raw_dir}/{yr}_aqua_cloud.csv")

    # Average of the two satellites; the sum is NaN wherever either one is NaN ...
    cloud_df = (aqua_cloud_cover_fraction + terra_cloud_cover_fraction) / 2
    # ... so fill those gaps with whichever satellite has a value (Aqua first, then Terra).
    cloud_df = cloud_df.combine_first(aqua_cloud_cover_fraction).combine_first(terra_cloud_cover_fraction)

    yearly_cloud_cover.append(cloud_df)

# Stack all years along the date axis (empty frame when `years` is empty).
cloud_cover_df = pd.concat(yearly_cloud_cover, axis=0) if yearly_cloud_cover else pd.DataFrame([])

# set the index as pandas datetime (raw labels look like "2003_01_01")
cloud_cover_df.index = pd.to_datetime(cloud_cover_df.index, format="%Y_%m_%d")
# set the column labels as strings
cloud_cover_df.columns = cloud_cover_df.columns.astype(str)

In [13]:
# Preview the merged cloud cover fractions: one row per date, one column per CCI ID.
# NOTE(review): the saved output of this cell contains a few fractions slightly
# above 1 (e.g. 1.016553, 1.001418) — confirm whether the raw cloud/total counts
# can legitimately exceed 1 or whether the values should be clipped.
cloud_cover_df

Unnamed: 0_level_0,799,3114,7889,2516,12262,1519,3053,1203,3350,3607,...,278,293,300000771,378,317,473,309,141,212,170
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2003-01-01,0.000000,0.993199,0.948821,1.000000,1.000000,0.510348,1.000000,1.000000,0.395166,1.000000,...,0.924372,0.241399,1.000000,0.665022,0.291911,1.016553,0.523154,0.189356,,0.874073
2003-01-02,0.151232,0.727156,1.000000,0.975703,0.977547,1.000000,0.404091,0.393753,1.000000,0.500000,...,0.001787,0.414920,1.000000,0.303420,0.654733,0.503548,0.311300,0.471051,,0.204391
2003-01-03,0.570308,0.150984,0.000000,0.102630,0.600853,0.004241,0.087747,0.994745,1.000000,1.000000,...,0.505122,0.559583,0.826362,0.660833,1.000000,0.006723,0.610462,1.001418,,0.890081
2003-01-04,1.000000,0.284099,0.000000,0.036699,0.110640,0.000000,0.176828,1.000000,0.389405,0.084609,...,0.711279,0.012829,0.973964,1.000000,0.995865,0.495758,1.000000,1.000000,,0.227949
2003-01-05,0.000000,0.665386,0.177112,0.708647,0.812827,0.356884,0.103483,1.000000,1.000000,0.000000,...,0.737395,0.036859,0.621939,0.793550,0.832649,0.618951,0.169201,0.993196,,0.013607
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,0.042089,1.000000,1.000000,1.000000,1.000000,1.000000,0.756388,0.310419,0.119745,0.000000,...,1.000000,0.848269,1.000000,0.969695,0.336090,0.006376,0.992558,0.568837,,0.084441
2023-12-28,1.000000,0.839548,0.501360,1.000000,0.879867,0.640476,0.999440,0.000000,0.500000,0.000000,...,0.293896,1.000000,1.000000,0.249186,0.000000,0.317639,0.000000,0.404989,,0.651940
2023-12-29,0.283527,0.003974,0.634041,0.942844,0.483813,0.338481,0.962927,0.513284,0.520513,0.338330,...,0.115590,0.172348,1.000000,0.485621,0.080781,1.000000,0.182592,0.045438,,0.007737
2023-12-30,0.074816,1.000000,0.085529,1.000000,1.000000,0.906918,0.706513,0.000000,0.984475,0.008376,...,0.138852,1.000000,0.959185,1.000000,0.977617,0.017767,0.902422,0.667796,,0.010618


# 2. Mask out water temperature if cloud cover > 25%

In [6]:
# Load the water temperature table (first CSV column parsed as the datetime
# index) and keep the daily rows from 2003-01-01 through 2023-12-31.
# Selecting with the explicit list of dates means every one of those days is
# looked up by label in the file's index.
period_range = pd.date_range(start="2003-01-01", end="2023-12-31")
water_temp = (
    pd.read_csv(water_temp_path, index_col=0, parse_dates=True)
    .loc[period_range]
)
water_temp

Unnamed: 0,799,3114,7889,2516,12262,1519,3053,1203,3350,3607,...,300000430,278,293,300000771,378,473,309,141,212,170
2003-01-01,10.936206,3.115646,5.611666,3.544674,2.329048,6.654248,21.441741,0.002366,-0.000342,23.992869,...,-0.000342,5.503392,17.220549,0.654091,0.880383,-0.000342,0.620327,-0.000342,-0.000342,-0.000342
2003-01-02,10.932021,3.117595,5.637918,3.497143,2.254736,6.651057,21.696950,0.002484,-0.000342,24.453548,...,-0.000342,5.431835,17.492769,0.576524,0.641766,-0.000342,0.499218,-0.000342,-0.000342,-0.000342
2003-01-03,11.062538,2.843380,5.043739,3.288396,1.971878,6.179006,21.706449,0.003330,-0.000342,24.048551,...,-0.000342,5.440633,17.271219,0.631439,0.661515,-0.000342,0.454298,-0.000342,-0.000342,-0.000342
2003-01-04,11.407601,2.683357,4.367282,2.908139,1.654043,5.340158,21.585805,0.004003,-0.000342,23.049720,...,-0.000342,5.497761,13.514102,0.473760,0.725111,-0.000342,0.477011,-0.000342,-0.000342,-0.000342
2003-01-05,11.641747,2.488387,3.674479,2.663140,1.300885,4.575245,21.475432,0.005725,-0.000342,23.132607,...,-0.000342,5.538794,12.236690,0.371581,0.784981,-0.000342,0.463809,-0.000342,-0.000342,-0.000342
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,15.396758,3.625541,7.035598,4.104998,2.925948,7.781769,21.977073,4.061035,3.459507,21.349592,...,-0.000342,6.969979,18.580343,1.398393,3.148971,-0.000342,0.764114,-0.000342,-0.000342,-0.000342
2023-12-28,15.307682,3.667067,6.700112,4.063137,2.896821,7.396017,22.026259,4.053041,3.504207,21.951714,...,-0.000342,6.988318,17.668394,1.423019,2.999573,-0.000342,0.752726,-0.000342,-0.000342,-0.000342
2023-12-29,15.418274,3.617657,6.235015,4.023334,2.839454,6.898298,22.057891,3.977074,4.106709,22.446399,...,-0.000342,7.015534,16.474875,1.389183,2.998324,-0.000342,0.720556,-0.000342,-0.000342,-0.000342
2023-12-30,15.608841,3.492535,5.829605,3.927026,2.643310,6.596595,22.167544,3.873746,4.164867,22.942908,...,-0.000342,7.067787,14.418345,1.189809,3.031629,-0.000342,0.705289,-0.000342,-0.000342,-0.000342


In [12]:
# Blank out (set to NaN) every water temperature whose cloud cover fraction
# on that date exceeds 25%.
cloud_threshold = 0.25
is_too_cloudy = cloud_cover_df > cloud_threshold
# A NaN cloud cover compares as False, so those cells keep their temperature
# (column 212 in the displayed outputs is an example).
# NOTE(review): the two frames are matched by date and column label; column
# 317 appears in the cloud cover preview but not in the water temperature
# preview — confirm both tables cover the same set of IDs.
water_temp_cloud = water_temp.mask(is_too_cloudy)
water_temp_cloud

Unnamed: 0,799,3114,7889,2516,12262,1519,3053,1203,3350,3607,...,300000430,278,293,300000771,378,473,309,141,212,170
2003-01-01,10.936206,,,,,,,,,,...,,,17.220549,,,,,-0.000342,-0.000342,
2003-01-02,10.932021,,,,,,,,,,...,,5.431835,,,,,,,-0.000342,-0.000342
2003-01-03,,2.843380,5.043739,3.288396,,6.179006,21.706449,,,,...,,,,,,-0.000342,,,-0.000342,
2003-01-04,,,4.367282,2.908139,1.654043,5.340158,21.585805,,,23.049720,...,,,13.514102,,,,,,-0.000342,-0.000342
2003-01-05,11.641747,,3.674479,,,,21.475432,,,23.132607,...,,,12.236690,,,,0.463809,,-0.000342,-0.000342
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,15.396758,,,,,,,,3.459507,21.349592,...,-0.000342,,,,,-0.000342,,,-0.000342,-0.000342
2023-12-28,,,,,,,,4.053041,,21.951714,...,,,,,2.999573,,0.752726,,-0.000342,
2023-12-29,,3.617657,,,,,,,,,...,-0.000342,7.015534,16.474875,,,,0.720556,-0.000342,-0.000342,-0.000342
2023-12-30,15.608841,,5.829605,,,,,3.873746,,22.942908,...,,7.067787,,,,-0.000342,,,-0.000342,-0.000342


In [14]:
# Persist the cloud-masked water temperatures (masked cells are NaN in the frame).
water_temp_cloud.to_csv(path_or_buf=water_temp_cloud_path)