In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import ee
import geemap

In [2]:
# Authenticate with Google Earth Engine, then initialize the client
# against the "ee-sarice" project used by the asset and requests below.
ee.Authenticate()
ee.Initialize(project="ee-sarice")

In [3]:
# Lake features loaded from the project's "CCI_Lakes" Earth Engine asset;
# used as the zones for the zonal statistics below.
cci_lakes = ee.FeatureCollection("projects/ee-sarice/assets/CCI_Lakes")

In [4]:
# Years to download, one request batch per year: 2000 through 2023 inclusive.
first_year, last_year = 2000, 2023
years = [*range(first_year, last_year + 1)]

In [5]:
# Fetch the "CCI ID" property of every lake feature to the client
# (getInfo() is a blocking round trip to Earth Engine).
cci_lake_ids = cci_lakes.aggregate_array("CCI ID").getInfo()

In [6]:
# Directory that receives the raw per-year zonal-statistics CSV files.
out_dir = "/nas/cee-hydro/laketemp_bias/era5land/raw"

In [None]:
# Split the CCI lakes into two batches: the first 1012 IDs and the remainder.
roi_1, roi_2 = (
    cci_lakes.filter(ee.Filter.inList('CCI ID', batch_ids))
    for batch_ids in (cci_lake_ids[:1012], cci_lake_ids[1012:])
)

for yr in years:
    # All daily ERA5-Land images of the year, restricted to 2 m air temperature
    # and lake mixed-layer temperature, stacked into one multi-band image.
    daily_images = (
        ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
        .filterDate(f"{yr}-01-01", f"{yr+1}-01-01")
        .select(['temperature_2m', 'lake_mix_layer_temperature'])
    )
    era5land = daily_images.toBands()

    # Per-lake mean of every band, one CSV per lake batch.
    # No explicit scale is passed (a `scale=1000` override was left disabled).
    batch_outputs = (
        (roi_1, f"{out_dir}/{yr}_1.csv"),
        (roi_2, f"{out_dir}/{yr}_2.csv"),
    )
    for roi, csv_path in batch_outputs:
        geemap.zonal_statistics(era5land, roi, csv_path, statistics_type='MEAN')

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/fc652dfc5c57a4760635e5bc7457f910-d907e48fcb89a847d8dcbdc55a646145:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/era5land/2000_1.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/02fcb1175806c6c54d2f2f4e4e7d12cb-2515f29c84d3ed4083a89dc5fcc4ffa4:getFeatures
Please wait ...


In [None]:
# Split the CCI lakes into two batches: the first 1012 IDs and the remainder.
roi_1, roi_2 = (
    cci_lakes.filter(ee.Filter.inList('CCI ID', batch_ids))
    for batch_ids in (cci_lake_ids[:1012], cci_lake_ids[1012:])
)

for yr in years:
    # All daily ERA5-Land images of the year, restricted to the 10 m wind
    # components and downward solar radiation, stacked into one multi-band image.
    daily_images = (
        ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
        .filterDate(f"{yr}-01-01", f"{yr+1}-01-01")
        .select(['u_component_of_wind_10m',
                 'v_component_of_wind_10m',
                 'surface_solar_radiation_downwards_sum'])
    )
    era5land = daily_images.toBands()

    # Per-lake mean of every band, one CSV per lake batch.
    # No explicit scale is passed (a `scale=1000` override was left disabled).
    batch_outputs = (
        (roi_1, f"{out_dir}/{yr}_1_wind_srad.csv"),
        (roi_2, f"{out_dir}/{yr}_2_wind_srad.csv"),
    )
    for roi, csv_path in batch_outputs:
        geemap.zonal_statistics(era5land, roi, csv_path, statistics_type='MEAN')

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/4e12c0e2074b9da6811d393ba5dc8efe-7c99acdc01ba688b9e617990f6260a2d:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/era5land/raw/2000_1_wind_srad.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/ec24715269ee4470d95e5c2cb2bd9ec0-5ae8f37ab615a8012376f6253ccc42f8:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/era5land/raw/2000_2_wind_srad.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-sarice/tables/c51f6e07e635ce268d4c3b7c47ad8bbf-0c2cdea9809ec3db902741b9b70d3df5:getFeatures
Please wait ...
Data downloaded to /mnt/Data_2tb/laketemp_bias/era5land/raw/2001_1_wind_srad.csv
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine

# Merge air/water temperature

In [2]:
# Years covered by the raw downloads: 2000 through 2023 inclusive.
first_year, last_year = 2000, 2023
years = [*range(first_year, last_year + 1)]
# Directory holding the raw per-year zonal-statistics CSV files.
out_dir = "/nas/cee-hydro/laketemp_bias/era5land/raw"

In [3]:
# Split the raw yearly zonal statistics into air temperature and lake water
# (mixed-layer) temperature time series: rows = dates, columns = lake CCI IDs.
#
# Per-year frames are collected in lists and concatenated once at the end.
# Growing a DataFrame with pd.concat inside the loop copies all earlier rows
# on every iteration, and seeding it with an empty DataFrame depends on
# deprecated pandas handling of empty entries when result dtypes are inferred.
air_frames = []
lswt_frames = []

for yr in years:
    # Each year was downloaded as two lake batches; stack them (rows = lakes).
    # iloc[:, :-10] drops the last 10 columns of each raw file.
    # NOTE(review): presumably these are the non-band lake attribute columns
    # appended by the export -- confirm against a raw CSV.
    df1 = pd.read_csv(f"{out_dir}/{yr}_1.csv").set_index("CCI ID").iloc[:, :-10]
    df2 = pd.read_csv(f"{out_dir}/{yr}_2.csv").set_index("CCI ID").iloc[:, :-10]
    df = pd.concat([df1, df2], axis=0)

    for band, frames in (("temperature_2m", air_frames),
                         ("lake_mix_layer_temperature", lswt_frames)):
        # keep only the columns that belong to this band
        band_df = df.loc[:, [band in col for col in df.columns]]
        # strip the band suffix so the column names are dates,
        # then transpose so that every row is one date
        band_df = band_df.rename(columns=lambda col: col.replace(f"_{band}", "")).T
        band_df.index = pd.to_datetime(band_df.index)
        band_df.columns.name = ""
        frames.append(band_df)

df_air = pd.concat(air_frames, axis=0)
df_lswt = pd.concat(lswt_frames, axis=0)

# Lakes with at least one missing air-temperature value are dropped from
# both tables so that the two outputs cover the same lakes.
no_data_lakes = df_air.columns[df_air.isna().any()].to_numpy()
# Kelvin to Celsius
df_air = df_air.drop(columns=no_data_lakes) - 273.15
df_lswt = df_lswt.drop(columns=no_data_lakes) - 273.15

In [4]:
# Persist the merged air and water temperature tables as CSV.
temperature_outputs = (
    ("/nas/cee-hydro/laketemp_bias/era5land/air_temp.csv", df_air),
    ("/nas/cee-hydro/laketemp_bias/era5land/water_temp.csv", df_lswt),
)
for csv_path, table in temperature_outputs:
    table.to_csv(csv_path)

# Merge wind and solar radiation (srad) data

In [2]:
# Years covered by the raw downloads: 2000 through 2023 inclusive.
first_year, last_year = 2000, 2023
years = [*range(first_year, last_year + 1)]
# Directory holding the raw per-year zonal-statistics CSV files.
out_dir = "/nas/cee-hydro/laketemp_bias/era5land/raw"

In [3]:
# Build wind-speed and solar-radiation time series from the raw yearly zonal
# statistics: rows = dates, columns = lake CCI IDs.
#
# Per-year frames are collected in lists and concatenated once at the end.
# Growing a DataFrame with pd.concat inside the loop copies all earlier rows
# on every iteration, and seeding it with an empty DataFrame depends on
# deprecated pandas handling of empty entries when result dtypes are inferred.
wind_frames = []
srad_frames = []

for yr in years:
    # Each year was downloaded as two lake batches; stack them (rows = lakes).
    # iloc[:, :-10] drops the last 10 columns of each raw file.
    # NOTE(review): presumably these are the non-band lake attribute columns
    # appended by the export -- confirm against a raw CSV.
    df1 = pd.read_csv(f"{out_dir}/{yr}_1_wind_srad.csv").set_index("CCI ID").iloc[:, :-10]
    df2 = pd.read_csv(f"{out_dir}/{yr}_2_wind_srad.csv").set_index("CCI ID").iloc[:, :-10]
    df = pd.concat([df1, df2], axis=0)

    # u and v wind components, solar radiation
    u_df = df.loc[:, ["u_component_of_wind_10m" in col for col in df.columns]]
    v_df = df.loc[:, ["v_component_of_wind_10m" in col for col in df.columns]]
    srad_df = df.loc[:, ["surface_solar_radiation_downwards_sum" in col for col in df.columns]]

    # Strip the band suffixes so u and v carry the same date labels; the
    # components are then combined by date label instead of column position.
    u_df = u_df.rename(columns=lambda col: col.replace("_u_component_of_wind_10m", ""))
    v_df = v_df.rename(columns=lambda col: col.replace("_v_component_of_wind_10m", ""))

    # wind speed = magnitude of the (u, v) vector; transpose to rows = dates
    wind_df = np.sqrt(u_df ** 2 + v_df ** 2).T
    wind_df.index = pd.to_datetime(wind_df.index)
    wind_df.columns.name = ""
    wind_frames.append(wind_df)

    # solar radiation: column names to dates, then transpose to rows = dates
    srad_df = srad_df.rename(
        columns=lambda col: col.replace("_surface_solar_radiation_downwards_sum", "")
    ).T
    srad_df.index = pd.to_datetime(srad_df.index)
    srad_df.columns.name = ""
    srad_frames.append(srad_df)

df_wind = pd.concat(wind_frames, axis=0)
df_srad = pd.concat(srad_frames, axis=0)

# Lakes with at least one missing wind value are dropped from both tables
# so that the two outputs cover the same lakes.
no_data_lakes = df_wind.columns[df_wind.isna().any()].to_numpy()
# No unit conversion here: the Kelvin-to-Celsius offset (-273.15) applies to
# temperatures only and must not be subtracted from wind speed or radiation.
df_wind = df_wind.drop(columns=no_data_lakes)
df_srad = df_srad.drop(columns=no_data_lakes)

In [4]:
# Persist the merged wind and solar radiation tables as CSV.
wind_srad_outputs = (
    ("/nas/cee-hydro/laketemp_bias/era5land/wind.csv", df_wind),
    ("/nas/cee-hydro/laketemp_bias/era5land/srad.csv", df_srad),
)
for csv_path, table in wind_srad_outputs:
    table.to_csv(csv_path)