In [1]:
# Wildcard import kept first so the explicit imports below always win any name clash.
from cdo import *

import calendar
import os
from datetime import datetime, timedelta

import netCDF4 as nc
import numpy as np
import pandas as pd
import xarray as xr
from dateutil.relativedelta import relativedelta

In [5]:
# Run-date constants derived from the system clock; no user input is required.
# The clock is read exactly once so that `today` and `yesterday` cannot straddle
# midnight: the two dates are always exactly one day apart.
_now = datetime.today()
today = _now.strftime('%Y%m%d')            # run date as YYYYMMDD
yesterday = _now - timedelta(days=1)       # datetime object, used for month arithmetic later
year_0 = yesterday.year
month_0 = yesterday.month
yesterday_str = yesterday.strftime('%Y%m%d')

# Which daily CFS run cycles would we like to download, process, and add to our csv basin files
utc = ['00', '06', '12', '18']

In [6]:
## USER INPUTS ##
# Output directory for this run, keyed by today's date (YYYYMMDD). The value ends with '/'
# because later cells build output file paths by string concatenation onto it.
# NOTE(review): `dir` shadows the Python builtin of the same name for the rest of the notebook;
# the name is left unchanged because other cells reference it.
# NOTE(review): both paths are absolute and machine-specific; adjust before running elsewhere.
dir = f'C:/Users/fitzpatrick/Desktop/Data/{today}/'
# NetCDF file providing the 'latitude'/'longitude' coordinates and the basin mask variables
# that the cells below read.
mask_file = 'C:/Users/fitzpatrick/Desktop/Data/Input/GL_mask.nc'

In [7]:
def get_files(directory, where, format):
    """
    List the entries of a directory whose names start or end with a given string.

    Parameters:
    - directory: Path to the directory to search (it is not searched recursively).
    - where: 'ends' to match the end of the file name (e.g. an extension),
             'starts' to match the beginning of the file name (e.g. a prefix).
    - format: String to match, e.g. '.grb2' or '.nc' with 'ends',
              or 'pgb' / 'flx' with 'starts'.

    Returns:
    - List of matching paths (directory joined with the file name), sorted by
      file name so the order is reproducible across runs and platforms.

    Raises:
    - ValueError: if `where` is neither 'ends' nor 'starts'. Previously such a
      call silently returned an empty list, which hid typos in the argument.
    """
    if where == 'ends':
        matches = str.endswith
    elif where == 'starts':
        matches = str.startswith
    else:
        raise ValueError(f"where must be 'ends' or 'starts', got {where!r}")

    # os.listdir returns entries in arbitrary order, hence the explicit sort.
    return [
        os.path.join(directory, file_name)
        for file_name in sorted(os.listdir(directory))
        if matches(file_name, format)
    ]

In [8]:
# This function uses spherical trigonometry and
# takes into account the Earth's shape and latitude-dependent distortion.
# Differences between this function and the function above are only seen
# near the northern and southern edges of the domain.

def calculate_grid_cell_areas(lon, lat):
    """
    Approximate the surface area of every cell on a regular latitude/longitude grid.

    Each cell is treated as a small rectangle on a sphere:

        area = R^2 * |dlat| * |dlon| * cos(latitude)

    with R = 6,371,000 m, so the result is in square metres (not square kilometres).

    Parameters:
    - lon: 1D sequence of longitudes in degrees (at least two values).
    - lat: 1D sequence of latitudes in degrees (at least two values).
      The grid spacing is taken from the first two entries of each axis, so
      both axes are assumed to be evenly spaced.

    Returns:
    - 2D numpy array of shape (len(lat), len(lon)) holding the cell areas in m^2.
      All columns are identical because the area depends only on latitude.

    Raises:
    - ValueError: if either coordinate has fewer than two values, since the
      grid spacing cannot be determined.
    """
    R = 6371000.0  # Radius of Earth in meters

    # getdata accepts plain sequences, ndarrays and masked arrays alike and
    # guarantees that a plain ndarray is returned to the caller.
    lat = np.asarray(np.ma.getdata(lat), dtype=float)
    lon = np.asarray(np.ma.getdata(lon), dtype=float)
    if lat.size < 2 or lon.size < 2:
        raise ValueError("lat and lon must each contain at least two values")

    # Grid cell width in radians; abs() keeps the areas positive when a
    # coordinate axis is stored in descending order.
    dlat = np.abs(np.radians(lat[1] - lat[0]))
    dlon = np.abs(np.radians(lon[1] - lon[0]))

    # The area varies with latitude only: compute one value per row and
    # repeat it across all longitudes instead of looping over every cell.
    row_area = R**2 * dlat * dlon * np.cos(np.radians(lat))
    return np.repeat(row_area[:, np.newaxis], lon.size, axis=1)

In [9]:
# ET = kg/(m^2*time^1) or 1 mm
# LE = MJ/(m^2*time^1)
# λ  = MJ/kg

# Latent heat of vaporization varies slightly with temperature. Allen et al. (1998) provide an equation
# for calculating λ as a function of air temperature. Temperature in this case must be in degrees Celsius.

# λ = 2.501 − (2.361×10^−3) × Temp (Celsius)

# so for our data with Temp in Kelvin...

# λ = 2.501 − ((2.361×10^−3) × (Temp − 273.15))

# Our variable_lhf is in W/m^2, i.e. J/(m^2*s). In order to convert to MJ we must multiply by 10^-6 or
# 0.000001. Now we have lambda and variable_lhf both in terms of MJ.

# Boiling all this down, we get the final equations below, which provide us with a final evaporation for
# the mean daily variable_lhf values in kg/m^2 or millimeters of water.

def calculate_evaporation(temperature_K, latent_heat):
    """
    Convert a latent heat flux into an evaporation rate.

    Parameters:
    - temperature_K: air temperature in Kelvin (scalar or array).
    - latent_heat: latent heat flux (scalar or array); it is scaled by 1e-6
      here to express the energy in MJ instead of J.

    Returns:
    - The scaled latent heat flux divided by the temperature-dependent latent
      heat of vaporization, 2.501 - 0.002361 * T(Celsius), in MJ/kg.
    """
    temperature_C = temperature_K - 273.15
    heat_of_vaporization = 2.501 - (0.002361 * temperature_C)  # MJ/kg
    latent_heat_MJ = latent_heat * 0.000001
    return latent_heat_MJ / heat_of_vaporization

In [10]:
# Names of the mask variables to process: one '_lake' and one '_land' entry
# for each of the five basin prefixes.
mask_variable = [
    'eri_lake', 'eri_land',
    'hur_lake', 'hur_land',
    'ont_lake', 'ont_land',
    'mic_lake', 'mic_land',
    'sup_lake', 'sup_land',
]
# Variable names of interest in the FLX and PGB files
flx_variables = ['TMP_2maboveground', 'LHTFL_surface']
pgb_variables = ['APCP_surface']

# Three independent, empty result tables (precipitation, temperature, evaporation)
# that share one column layout: run identifier, forecast year/month, then one
# column per mask variable.
_forecast_columns = ['cfs_run', 'forecast_year', 'forecast_month'] + mask_variable
df_apcp_forecasts, df_tmp_forecasts, df_evap_forecasts = (
    pd.DataFrame(columns=_forecast_columns) for _ in range(3)
)

In [11]:
# Pull the lat/lon from the mask file and calculate the grid cell areas.
# The dataset is opened as a context manager so the file handle is released even
# if a variable lookup fails; `[:]` reads the coordinate values into memory, so
# they remain usable after the file is closed.
with nc.Dataset(mask_file) as ds_mask:
    lat = ds_mask.variables['latitude'][:]
    lon = ds_mask.variables['longitude'][:]
area = calculate_grid_cell_areas(lon, lat)

In [12]:
## This section pulls APCP data from the PGB files
## APCP_surface is in kg/m2

# os.path.join avoids the doubled '/' that plain concatenation produced (`dir`
# already ends with '/'); the trailing '' keeps a separator at the end of process_dir.
process_dir = os.path.join(dir, 'CFS', 'processed', '')

# Find all the pgb files in the directory (sorted so the CSV row order is reproducible)
file_list = sorted(get_files(process_dir, 'starts', 'pgb'))

# One dict per (file, forecast month); each dict becomes one CSV row. Collecting
# records and building the DataFrame once avoids growing it cell by cell.
apcp_records = []

# Context managers close every dataset even if a step below raises.
with xr.open_dataset(mask_file) as ds_mask:
    # Read each mask once instead of once per file and forecast month.
    masks = {mask_name: ds_mask.variables[mask_name][:] for mask_name in mask_variable}

    for file in file_list:
        # The run identifier is the third dot-separated field of the file NAME;
        # using the basename keeps this correct when a directory contains a '.'.
        cfs_run = os.path.basename(file).split('.')[2]

        with xr.open_dataset(file) as ds_pgb:
            apcp = ds_pgb.variables['APCP_surface'][:]

            # NOTE(review): assumes every file holds at least 10 forecast months
            # along its first axis — confirm against the processing step.
            for forecast in range(0, 10):
                # NOTE(review): forecast months are counted from yesterday's date,
                # not from the run date encoded in the file name — confirm intended.
                fmonth = yesterday + relativedelta(months=forecast)
                # Use the real month length instead of a fixed 31 days, matching
                # how the FLX cell scales evaporation to a monthly total.
                days_in_month = calendar.monthrange(fmonth.year, fmonth.month)[1]

                record = {
                    'cfs_run': cfs_run,
                    'forecast_year': fmonth.strftime('%Y'),
                    'forecast_month': fmonth.strftime('%m'),
                }
                for mask_name, mask in masks.items():
                    # NOTE(review): the factor 4 presumably scales a 6-hourly
                    # accumulation to a daily total — confirm against the file metadata.
                    pcp_sum = np.sum(apcp[forecast,:,:]*area*mask*4*days_in_month)
                    #pcp_avg = pcp_sum / np.sum(area*mask)
                    record[mask_name] = float(pcp_sum.data) #pcp_avg.data
                apcp_records.append(record)

df_apcp_forecasts = pd.DataFrame(
    apcp_records,
    columns=['cfs_run', 'forecast_year', 'forecast_month'] + mask_variable,
)
df_apcp_forecasts.to_csv(os.path.join(dir, f'CFS_APCP_forecasts_Sums_{yesterday_str}.csv'),sep=',',index=False)
print("CSV created with precipitation data.")


Cannot find the ecCodes library


CSV created with precipitation data.


In [13]:
## This section pulls TMP and LHTFL data from the FLX files

# Sorted so the CSV row order is reproducible
file_list = sorted(get_files(process_dir, 'starts', 'flx'))

# One dict per (file, forecast month) for each output table; each dict becomes
# one CSV row. Building the DataFrames once avoids growing them cell by cell.
tmp_records = []
evap_records = []

# Context managers close every dataset even if a step below raises.
with xr.open_dataset(mask_file) as ds_mask:
    # Read each mask once instead of once per file and forecast month.
    masks = {mask_name: ds_mask.variables[mask_name][:] for mask_name in mask_variable}

    for file in file_list:
        # The run identifier is the third dot-separated field of the file NAME;
        # using the basename keeps this correct when a directory contains a '.'.
        cfs_run = os.path.basename(file).split('.')[2]

        with xr.open_dataset(file) as ds_flx:
            tmp = ds_flx.variables['TMP_2maboveground'][:]
            lhtfl = ds_flx.variables['LHTFL_surface'][:]
            evap = calculate_evaporation(tmp, lhtfl)

            # NOTE(review): assumes every file holds at least 10 forecast months
            # along its first axis — confirm against the processing step.
            for forecast in range(0, 10):
                fmonth = yesterday + relativedelta(months=forecast)
                days_in_month = calendar.monthrange(fmonth.year, fmonth.month)[1]

                row_id = {
                    'cfs_run': cfs_run,
                    'forecast_year': fmonth.strftime('%Y'),
                    'forecast_month': fmonth.strftime('%m'),
                }
                tmp_record = dict(row_id)
                evap_record = dict(row_id)

                for mask_name, mask in masks.items():
                    # Area-weighted mean over the masked cells only. The previous
                    # np.mean(tmp*mask) divided by the number of cells in the whole
                    # grid, which is only a basin mean if the mask is NaN outside it.
                    # NOTE(review): confirm the mask encoding (0/1 vs NaN/1); this
                    # form gives a basin mean for either.
                    tmp_avg = np.sum(tmp[forecast,:,:]*area*mask) / np.sum(mask*area)
                    tmp_record[mask_name] = float(tmp_avg.data)

                    evap_sum = np.sum(evap[forecast,:,:]*area*mask) *86400 * days_in_month #convert to monthly
                    #evap_avg = evap_sum / np.sum(area*mask)
                    evap_record[mask_name] = float(evap_sum.data) #evap_avg.data

                tmp_records.append(tmp_record)
                evap_records.append(evap_record)

forecast_columns = ['cfs_run', 'forecast_year', 'forecast_month'] + mask_variable
df_tmp_forecasts = pd.DataFrame(tmp_records, columns=forecast_columns)
df_evap_forecasts = pd.DataFrame(evap_records, columns=forecast_columns)

df_tmp_forecasts.to_csv(os.path.join(dir, f'CFS_TMP_forecasts_Avgs_{yesterday_str}.csv'),sep=',',index=False)
print("CSV created with temperature data.")
df_evap_forecasts.to_csv(os.path.join(dir, f'CFS_EVAP_forecasts_Sums_{yesterday_str}.csv'),sep=',',index=False)
print("CSV created with evaporation data.")

CSV created with temperature data.
CSV created with evaporation data.
