# Download/Pre-process CFS forecast data
Lindsay Fitzpatrick
ljob@umich.edu
08/28/2024

This script reads downloads CFS forecast data from the AWS as grib2 files. It then opens the grib2 files, calculates total basin, lake, and land, precipitation, evaporation, and average 2m air temperature. These calculations are then added to the running CSV files. This script needs the following files:

- GL_mask.nc
- CFS_EVAP_forecasts_Sums.csv
- CFS_PCP_forecasts_Sums.csv
- CFS_TMP_forecasts_Avgs.csv

In [1]:
from datetime import datetime, timedelta
import os
import sys
import boto3
from botocore import UNSIGNED
from botocore.config import Config
import cfgrib
import pandas as pd
import netCDF4 as nc
import numpy as np
import shutil
import urllib.request
from bs4 import BeautifulSoup
import requests
import calendar

## User Inputs

In [70]:
# Path to download data to
dir = 'C:/Users/fitzpatrick/Desktop/Data/cnbs/'
# Location of the mask file
mask_file = dir + 'mask_cfsr_0.31_all.nc'

# Location of existing CSV files or path/name to new CSV files
tmp_csv = dir + 'CFS_TMP_forecasts_Avgs_K_2.csv'
evap_csv = dir + 'CFS_EVAP_forecasts_Avgs_MM_2.csv'
pcp_csv = dir + 'CFS_PCP_forecasts_Avgs_MM_2.csv'

source = 'aws' # 'aws' or 'ncei'

# IF YOU ARE CREATING NEW CSV FILES:
# Then you need to define the start and end dates
# IF YOU ARE ADDING TO EXISTING CSV FILES:
# Then these dates will be ignored and the script will automatically pull
# the last date from the existing CSV files and continue the forecast from there.
start_date = '2024-01-01'
end_date = '2025-02-01'

Presets

These shouldn't change unless the location changes for CFS data or the user wants different files (products specifies the prefix of the files. Different files contain different variables) or a specific forecast (utc specifies the forecast time).

In [59]:
## Presets ##
products = ['pgb','flx']
utc = ['00','06','12','18']

# Define mask variables
mask_variables = ['eri_lake','eri_land',
                 'ont_lake','ont_land',
                 'mih_lake','mih_land',
                 'sup_lake','sup_land']

#AWS bucket name to locate the CFS forecast
bucket_name = 'noaa-cfs-pds'

## Define Functions

This function goes to the AWS site and downloads the needed CFS files for a given forecast day.

In [60]:
def download_grb2_aws(product, bucket_name, url_path, download_dir):
    """
    Download the CFS forecast from AWS

    Parameters:
    - product: 'flx' or 'pgb'
    - bucket_name: for CFS data it is 'noaa-cfs-pds'
    - url_path: the url path to data
    - download_dir: location to download data to
    """
    num_files_downloaded = 0

    # Create a boto3 client for S3
    s3_config = Config(signature_version=UNSIGNED)
    s3 = boto3.client('s3', config=s3_config)

    # List all objects in the specified folder path
    continuation_token = None
    objects = []

    # Use a loop to handle pagination
    while True:
        list_objects_args = {'Bucket': bucket_name, 'Prefix': url_path}
        if continuation_token:
            list_objects_args['ContinuationToken'] = continuation_token

        list_objects_response = s3.list_objects_v2(**list_objects_args)

        objects.extend(list_objects_response.get('Contents', []))

        if not list_objects_response.get('IsTruncated', False):
            break

        continuation_token = list_objects_response.get('NextContinuationToken')

    # Iterate over each object and download if it ends with '.grb2'
    for obj in objects:
        key = obj['Key']
        if product in key and key.endswith('grib.grb2'): #if key.endswith('.grb2'):
            local_file_path = os.path.join(download_dir, os.path.relpath(key, url_path))

            # Ensure the directory structure exists
            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

            # Download the file
            s3.download_file(bucket_name, key, local_file_path)
            num_files_downloaded += 1

            print(f"Downloaded: {key}")

In [5]:
def download_grb2_ncei(product, url_path, download_dir):

    # File counter
    num_files_downloaded = 0

    try:
        response = urllib.request.urlopen(url_path)
        html_content = response.read().decode('utf-8')
        soup = BeautifulSoup(html_content, 'html.parser')
        links = soup.find_all('a', href=lambda href: href and href.startswith(product) and href.endswith('grib.grb2'))
        
        for link in links:
            file_url = url_path + link['href']
            filename = link['href'].split('/')[-1]
            file_path = os.path.join(download_dir, filename)
            urllib.request.urlretrieve(file_url, file_path)
            print(f"Downloaded: {filename}")

            num_files_downloaded += 1

    except Exception as e:
        print(f"An error occurred: {e}")

In [6]:
def initialize_dataframes(tmp_csv, evap_csv, pcp_csv):
    """
    Initialize new DataFrames if CSV files do not exist.
    """
    if os.path.exists(tmp_csv):
        df_tmp_forecasts = pd.read_csv(tmp_csv)
    else:
        df_tmp_forecasts = pd.DataFrame(columns=['cfs_run', 'forecast_year', 'forecast_month'] + mask_variables)
    
    if os.path.exists(evap_csv):
        df_evap_forecasts = pd.read_csv(evap_csv)
    else:
        df_evap_forecasts = pd.DataFrame(columns=['cfs_run', 'forecast_year', 'forecast_month'] + mask_variables)
    
    if os.path.exists(pcp_csv):
        df_pcp_forecasts = pd.read_csv(pcp_csv)
    else:
        df_pcp_forecasts = pd.DataFrame(columns=['cfs_run', 'forecast_year', 'forecast_month'] + mask_variables)
    
    return df_tmp_forecasts, df_evap_forecasts, df_pcp_forecasts

Function to grab a specific list of files based on the prefix or suffix of a file (ie. 'pgb', '.grb2')

In [7]:
def get_files(directory, affix, identifier):
    """
    Get a list of all GRIB2 files in the specified directory.

    Parameters:
    - directory (str): Path to the directory containing files.
    - affix (str): 'prefix' or 'suffix'
    - identifier (str):  (ie. 'pgb', 'flx', '.grb2', or '.nc')
    Returns:
    - List of file paths to the GRIB2 files.
    """
    files = []
    for file_name in os.listdir(directory):
        if affix == 'suffix': # ends with
            if file_name.endswith(identifier):
                file_path = os.path.join(directory, file_name)
                files.append(file_path)
        elif affix == 'prefix': # begins with
            if file_name.startswith(identifier):
                file_path = os.path.join(directory, file_name)
                files.append(file_path)
    return files

In [10]:
def create_directory(directory):
    """Create a directory if it doesn't already exist."""
    try:
        # Check if the directory already exists
        if not os.path.exists(directory):
            os.makedirs(directory)
            print(f"Directory '{directory}' created.")
        else:
            print(f"Directory '{directory}' already exists.")
    except Exception as e:
        print(f"An error occurred: {e}")

Function to delete the directory with CFS grb2 files because they are not needed after calculations are saved in the CSVs.

In [11]:
def delete_directory(directory_path):
    # Check if the directory exists
    if not os.path.isdir(directory_path):
        print(f"The directory {directory_path} does not exist.")
        return
    try:
        # Remove the entire directory tree
        shutil.rmtree(directory_path)
        print(f"Successfully deleted the directory and all its contents: {directory_path}")
    except Exception as e:
        print(f"Error deleting {directory_path}: {e}")

In [12]:
def check_url_exists(url):
    try:
        response = requests.head(url)
        # Check if the response is OK (status code 200)
        return response.status_code == 200
    except requests.RequestException:
        return False

Function to calculate the grid cell areas [m2] based on the mask file. This is needed to calculate total precipitation and evaporation because the units are [kg/m2].

In [36]:
def calculate_grid_cell_areas(lon, lat):
    # Calculate grid cell areas
    # Assuming lat and lon are 1D arrays
    # Convert latitude to radians

    R = 6371000.0  # Radius of Earth in meters
    lat_rad = np.radians(lat)

    # Calculate grid cell width in radians
    dlat = np.radians(lat[1] - lat[0])
    dlon = np.radians(lon[1] - lon[0])

    # Calculate area of each grid cell in square kilometers
    area = np.zeros((len(lat), len(lon)))
    for i in range(len(lat)):
        for j in range(len(lon)):
            area[i, j] = R**2 * dlat * dlon * np.cos(lat_rad[i])

    return area

Function to calculate evaporation based on the 2m air temperature and latent heat flux.

In [37]:
# ET = kg/(m^2*time^1) or 1 mm
# LE = MJ/(M^2*time^1)
# λ  = MJ/kg

# Latent heat of vaporization varies slightly with temperature. Allen et al. (1998) provides an equation 
# for calculating λ with air  temperature variation. Temperature in this case must be in degrees Celcius.

# λ=2.501−(2.361×10−3)×Temp Celcius

# so for our data with Temp in Kelvin...

# λ=2.501−((2.361×10−3)×(Temp-273.15))

# Our variable_lhf is in W/m^2 or J/(m^2*time^1). In order to convert to MJ we must multiply by 10^-6 or 
# 0.000001. Now we have lamba and variable_lhf both in terms of MJ.

# Equation below will provide an evaporation rate in kg/m2 per s. 

def calculate_evaporation(temperature_K, latent_heat):
    lamda=(2.501-(0.002361*(temperature_K-273.15)))
    evaporation_rate=((latent_heat)*0.000001)/lamda

    return evaporation_rate # kg/m2 per s

Function to open each of the grib2 files and calculate the total precipitation, total evaporation, and average 2m air temperature over an entire basin, land, or lake for each of the Great Lakes. This uses the mask file to calculate each of these.

In [53]:
def process_grib_files(download_dir, df_tmp_forecasts, df_evap_forecasts, df_pcp_forecasts, mask_lat, mask_lon, mask_ds, mask_variables, area, calculate_evaporation):
    # Find all the .grb2 files in the directory
    file_list = get_files(download_dir, 'suffix', '.grb2')
    index = len(df_tmp_forecasts) if not df_tmp_forecasts.empty else 0  # Picks up on the last line of the CSV

    for grib2_file in file_list:

        filename = os.path.basename(grib2_file)
        parts = filename.split('.')
        cfs_run = parts[2]
        date_part = parts[3]  # Assuming parts[2] is in the format YYYYMM
        forecast_year = date_part[:4]
        forecast_month = date_part[4:6]

        if filename.startswith('flxf'):

            # Open the flx file at the 2m level to pull the 2m air temperature
            flx_2mabove = cfgrib.open_dataset(grib2_file, engine='cfgrib', filter_by_keys={'typeOfLevel': 'heightAboveGround', 'level': 2})
            df_tmp_forecasts.loc[index, 'cfs_run'] = cfs_run
            df_tmp_forecasts.loc[index, 'forecast_year'] = forecast_year
            df_tmp_forecasts.loc[index, 'forecast_month'] = forecast_month
            mean2t = flx_2mabove['mean2t']

            # Cut the variable to the mask domain
            mean2t_cut = mean2t.sel(
                latitude=slice(mask_lat.max(), mask_lat.min()),
                longitude=slice(mask_lon.min(), mask_lon.max())
            )
            # Remap and upscale the variable to match the mask domain
            mean2t_remap = mean2t_cut.interp(latitude=mask_lat, longitude=mask_lon, method='linear')
            
            # Calculate mean2t for each of the mask variables (i.e., eri_lake, eri_basin, etc.)
            for mask_var in mask_variables:

                # Have to change the mask from fractional to all 1s and 0s
                mask_tmp = mask_ds.variables[mask_var][:]
                mask = np.ma.masked_where(np.isnan(mask_tmp), np.ones_like(mask_tmp))

                # Take the mean over the mask area
                tmp_avg = np.mean(mean2t_remap * mask)

                df_tmp_forecasts.loc[index, mask_var] = tmp_avg.data

            ###############################################################################

            # Open the flx file again but at the surface level to pull the latent heat flux
            flx_surface = cfgrib.open_dataset(grib2_file, engine='cfgrib', filter_by_keys={'typeOfLevel': 'surface'})
            df_evap_forecasts.loc[index, 'cfs_run'] = cfs_run
            df_evap_forecasts.loc[index, 'forecast_year'] = forecast_year
            df_evap_forecasts.loc[index, 'forecast_month'] = forecast_month
            mslhf = flx_surface['mslhf']
            
            # Cut the variable to the mask domain
            mslhf_cut = mslhf.sel(
                latitude=slice(mask_lat.max(), mask_lat.min()),
                longitude=slice(mask_lon.min(), mask_lon.max())
            )
            # Remap and upscale the variable to match the mask domain
            mslhf_remap = mslhf_cut.interp(latitude=mask_lat, longitude=mask_lon, method='linear')

            # Calculate evaporation across the entire domain using air temp and latent heat flux
            evap = calculate_evaporation(mean2t_remap, mslhf_remap)
            
            year = int(forecast_year)
            month = int(forecast_month)

            _, num_days = calendar.monthrange(year, month)

            # Calculate evaporation for each of the mask variables (i.e., eri_lake, eri_land, etc.)
            for mask_var in mask_variables:
                
                mask = mask_ds.variables[mask_var][:]
                total_evap = (np.sum(evap * area * mask)) * num_days * 86400 # total in kg/sm2 * num of seconds in a month = kg/m2 or mm
                evap_mm = total_evap / np.sum(mask * area)

                df_evap_forecasts.loc[index, mask_var] = evap_mm.data

        ###############################################################################

        elif filename.startswith('pgbf'):

            # Open the pgb file at the surface level to pull the precipitation
            pgb_surface = cfgrib.open_dataset(grib2_file, engine='cfgrib', filter_by_keys={'typeOfLevel': 'surface'})
            df_pcp_forecasts.loc[index, 'cfs_run'] = cfs_run
            df_pcp_forecasts.loc[index, 'forecast_year'] = forecast_year
            df_pcp_forecasts.loc[index, 'forecast_month'] = forecast_month
            pcp = pgb_surface['tp']  # Total precipitation
            
            # Cut the variable to the mask domain
            pcp_cut = pcp.sel(
                latitude=slice(mask_lat.max(), mask_lat.min()),
                longitude=slice(mask_lon.min(), mask_lon.max())
            )
            # Remap and upscale the variable to match the mask domain
            pcp_remap = pcp_cut.interp(latitude=mask_lat, longitude=mask_lon, method='linear')

            year = int(forecast_year)
            month = int(forecast_month)

            _, num_days = calendar.monthrange(year, month)

            for mask_var in mask_variables:
                mask = mask_ds.variables[mask_var][:]
                
                print(pcp.shape, mask.shape, area.shape)
                total_pcp = (np.sum(pcp_remap * mask * area)) * 4 * num_days # pcp is for 6 hours - convert to monthly
                pcp_mm = total_pcp / np.sum(mask * area) # kg/m2 or mm

                df_pcp_forecasts.loc[index, mask_var] = pcp_mm.data

        print(f'Done with {filename}')

        index += 1

## Begin Script

Open the mask file. Pull the latitude and longitude to be used to cut the global variable down to just the Great Lakes domain and upscale. Also calculates area of each of the grid cells.

In [61]:
create_directory(dir)

Directory 'C:/Users/fitzpatrick/Desktop/Data/cnbs/' already exists.


In [71]:
# Open existing CSVs or create empty dataframes to save to new CSVs
df_tmp_forecasts, df_evap_forecasts, df_pcp_forecasts = initialize_dataframes(tmp_csv, evap_csv, pcp_csv)

# If we are starting a new CSV, then user must input dates above to pull data
if df_tmp_forecasts.empty:
    print("Creating new csv files.")
    start_date_i = datetime.strptime(start_date, "%Y-%m-%d") # User input above
    end_date_i = datetime.strptime(end_date, "%Y-%m-%d") # User input above
else:
    # If we are adding to an existing CSV, then pull the last date from the CSV
    # and continue from there
    print("Opening existing csv files.")
    last_cfs = df_tmp_forecasts['cfs_run'].astype(str).iloc[-1][:8]
    start_date_i = datetime.strptime(last_cfs, '%Y%m%d') + timedelta(days=1)
    # Pull all the forecasts days up to yesterday (the most complete forecast)
    end_date_i = datetime.now() - timedelta(days=1)

# Check if start_date is equal to or after end_date
if start_date_i == end_date_i:
    print("The csv files are up-to-date.")
    sys.exit()  # Stop the script
elif start_date_i > end_date_i:
    print("There is an error in the input dates. Check them and try again.")
    sys.exit()  # Stop the script
else:
    print(f"Starting from: {start_date_i.strftime('%Y-%m-%d')} and continuing through: {end_date_i.strftime('%Y-%m-%d')}")

# Create a date range
date_array = pd.date_range(start=start_date_i, end=end_date_i)

Opening existing csv files.
Starting from: 2024-02-07 and continuing through: 2025-02-02


In [68]:
# Open the mask file and calculate the grid cell areas
mask_ds = nc.Dataset(mask_file)
mask_lat = mask_ds.variables['latitude'][:]
mask_lon = mask_ds.variables['longitude'][:]
area = calculate_grid_cell_areas(mask_lon, mask_lat)

Begin loop to go through the user input dates. Loop creates a directory to download the CFS grib files, runs through the download_grb2_aws funtion to download and then run through the process_grib_files to do the calculations. It then saves the calculations to the CSV files, deletes the grib2 files and moves on to the next date.

In [72]:
for date in date_array:
    print(f"Beginning {date}.")
    YYYY = date.strftime("%Y")
    YYYYMM = date.strftime("%Y%m")
    YYYYMMDD = date.strftime("%Y%m%d")

    date = date.strftime('%Y%m%d')
    download_dir = f'{dir}{date}/CFS/'

    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    # Download the grib2 files using AWS or NCEI
    for utc_time in utc:
        for product in products:
            if source == 'aws':
                url_path = f'cfs.{date}/{utc_time}/monthly_grib_01/'
                download_grb2_aws(product, bucket_name, url_path, download_dir)
            elif source == 'ncei':
                base_url = 'https://www.ncei.noaa.gov/data/climate-forecast-system/access/operational-9-month-forecast/monthly-means/'
                url_path = f'{base_url}/{YYYY}/{YYYYMM}/{YYYYMMDD}/{YYYYMMDD}{utc_time}/'
                if not url_path or not check_url_exists(url_path):
                    print(f"No files available for {date}.")
                else:
                    download_grb2_ncei(product, url_path, download_dir)
            else:
                print('Input source does not exist. Source must be aws or ncei.')

    process_grib_files(download_dir, df_tmp_forecasts, df_evap_forecasts, df_pcp_forecasts, mask_lat, mask_lon, mask_ds, mask_variables, area, calculate_evaporation)   
    
    # Save the updated DataFrames to CSV files
    print(df_tmp_forecasts)

    df_tmp_forecasts.to_csv(tmp_csv, sep=',', index=False)
    df_evap_forecasts.to_csv(evap_csv, sep=',', index=False)
    df_pcp_forecasts.to_csv(pcp_csv, sep=',', index=False)

    # Delete downloaded grib2 files
    #delete_directory(download_dir)
    
    print(f'Done with {date}.')

Beginning 2024-02-07 00:00:00.
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202402.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202403.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202404.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202405.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202406.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202407.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202408.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202409.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202410.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/pgbf.01.2024020700.202411.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthly_grib_01/flxf.01.2024020700.202402.avrg.grib.grb2
Downloaded: cfs.20240207/00/monthl

Ignoring index file 'C:/Users/fitzpatrick/Desktop/Data/cnbs/20240207/CFS/flxf.01.2024020700.202402.avrg.grib.grb2.da267.idx' older than GRIB file


Downloaded: cfs.20240207/18/monthly_grib_01/flxf.01.2024020718.202411.avrg.grib.grb2


  df_tmp_forecasts.loc[index, 'cfs_run'] = cfs_run
  df_tmp_forecasts.loc[index, 'forecast_year'] = forecast_year
  df_tmp_forecasts.loc[index, 'forecast_month'] = forecast_month
Ignoring index file 'C:/Users/fitzpatrick/Desktop/Data/cnbs/20240207/CFS/flxf.01.2024020700.202402.avrg.grib.grb2.5b7b6.idx' older than GRIB file
  df_evap_forecasts.loc[index, 'cfs_run'] = cfs_run
  df_evap_forecasts.loc[index, 'forecast_year'] = forecast_year
  df_evap_forecasts.loc[index, 'forecast_month'] = forecast_month


Done with flxf.01.2024020700.202402.avrg.grib.grb2
Done with flxf.01.2024020700.202403.avrg.grib.grb2
Done with flxf.01.2024020700.202404.avrg.grib.grb2
Done with flxf.01.2024020700.202405.avrg.grib.grb2
Done with flxf.01.2024020700.202406.avrg.grib.grb2
Done with flxf.01.2024020700.202407.avrg.grib.grb2
Done with flxf.01.2024020700.202408.avrg.grib.grb2
Done with flxf.01.2024020700.202409.avrg.grib.grb2
Done with flxf.01.2024020700.202410.avrg.grib.grb2
Done with flxf.01.2024020700.202411.avrg.grib.grb2
Done with flxf.01.2024020706.202402.avrg.grib.grb2
Done with flxf.01.2024020706.202403.avrg.grib.grb2
Done with flxf.01.2024020706.202404.avrg.grib.grb2
Done with flxf.01.2024020706.202405.avrg.grib.grb2
Done with flxf.01.2024020706.202406.avrg.grib.grb2
Done with flxf.01.2024020706.202407.avrg.grib.grb2
Done with flxf.01.2024020706.202408.avrg.grib.grb2
Done with flxf.01.2024020706.202409.avrg.grib.grb2
Done with flxf.01.2024020706.202410.avrg.grib.grb2
Done with flxf.01.2024020706.20

  df_pcp_forecasts.loc[index, 'cfs_run'] = cfs_run
  df_pcp_forecasts.loc[index, 'forecast_year'] = forecast_year
  df_pcp_forecasts.loc[index, 'forecast_month'] = forecast_month


(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
Done with pgbf.01.2024020700.202402.avrg.grib.grb2
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
Done with pgbf.01.2024020700.202403.avrg.grib.grb2
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
(181, 360) (576, 1152) (576, 1152)
Done with pgbf.01.2024020700.202404.avrg.grib.grb2
(181, 3

KeyboardInterrupt: 

In [55]:
print(df_tmp_forecasts)

       cfs_run forecast_year forecast_month   eri_basin    eri_lake  \
0   2024091800          2024             09  290.438022  290.718765   
1   2024091800          2024             10  285.436517  285.765962   
2   2024091800          2024             11  280.460295  280.774295   
3   2024091800          2024             12  270.470198  271.444908   
4   2024091800          2025             01  265.423315  266.663449   
5   2024091800          2025             02  269.623548  269.987015   
6   2024091800          2025             03  269.735398   269.81036   
7   2024091800          2025             04   277.08622  276.750215   
8   2024091800          2025             05  282.630741  281.946436   
9   2024091800          2025             06  287.616239  286.923307   
10  2024091806          2024             09  289.446861  289.661428   
11  2024091806          2024             10  284.027147  284.358049   
12  2024091806          2024             11  276.323559  276.923877   
13  20

Close any open files before finishing script.

In [52]:
df_tmp_forecasts.to_csv(tmp_csv, sep=',', index=False)
df_evap_forecasts.to_csv(evap_csv, sep=',', index=False)
df_pcp_forecasts.to_csv(pcp_csv, sep=',', index=False)

In [53]:
print(df_tmp_forecasts)

       cfs_run forecast_year forecast_month   eri_basin    eri_lake  \
0   2024091800          2024             09  290.438022  290.718765   
1   2024091800          2024             10  285.436517  285.765962   
2   2024091800          2024             11  280.460295  280.774295   
3   2024091800          2024             12  270.470198  271.444908   
4   2024091800          2025             01  265.423315  266.663449   
5   2024091800          2025             02  269.623548  269.987015   
6   2024091800          2025             03  269.735398   269.81036   
7   2024091800          2025             04   277.08622  276.750215   
8   2024091800          2025             05  282.630741  281.946436   
9   2024091800          2025             06  287.616239  286.923307   
10  2024091806          2024             09  289.446861  289.661428   
11  2024091806          2024             10  284.027147  284.358049   
12  2024091806          2024             11  276.323559  276.923877   
13  20

In [14]:
mask_ds.close()