This script downloads all of the long-range National Water Model forecasts, from TODAY going out 30 days, and calculates the total runoff into each of the lakes.

The following file is required to run this script:
- lake_links.csv

The following is an example of the output. Output will be total runoff (cms) for each lake for each of the 4 ensemble NWM members.

              Superior     Erie         Ontario      MichHuron
       1      5160.098097  2324.209277  4796.599222  91990.107497
       2      5504.198089  2454.819275  4937.199219  92442.617487
       3      5113.598098  2306.559278  4777.599223  9928.957498
       4      7987.298033  3397.329254  5951.799196  95708.027414

In [98]:
# Import libraries
import os
import urllib.request
import urllib.error
import netCDF4
from netCDF4 import Dataset
from datetime import datetime
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import calendar
from dateutil.relativedelta import relativedelta

# Defined Functions

Function to download the NWM forecasts.

In [99]:
def download_nwm_forecast(forecast, ens_members, download_dir):
    # Initialize a file counter
    num_files_downloaded = 0

    base_url = 'https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/v3.0/'
    # This will need to be updated in the function to open using ftplib
    base_url_fallback = 'ftp://ftpprd.ncep.noaa.gov/pub/data/nccf/com/nwm/'

    for ens in range(1,ens_members+1):

      # Retrieve HTML content from the URL
        path = base_url+f'nwm.{today}/{forecast}_mem{ens}/'
        files = f'nwm.t00z.{forecast}.channel_rt_{ens}'

        response = urllib.request.urlopen(path)
        html_content = response.read().decode('utf-8')
        soup = BeautifulSoup(html_content, 'html.parser')
        links = soup.find_all('a', href=lambda href: href and href.startswith(files))
        for link in links:
            file_url = path + link['href']
            filename = link['href'].split('/')[-1]
            file_path = os.path.join(download_dir, filename)
            print("Downloading", filename)
            urllib.request.urlretrieve(file_url, file_path)
            num_files_downloaded += 1
    print(f'Total number of NWM files downloaded: {num_files_downloaded}')

In [100]:
# Function currently being worked on that would use the FTP address as a backup if Nomads were to fail.
import requests
from ftplib import FTP

def download_nwm_forecast_backup(forecast, ens_members, download_dir):
    # Initialize a file counter
    num_files_downloaded = 0

    base_url = 'https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/v3.0/'
    # This will need to be updated in the function to open using ftplib
    ftp_address = 'ftp://ftpprd.ncep.noaa.gov/pub/data/nccf/com/nwm/'
    ftp_path = '...'

    # Create the download directory if it doesn't exist
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    for ens in range(1,ens_members+1):

      # Retrieve HTML content from the URL
        path = base_url+f'nwm.{today}/{forecast}_mem{ens}/'
        files = f'nwm.t00z.{forecast}.channel_rt_{ens}'

        try:
        # Try to open the URL
            response = requests.get(path)
            response.raise_for_status()
            return response.content
        except requests.exceptions.RequestException:
            # If URL access fails, switch to FTP
            print("URL access failed, switching to FTP...")
            try:
                ftp = FTP(ftp_address)
                ftp.login()  # Log in to the FTP server
                ftp.cwd(ftp_path)  # Change to the specified path
                data = []
                ftp.retrlines('RETR ' + ftp_path, data.append)  # Retrieve the file data
                ftp.quit()  # Quit FTP connection
                return '\n'.join(data)
            except Exception as e:
                print("Failed to access FTP. Try again later.")
                return None

        soup = BeautifulSoup(html_content, 'html.parser')
        links = soup.find_all('a', href=lambda href: href and href.startswith(files))
        for link in links:
            file_url = path + link['href']
            filename = link['href'].split('/')[-1]
            file_path = os.path.join(download_dir, filename)
            print("Downloading", filename)
            urllib.request.urlretrieve(file_url, file_path)
            num_files_downloaded += 1
    print(f'Total number of NWM files downloaded: {num_files_downloaded}')

Function to calculate RUNOFF into each lake from the NWM long range forecast.

In [101]:
def calculate_runoff(ens_member, download_dir):

    #runoff_su, runoff_er, runoff_on, runoff_mh = 0, 0, 0, 0
    runoff_su = []
    runoff_er = []
    runoff_on = []
    runoff_mh = []

    # Loop through all files in the directory
    for filename in os.listdir(download_dir):
        if f'channel_rt_{ens_member}' in filename:
            file = Dataset(download_dir+filename, mode='r', format="NETCDF4")
            feature_id = file.variables['feature_id']
            streamflow = file.variables['streamflow'] #streamflow is in m3/s

            su_tmp, er_tmp, on_tmp, mh_tmp = 0, 0, 0, 0
            for f_su in id_su:
                su_tmp += streamflow[feature_id == f_su]
            print(su_tmp)
            for f_er in id_er:
                er_tmp += streamflow[feature_id == f_er]
            for f_on in id_on:
                on_tmp += streamflow[feature_id == f_on]
            for f_mh in id_mh:
                mh_tmp += streamflow[feature_id == f_mh]

            runoff_su.append(su_tmp)
            runoff_er.append(er_tmp)
            runoff_on.append(on_tmp)
            runoff_mh.append(mh_tmp)

    #Convert lists to arrays
    print(runoff_su)
    runoff_su = np.array(runoff_su)
    runoff_er = np.array(runoff_er)
    runoff_on = np.array(runoff_on)
    runoff_mh = np.array(runoff_mh)

    #Reshape the arrays using 4 as the number of values in each day
    runoff_su_daily = runoff_su.reshape(-1, 4)
    runoff_er_daily = runoff_er.reshape(-1, 4)
    runoff_on_daily = runoff_on.reshape(-1, 4)
    runoff_mh_daily = runoff_mh.reshape(-1, 4)
    print(runoff_su_daily)

    #Take the daily mean of the streamflow
    su_daily_means = np.mean(runoff_su_daily, axis=1)
    er_daily_means = np.mean(runoff_er_daily, axis=1)
    on_daily_means = np.mean(runoff_on_daily, axis=1)
    mh_daily_means = np.mean(runoff_mh_daily, axis=1)
    print(su_daily_means)

    #Sum up the days
    total_su = np.sum(su_daily_means)
    total_er = np.sum(er_daily_means)
    total_on = np.sum(on_daily_means)
    total_mh = np.sum(mh_daily_means)
    print(total_su)

    return total_su, total_er, total_on, total_mh

This function loops through the above function using all the ensemble members and then puts the total runoff for each lake into an easy to read dataframe. It could probably be combined later with the above function.

In [102]:
def calculate_runoff_all(ens_members, directory):
    ens = np.arange(1, ens_members + 1)
    df_lakes = pd.DataFrame(index=ens, columns=lakes)
    for i in range(1, ens_members+1):  # Assuming 4 columns for runoff values
        runoff_values = calculate_runoff(i, directory)
        for j, lake in enumerate(lakes):
            df_lakes.loc[i,lake] = runoff_values[j]
    return df_lakes

This function is not used in this script anymore but wanted to leave it in here for future use in the src folder.

In [103]:
def cms_to_mm(flow_rate_cms, lake, month, year):
    # Conversion factor from cubic meters per second to millimeters per month
    # 1 m³/s * (60 * 60 * 24 * days_in_month) seconds/month = cubic meters per month
    # To convert to millimeters per month, we divide by the lake area in square meters

    # Determine lake area of the input lake
    great_lakes_areas = {
        "Superior": 82097 * 1000000,
        "Mich-Huron": (57753+59565)*1000000,
        "Erie": 25655*1000000,
        "Ontario": 19009*1000000
    }

    # Determine number of days in the month using input month/year.
    year = int(year)
    month = int(month)
    if month < 1 or month > 12:
        raise ValueError("Month should be between 1 and 12.")
    # Get the number of days in the month
    days_in_month = calendar.monthrange(year, month)[1]

    conversion_factor = (60 * 60 * 24 *1000 * days_in_month) / great_lakes_areas[lake]

    # Convert flow rate from m³/s to mm/month
    total_mm = flow_rate_cms * conversion_factor

    return total_mm

# Begin Script
Preset Variables

In [104]:
## User Input
dir = f'C:/Users/fitzpatrick/Desktop/Data/'
input_dir = 'C:/Users/fitzpatrick/Desktop/Data/Input/'

# This will allow functions to be used to pull short_range, medium_range or long_range forecasts.
forecast = 'long_range'

# There are 4 ensemble members. If ens_members = 1, it will only pull the first ensember member.
# If ens_members = 4, it will pull all 4 members.
ens_members = 4

# This allows you to specify a specific lake if not all are needed.
lakes = ['Superior','Erie','Ontario','MichHuron']
link_lakes = ['SU','MIHU','ER','ON']  #names in the link file

# Grab today's date for pulling the most current NWM forecast
today = datetime.today().strftime('%Y%m%d')

# Output filename name
outfile = f"NWM_RUNOFF_forecast_{today}.csv"

In [105]:
# This will be the base directory
today_dir = f'{dir}{today}/'
if not os.path.exists(dir):
    os.makedirs(today_dir)
    print("Directory created.")
else:
    print("Directory already exists.")

# This will be where we download NWM files to
download_dir = f'{today_dir}NWM/'
if not os.path.exists(download_dir):
    os.makedirs(download_dir)
    print("NWM directory created.")
else:
    print("NWM directory already exists.")

Directory already exists.
NWM directory created.


This section reads in each of Yi's csv files that contain all of the feature_ids for the streams that discharge into each lake.

In [106]:
# Lake links file where all the feature_ids are listed

links = pd.read_csv(input_dir+'lake_links.csv',sep=',')
id_su= links['SU_ID']
id_er = links['ER_ID']
id_on = links['ON_ID']
id_mh = links['MIHU_ID']

This downloads the NWM files

In [107]:
download_nwm_forecast(forecast, ens_members, download_dir)

Downloading nwm.t00z.long_range.channel_rt_1.f006.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f012.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f018.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f024.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f030.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f036.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f042.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f048.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f054.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f060.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f066.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f072.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f078.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f084.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f090.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f096.conus.nc
Downloading nwm.t00z.long_range.channel_rt_1.f102.conus.

This opens each file and calculates the total runoff into each lake for each ensemble member (1-4).

In [108]:
# This creates the dataframe with runoff into all the lakes for all the ensemble members.
df_lakes = calculate_runoff_all(ens_members,download_dir)
print(df_lakes)

476.9699893388897
476.9699893388897
489.1999890655279
489.1999890655279
489.1999890655279
489.1999890655279
489.1999890655279
501.4299887921661
501.4299887921661
501.4299887921661
501.4299887921661
501.4299887921661
501.4299887921661
501.4299887921661
489.1999890655279
489.1999890655279
489.1999890655279
489.1999890655279
476.9699893388897
476.9699893388897
464.7399896122515
464.7399896122515
464.7399896122515
452.5099898856133
452.5099898856133
440.2799901589751
440.2799901589751
440.2799901589751
428.0499904323369
428.0499904323369
428.0499904323369
415.81999070569873
415.81999070569873
415.81999070569873
403.58999097906053
403.58999097906053
403.58999097906053
391.35999125242233
391.35999125242233
391.35999125242233
379.12999152578413
379.12999152578413
366.89999179914594
366.89999179914594
366.89999179914594
354.66999207250774
354.66999207250774
354.66999207250774
342.43999234586954
342.43999234586954
342.43999234586954
330.20999261923134
330.20999261923134
330.20999261923134
330.2

In [109]:
print(df_lakes)

      Superior         Erie      Ontario    MichHuron
1  9713.677283  9713.677283  9713.677283  9713.677283
2  9585.262286  9585.262286  9585.262286  9585.262286
3  9673.929784  9673.929784  9673.929784  9673.929784
4  9533.284787  9533.284787  9533.284787  9533.284787


In [111]:
# Saves a csv file with total flow into each lake [cms] for that time period
df_lakes.to_csv(today_dir+outfile, sep=',')

PermissionError: [Errno 13] Permission denied: 'C:/Users/fitzpatrick/Desktop/Data/20240717/NWM_RUNOFF_forecast_20240717.csv'