In [None]:
## This script pulls CFS data from AWS for YESTERDAY (the most complete day). It uses the subprocess
## module to call wgrib2 to do the file conversions, which is the easiest way to do this on a
## Windows machine. wgrib2 must be downloaded on that Windows machine, and the path in the script
## must be changed to where the executable lives. On Linux or macOS, the cfgrib library can be imported
## and used to do the conversion instead.

In [None]:
!py -m pip install boto3
!py -m pip install netCDF4



In [2]:
from datetime import datetime, timedelta
import os
import xarray as xr
from dateutil import relativedelta
import requests
from bs4 import BeautifulSoup
import urllib.request
import urllib.error
import boto3
from botocore import UNSIGNED
from botocore.config import Config
from netCDF4 import Dataset

In [None]:
# Mount Google Drive only when the notebook is running inside Google Colab.
# On a local machine the google.colab package is not installed, so the import
# is guarded to keep "Run All" from failing on this cell.
try:
    from google.colab import drive
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def download_grb2_aws(product, utc, bucket_name, folder_path, download_dir):
    """
    Download matching GRIB2 files from an S3 bucket using unsigned requests.

    Every object under ``folder_path`` whose key contains ``product`` and ends
    with 'Z.grb2' is saved beneath ``download_dir``. The part of the key that
    follows ``folder_path`` is kept as the relative local path.

    Parameters:
    - product: Substring that must appear in the object key (e.g. 'pgb').
    - utc: Not used inside this function; kept so existing calls keep working.
    - bucket_name: Name of the S3 bucket to list and download from.
    - folder_path: Key prefix to list within the bucket.
    - download_dir: Local directory that receives the downloaded files.
    Returns:
    - None. Each downloaded key and the final file count are printed.
    """
    # Unsigned configuration: requests are sent without AWS credentials.
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

    # The paginator follows continuation tokens internally, so listings that
    # span more than one response page are handled without a manual loop.
    paginator = s3.get_paginator('list_objects_v2')

    num_files_downloaded = 0
    for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_path):
        # 'Contents' is absent when a page holds no objects.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if not (product in key and key.endswith('Z.grb2')):
                continue

            local_file_path = os.path.join(download_dir, os.path.relpath(key, folder_path))

            # Ensure the directory structure exists. dirname is '' when the
            # file lands directly in the current directory, and makedirs('')
            # would raise, so only create a directory when there is one.
            target_dir = os.path.dirname(local_file_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            s3.download_file(bucket_name, key, local_file_path)
            num_files_downloaded += 1

            print(f"Downloaded: {key}")

    print(f'Total number of CFS files downloaded from AWS: {num_files_downloaded}')

In [4]:
# Just an easy way to get a list of all the files within a directory
# Helps for running loops
def get_files(directory, format):
    """
    Get a sorted list of the files in a directory that end with a given suffix.

    Parameters:
    - directory: Path to the directory to search (not searched recursively).
    - format: Suffix the file name must end with, e.g. '.grb2' or '.nc'.
    Returns:
    - List of paths (directory joined with file name) for the matching
      entries, sorted by file name.
    """
    # os.listdir returns entries in arbitrary order; sorting makes any loop
    # over the result process files in the same order on every run.
    return [
        os.path.join(directory, file_name)
        for file_name in sorted(os.listdir(directory))
        if file_name.endswith(format)
    ]

In [5]:
## In order to convert grb2 files to netcdf on a windows machine, you need to download wgrib2.exe
## https://www.ftp.cpc.ncep.noaa.gov/wd51we/wgrib2/Windows10/v3.1.3/wgrib2.exe

import subprocess

def grb2_to_netcdf(input_file, output_file, wgrib2_path="C:/Users/fitzpatrick/Downloads/wgrib2"):
    """
    Convert a GRIB2 file to NetCDF by running the wgrib2 executable.

    Parameters:
    - input_file: Path to the GRIB2 file to convert.
    - output_file: Path of the NetCDF file to write.
    - wgrib2_path: Path to the wgrib2 executable. Pass just 'wgrib2' if it is
      on your PATH; the default is the location used when this notebook was
      written.
    Returns:
    - None. A success or failure message is printed.
    Raises:
    - Whatever subprocess.run raises when the executable cannot be started
      (for example a wrong wgrib2_path); only a non-zero exit status is
      caught and reported.
    """
    # Passing the command as a list avoids shell quoting issues with paths
    # that contain spaces.
    command = [wgrib2_path, input_file, "-netcdf", output_file]

    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Conversion failed with error: {e}")
    else:
        print(f"Conversion successful. NetCDF file saved as {output_file}")

In [11]:
# Base folder for downloaded data (local machine).
download_dir = 'C:/Users/fitzpatrick/Desktop/Data/'
# Alternative base folder when running from Google Drive:
#download_dir = '/content/drive/MyDrive/BIL SA Project/Modeling/Data-driven Modeling/Input datasets/Downloaded Data/'

# Product name fragments to look for in the file names.
products = ['pgb', 'flx']
# Two-digit hour strings: '00', '06', '12', '18'.
utc = [f'{hour:02d}' for hour in range(0, 24, 6)]

# Date stamps in YYYYMMDD form, taken from the machine's local clock.
today = f'{datetime.today():%Y%m%d}'
yesterday = f'{datetime.today() - timedelta(days=1):%Y%m%d}'

In [8]:
# Files for yesterday go in <base folder>/<YYYYMMDD>/downloaded/.
# The dated suffix is appended only when it is not already there, so running
# this cell more than once does not keep nesting date folders in download_dir.
dated_subdir = f'{yesterday}/downloaded/'
if not download_dir.endswith(dated_subdir):
    download_dir = f'{download_dir}{dated_subdir}'

if not os.path.exists(download_dir):
    os.makedirs(download_dir)
else:
    print("Directory already exists.")

Directory already exists.


In [9]:
# Download the grib2 files from AWS for every hour in `utc` and every product.
bucket_name = 'noaa-cfs-pds'

# The loop variable must not be called `utc`: rebinding that name would leave
# `utc` holding the last hour string instead of the list, and running this
# cell again would then loop over single characters and download nothing.
for cycle in utc:
    # The folder depends only on the date and hour, so build it once per hour.
    folder_path = f'cfs.{yesterday}/{cycle}/monthly_grib_01/'
    for product in products:
        download_grb2_aws(product, cycle, bucket_name, folder_path, download_dir)

Directory already exists.
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202406.avrg.grib.00Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202406.avrg.grib.06Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202406.avrg.grib.12Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202406.avrg.grib.18Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202407.avrg.grib.00Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202407.avrg.grib.06Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202407.avrg.grib.12Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202407.avrg.grib.18Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202408.avrg.grib.00Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202408.avrg.grib.06Z.grb2
Downloaded: cfs.20240602/00/monthly_grib_01/pgbf.01.2024060200.202408.avrg.grib.12Z.

In [9]:
# Convert every .grb2 file found in download_dir to NetCDF.
grb2_files = get_files(download_dir, '.grb2')

for grib2_file in grb2_files:
    # Swap the trailing '.grb2' extension for '.nc'; the output is written
    # next to the input file.
    base_name, _extension = os.path.splitext(grib2_file)
    output_netcdf_file = base_name + '.nc'
    grb2_to_netcdf(grib2_file, output_netcdf_file)

Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202406.avrg.grib.00Z.nc
Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202406.avrg.grib.06Z.nc
Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202406.avrg.grib.12Z.nc
Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202406.avrg.grib.18Z.nc
Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202407.avrg.grib.00Z.nc
Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202407.avrg.grib.06Z.nc
Conversion successful. NetCDF file saved as C:/Users/fitzpatrick/Desktop/Data/20240602/downloaded/flxf.01.2024060200.202407.avrg.grib.12Z.nc
Conversion su