<a href="https://colab.research.google.com/github/chqzeng/WaterSatOnCloud/blob/main/Tool3%20-%20HRRR%20Climate%20Data%20Extraction/Tool3_HRRR_Climate_Data_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Tool3 - HRRR Climate Data Extraction

NOAA High-Resolution Rapid Refresh Data for the contiguous region of the United States, data extraction at sampling points of user-defined locations and time

Data description: https://registry.opendata.aws/noaa-hrrr-pds/

API access: https://mesowest.utah.edu/html/hrrr/

In [1]:
# Install libraries
!pip install rioxarray
!pip install s3fs
!pip install zarr
!pip install pyproj
!pip install xarray[complete]
!pip install cartopy
!pip install ecmwflibs
!pip install pytz tzwhere
!pip install odc-stac
!pip install planetary_computer

Collecting rioxarray
  Downloading rioxarray-0.14.1-py3-none-any.whl (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.5/53.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting rasterio>=1.2 (from rioxarray)
  Downloading rasterio-1.3.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
Collecting affine (from rasterio>=1.2->rioxarray)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio>=1.2->rioxarray)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Installing collected packages: snuggs, affine, rasterio, rioxarray
Successfully installed affine-2.4.0 rasterio-1.3.8 rioxarray-0.14.1 snuggs-1.4.7
Collecting s3fs
  Downloading s3fs-2023.6.0-py3-none-any.whl (28 kB)

In [2]:
# Load libraries
import pystac_client
import planetary_computer
import rioxarray
from datetime import timedelta, datetime
import numpy as np
import pandas as pd
from pathlib import Path
import pytz
from tzwhere import tzwhere
import s3fs
import zarr
import cartopy.crs as ccrs
import xarray as xr

catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)
tzwhere = tzwhere.tzwhere()

  self.timezoneNamesToPolygons[tzname] = WRAP(polys)


In [3]:
fs = s3fs.S3FileSystem(anon=True)
chunk_index = xr.open_zarr(s3fs.S3Map("s3://hrrrzarr/grid/HRRR_chunk_index.zarr", s3=fs))

projection = ccrs.LambertConformal(central_longitude=-97.5,
                             central_latitude=38.5,
                             standard_parallels=[38.5])

def get_nearest_point(projection, chunk_index, longitude, latitude):
    x, y = projection.transform_point(longitude, latitude, ccrs.PlateCarree())
    return chunk_index.sel(x=x, y=y, method="nearest")

# Sample Data

In [4]:
# Sample data - this can be replaced with other data, as long as it has variables 'latitude','longitude', and 'date'

# create data, locations and time from the provided training dataset in the contest
data = [['A', 39.474744, -86.898353, '2021-08-23'],
  ['B', 35.980000, -78.839410, '2021-08-16'],
  ['C', 38.04947, -99.827, '2019-07-23']]

# create the pandas DataFrame
df = pd.DataFrame(data, columns=['sample', 'latitude','longitude','date'])

# print dataframe
df

Unnamed: 0,sample,latitude,longitude,date
0,A,39.474744,-86.898353,2021-08-23
1,B,35.98,-78.83941,2021-08-16
2,C,38.04947,-99.827,2019-07-23


# Extract Matchups

In [5]:
# Create a list to store output
results = []

for i in range(len(df)):
    print(i)
    row = df.iloc[i]

    try:

        ### Time

        # Local time at 12pm
        # https://stackoverflow.com/questions/79797/how-to-convert-local-time-string-to-utc

        naive = datetime.strptime(str(row.date) + " 12:00:00", "%Y-%m-%d %H:%M:%S")
        print('local 12pm: ' + str(naive))

        # Find time zone
        # https://stackoverflow.com/questions/15742045/getting-time-zone-from-lat-long-coordinates

        timezone_str = tzwhere.tzNameAt(row.latitude, row.longitude) # Seville coordinates
        print('Local time zone: ' + str(timezone_str))

        # Find UTC equavalent of local 12pm on that day
        local = pytz.timezone(timezone_str)
        local_dt = local.localize(naive, is_dst=None)
        utc_dt = local_dt.astimezone(pytz.utc)

        print('UTC equavalent: ' + str(utc_dt))

        ### HRRR data
        utc_dt_date = utc_dt.strftime("%Y%m%d")
        utc_dt_hour = utc_dt.strftime("%H")

        url = "s3://hrrrzarr/sfc/" + str(utc_dt_date) + "/" + str(utc_dt_date) + "_" + str(utc_dt_hour) + "z_anl.zarr"

        print('url: ' + str(url))

        fs = s3fs.S3FileSystem(anon=True)
        store = zarr.open(s3fs.S3Map(url, s3=fs))

        ### Nearest point and XY indices
        nearest_point = get_nearest_point(projection, chunk_index, row.longitude, row.latitude)

        first_index = int(nearest_point.index_y.values)
        second_index = int(nearest_point.index_x.values)

        print('Nearest point:')
        print(first_index)
        print(second_index)

        # Access multiple variables at a time: https://mesowest.utah.edu/html/hrrr/zarr_documentation/html/zarr_api_multiple_hrrr_variables.html
        # A complete list of HRRR variables can be found here: https://mesowest.utah.edu/html/hrrr/zarr_documentation/html/zarr_variables.html
        v15 = store["0m_underground/MSTAV/0m_underground/MSTAV"][first_index, second_index]
        v33 = store["10m_above_ground/WIND_max_fcst/10m_above_ground/WIND_max_fcst"][first_index, second_index]
        v134 = store["surface/VIS/surface/VIS"][first_index, second_index]
        v135 = store["surface/GUST/surface/GUST"][first_index, second_index]
        v138 = store["surface/PRES/surface/PRES"][first_index, second_index]
        v140 = store["surface/TMP/surface/TMP"][first_index, second_index]
        v148 = store["surface/SNOWC/surface/SNOWC"][first_index, second_index]
        v168 = store["surface/SHTFL/surface/SHTFL"][first_index, second_index]
        v169 = store["surface/LHTFL/surface/LHTFL"][first_index, second_index]
        v174 = store["surface/GFLUX/surface/GFLUX"][first_index, second_index]
        v178 = store["surface/ULWRF/surface/ULWRF"][first_index, second_index]
        v181 = store["surface/DSWRF/surface/DSWRF"][first_index, second_index]
        v185 = store["surface/LAND/surface/LAND"][first_index, second_index]
        v186 = store["surface/ICEC/surface/ICEC"][first_index, second_index]

        # Save data as a row
        row_data = {'uid': row['sample'],
                    'UTC_time': utc_dt.strftime("%Y/%m/%d/%H"),
                    'longitude': row['longitude'],
                    'latitude': row['latitude'],
                    'v15': v15,
                    'v33': v33,
                    'v134': v134,
                    'v135': v135,
                    'v138': v138,
                    'v140': v140,
                    'v148': v148,
                    'v168': v168,
                    'v169': v169,
                    'v174': v174,
                    'v178': v178,
                    'v181': v181,
                    'v185': v185,
                    'v186': v186}

        print(row_data)
        results.append(row_data)

    except:
        print('Failed to retrieve HRRR')
        pass


results = pd.DataFrame(results)

0
local 12pm: 2021-08-23 12:00:00
Local time zone: America/Indiana/Indianapolis
UTC equavalent: 2021-08-23 16:00:00+00:00
url: s3://hrrrzarr/sfc/20210823/20210823_16z_anl.zarr
Nearest point:
583
1203
{'uid': 'A', 'UTC_time': '2021/08/23/16', 'longitude': -86.898353, 'latitude': 39.474744, 'v15': 15.4, 'v33': 2.375, 'v134': 26400.0, 'v135': 3.217, 'v138': 98900.0, 'v140': 309.0, 'v148': 0.0, 'v168': 258.0, 'v169': 65.0, 'v174': 5.0, 'v178': 481.0, 'v181': 793.0, 'v185': 1.0, 'v186': 0.0}
1
local 12pm: 2021-08-16 12:00:00
Local time zone: America/New_York
UTC equavalent: 2021-08-16 16:00:00+00:00
url: s3://hrrrzarr/sfc/20210816/20210816_16z_anl.zarr
Nearest point:
493
1457
{'uid': 'B', 'UTC_time': '2021/08/16/16', 'longitude': -78.83941, 'latitude': 35.98, 'v15': 75.5, 'v33': 1.501, 'v134': 16300.0, 'v135': 2.768, 'v138': 100920.0, 'v140': 303.5, 'v148': 0.0, 'v168': 116.0, 'v169': 160.0, 'v174': -60.0, 'v178': 452.5, 'v181': 545.5, 'v185': 1.0, 'v186': 0.0}
2
local 12pm: 2019-07-23 12:0

# View and Export Results

In [6]:
# Examine dataframe, the meanings of the variables can be found here: https://mesowest.utah.edu/html/hrrr/zarr_documentation/html/zarr_variables.html
print(results)

  uid       UTC_time  longitude   latitude        v15       v33     v134  \
0   A  2021/08/23/16 -86.898353  39.474744  15.398438  2.375000  26400.0   
1   B  2021/08/16/16 -78.839410  35.980000  75.500000  1.500977  16304.0   
2   C  2019/07/23/17 -99.827000  38.049470  19.906250  1.814453  38592.0   

       v135      v138    v140  v148   v168   v169  v174    v178   v181  v185  \
0  3.216797   98900.0  309.00   0.0  258.0   65.0   5.0  481.00  793.0   1.0   
1  2.767578  100920.0  303.50   0.0  116.0  160.0 -60.0  452.50  545.5   1.0   
2  3.931641   94460.0  307.25   0.0  240.0   80.0 -90.0  464.25  917.0   1.0   

      v186  
0      0.0  
1      0.0  
2 -10000.0  


In [7]:
# Examine a single row
print(results.iloc[0])

uid                      A
UTC_time     2021/08/23/16
longitude       -86.898353
latitude         39.474744
v15              15.398438
v33                  2.375
v134               26400.0
v135              3.216797
v138               98900.0
v140                 309.0
v148                   0.0
v168                 258.0
v169                  65.0
v174                   5.0
v178                 481.0
v181                 793.0
v185                   1.0
v186                   0.0
Name: 0, dtype: object


In [8]:
# To save data if needed
# results.to_csv('HRRR_extracted.csv', index=False)