In [48]:
import pandas as pd
import numpy as np
import requests
from fractions import Fraction
from tqdm import tqdm
from os import listdir, mkdir
import cfgrib
from herbie.archive import Herbie
from herbie.tools import fast_Herbie_download
import xarray as xr
import Nio
import Ngl
import cartopy.crs as ccrs
import s3fs
import numcodecs as ncd
import pickle

#many thanks to this here https://mesowest.utah.edu/html/hrrr/zarr_documentation/html/ex_python_plot_zarr.html

fs = s3fs.S3FileSystem(anon=True)

metar_list = pd.read_csv('Data/BOS.csv')

In [28]:
#this assumes that visibility is always the first value reported in standard miles (ending in SM) in the metar
def find_visibility(metar):
    metar_list = metar.split(' ')
    for i,datapoint in enumerate(metar_list):
        if datapoint[-2:] == 'SM':
            if metar_list[i][0] == 'M':
                #special case to cover >1/4SM visibility
                return float(Fraction(metar_list[i][1:-2]))
            else:
                return float(Fraction(metar_list[i][:-2]))
#these two rely on broken and overcast being the first two things with the leading characters BKN and OVC. Both return None 
#if they don't feature in the METAR
def find_broken_height(metar):
    metar_list = metar.split(' ')
    for i,datapoint in enumerate(metar_list):
        if datapoint[:3] == 'BKN':
            return 100 * int(metar_list[i][3:6])
    return None

def find_overcast_height(metar):
    metar_list = metar.split(' ')
    for i,datapoint in enumerate(metar_list):
        if datapoint[:3] == 'OVC':
            return 100 * int(metar_list[i][3:6])
    return None

#Just combines the two above, handling all the NONE cases
def find_ceiling_height(metar):
    if find_overcast_height(metar) is None and find_broken_height(metar) is None:
        return None
    if find_overcast_height(metar) is None:
        return find_broken_height(metar)
    if find_broken_height(metar) is None:
        return find_overcast_height(metar)
    return min(find_overcast_height(metar), find_broken_height(metar))

#This uses the fact that the timestamp of the METAR is always ddttttZ, allowing for easy conversion to 24-hour Zulu time by trimming the ends
#Then this just looks at the last 6-hour mark preceding that timestamp
def GLAMPstamp(metar):
    metar_list = metar.split(' ')
    datapoint = metar_list[1]
    initalization_time = None
    timestamp = int(datapoint[2:-1])
    if timestamp < 600:
        initalization_time = '00:00'
    elif timestamp < 1200:
        initalization_time = '06:00'
    elif timestamp < 1800:
        initalization_time = '12:00'
    else: 
        initalization_time = '18:00'
    return initalization_time

#Gets the previous hour for the HRRR before the metar
def HRRRstamp(metar):
    metar_list = metar.split(' ')
    datapoint = metar_list[1]
    hour = datapoint.split(':')[0]
    if len(hour) == 1:
        hour = '0' + hour
    return hour

#This takes a date in mm/dd/yyyy and converts it to yyyy-mm-dd
def format_date(validString, hrrrDate=False):
    dmy = validString.split(' ')[0]
    month, day, year = dmy.split('/')
    if len(day) == 1:
        day = '0' + day
    if len(month) == 1:
        month = '0' + month
    if hrrrDate:
        return f'{year}{month}{day}'
    else:
        return f'{year}-{month}-{day}'
    
    
def retrieve_data(s3_url):
    with fs.open(s3_url, 'rb') as compressed_data: # using s3fs
        buffer = ncd.blosc.decompress(compressed_data.read())

        dtype = "<f2"
        if "surface/PRES" in s3_url: # surface/PRES is the only variable with a larger data type
            dtype = "<f4"

        chunk = np.frombuffer(buffer, dtype=dtype)
        
        entry_size = 150*150
        num_entries = len(chunk)//entry_size

        if num_entries == 1: # analysis file is 2d
            data_array = np.reshape(chunk, (150, 150))
        else:
            data_array = np.reshape(chunk, (num_entries, 150, 150))

    return data_array


In [29]:
metar_list['visibility'] = [find_visibility(metar) for metar in metar_list['metar']]
metar_list['ceiling'] = [find_ceiling_height(metar) for metar in metar_list['metar']]
metar_list['ifr_event'] = (metar_list['ceiling'] <= 1000) | (metar_list['visibility'] <= 1)
metar_list['GLAMP timestamp'] = [f'{format_date(metar_list["valid"][i])}T{GLAMPstamp(metar)}Z'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR day'] = [f'{format_date(metar_list["valid"][i], hrrrDate=True)}'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR time'] = [f'{HRRRstamp(metar_list["valid"][i])}' for i, metar in enumerate(metar_list['metar'])]

In [32]:
ifr_list = metar_list[metar_list['ifr_event']]
non_ifr_list = metar_list[~metar_list['ifr_event']]

In [35]:
#Latlon for Logan airport
point_lon, point_lat = -71.0096, 42.3656

# (Variable, level):
var_list = [('TMP', 'surface'),
            ('TMP', '500mb'),
            ('TMP', '700mb'),
            ('TMP', '850mb'),
            ('TMP', '925mb'),
            ('TMP', '1000mb'),
            ('VGRD', '10m_above_ground'),
            ('UGRD', '10m_above_ground'),
            ('VGRD', '250mb'),
            ('UGRD', '250mb'),
            ('VGRD', '300mb'),
            ('UGRD', '300mb'),
            ('VGRD', '500mb'),
            ('UGRD', '500mb'),
            ('VGRD', '700mb'),
            ('UGRD', '700mb'),
            ('VGRD', '850mb'),
            ('UGRD', '850mb'),
            ('VGRD', '925mb'),
            ('UGRD', '925mb'),
            ('VGRD', '1000mb'),
            ('UGRD', '1000mb'),
            ('DPT', '2m_above_ground'),
            ('DPT', '500mb'),
            ('DPT', '700mb'),
            ('DPT', '850mb'),
            ('DPT', '925mb'),
            ('DPT', '1000mb'),
            ('HGT', 'cloud_base'),
            ('HGT', 'cloud_ceiling'),
            ('VIS', 'surface')]

In [36]:
chunk_index = xr.open_zarr(s3fs.S3Map("s3://hrrrzarr/grid/HRRR_chunk_index.zarr", s3=fs))

In [37]:
projection = ccrs.LambertConformal(central_longitude=262.5, 
                                   central_latitude=38.5, 
                                   standard_parallels=(38.5, 38.5),
                                    globe=ccrs.Globe(semimajor_axis=6371229,
                                                     semiminor_axis=6371229))

x, y = projection.transform_point(point_lon, point_lat, ccrs.PlateCarree())

nearest_point = chunk_index.sel(x=x, y=y, method="nearest")
fcst_chunk_id = f"0.{nearest_point.chunk_id.values}"

  indexer = self.index.get_loc(
  indexer = self.index.get_loc(


In [None]:
print(len(ifr_list['HRRR time']))
for hr, date in tqdm(zip(ifr_list['HRRR time'], ifr_list['HRRR day'])):
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        if f'{date}{hr}' not in listdir('Data/hrrr'):
            mkdir(f'Data/hrrr/{date}{hr}')
        if f'{var}_{level}.pickle' in listdir(f'Data/hrrr/{date}{hr}'):
            continue
        data = retrieve_data(data_url + fcst_chunk_id)
        with open(f'Data/hrrr/{date}{hr}/{var}_{level}.pickle', 'wb') as handle:
            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

2942


25it [00:22,  2.45s/it]

In [6]:
for time in tqdm(ifr_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/IFR records/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  2%|█▊                                                                              | 68/2942 [00:00<00:09, 291.01it/s]

2020-09-28T06


  4%|███▍                                                                            | 128/2942 [00:01<00:30, 91.68it/s]

2020-09-28T06


 10%|████████                                                                       | 300/2942 [00:01<00:10, 259.04it/s]

2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00


 12%|█████████▉                                                                      | 366/2942 [00:07<01:30, 28.33it/s]

2020-11-27T00


100%|██████████████████████████████████████████████████████████████████████████████| 2942/2942 [00:14<00:00, 209.11it/s]


In [8]:
for time in tqdm(non_ifr_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/Non-IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/Non-IFR records/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  5%|███▊                                                                          | 875/17675 [00:06<02:01, 138.79it/s]

2020-09-28T06
2020-09-28T06


  5%|███▉                                                                           | 889/17675 [00:07<09:25, 29.66it/s]

2020-09-28T06
2020-09-28T06
2020-09-28T06
2020-09-28T06
2020-09-28T06


  5%|████                                                                           | 915/17675 [00:10<14:39, 19.06it/s]

2020-09-28T06


 30%|███████████████████████                                                      | 5291/17675 [00:41<01:25, 145.17it/s]

2021-03-30T06
2021-03-30T06
2021-03-30T06
2021-03-30T06
2021-03-30T06


 30%|███████████████████████▍                                                      | 5321/17675 [00:43<08:55, 23.09it/s]

2021-03-30T06


 57%|███████████████████████████████████████████▎                                | 10083/17675 [01:18<00:55, 137.15it/s]

2021-10-14T00
2021-10-14T00
2021-10-14T00
2021-10-14T00
2021-10-14T00


 57%|████████████████████████████████████████████                                 | 10111/17675 [01:20<05:36, 22.49it/s]

2021-10-14T00


 75%|████████████████████████████████████████████████████████▉                   | 13252/17675 [01:43<00:34, 127.97it/s]


KeyboardInterrupt: 

2020-09-28T06:00Z, 2020-11-27T00:00Z, 2021-03-30T06, 2021-10-14T00 and 2022-06-29T12 seem to be lost

In [None]:
base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
params = {'station': 'KBOS',
          'model': 'LAV',
          'runtime': '2020-11-27T00:00Z'}
response = requests.get(base_url, params=params)
print(response)
result = pd.DataFrame(response)
#result.to_csv(f'Data/GLAMP IFR data/{time[:-4]}Z.csv')

In [19]:
date="2021-07-22"
H2 = Herbie(date, model="hrrr", product="sfc", save_dir='Data')
H2.download('(VGRD|UGRD|TMP|DPT|VIS)', save_dir = f'Data/hrrr/{date}')

✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2021-Jul-22 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m


  logic = df.search_this.str.contains(searchString)


👨🏻‍🏭 Created directory: [/mnt/c/Users/bpara/Documents/git/nwsFlightCategories/Data/hrrr/20210722]


[48;2;255;255;255m[38;2;136;33;27m▌[0m[38;2;12;53;118m[48;2;240;234;210m▌[38;2;0;0;0m[1mHerbie[0m HRRR model [3msfc[0m product initialized [38;2;41;130;13m2021-Jul-22 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3msource=aws[0m

In [20]:
H2_path = 'Data/hrrr/20210722/subset_dfa361b29e2779abb789dd9ce7618f6cf8d1bd80__hrrr.t00z.wrfsfcf00.grib2'

In [21]:
file =  Nio.open_file(H2_path,"r")

In [16]:
file.close()

In [22]:
print(file.variables)

{'TMP_P0_L1_GLC0': <Nio.NioVariable object at 0x7f065d47e140>, 'TMP_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47e260>, 'TMP_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47e2c0>, 'DPT_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47e170>, 'DPT_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47e3e0>, 'UGRD_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47cca0>, 'UGRD_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47d180>, 'VGRD_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47d030>, 'VGRD_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47d0c0>, 'VIS_P0_L1_GLC0': <Nio.NioVariable object at 0x7f065d47d000>, 'lv_HTGL2': <Nio.NioVariable object at 0x7f065d47ce20>, 'lv_ISBL1': <Nio.NioVariable object at 0x7f065d47c370>, 'lv_ISBL0': <Nio.NioVariable object at 0x7f065d47cf10>, 'gridrot_0': <Nio.NioVariable object at 0x7f065d47ce80>, 'gridlat_0': <Nio.NioVariable object at 0x7f065d47cc70>, 'gridlon_0': <Nio.NioVariable object at 0x7f065d47d2a0>}


In [23]:
for key in file.variables:
    print(file.variables[key])

Variable: TMP_P0_L1_GLC0
Type: float
Total Size: 7620564 bytes
            1905141 values
Number of Dimensions: 2
Dimensions and sizes:	[ygrid_0 | 1059] x [xgrid_0 | 1799]
Coordinates: 
            ygrid_0: not a coordinate variable
            xgrid_0: not a coordinate variable
Number of Attributes: 14
         center :	US National Weather Service - NCEP (WMC)
         production_status :	Operational products
         long_name :	Temperature
         units :	K
         _FillValue :	1e+20
         coordinates :	gridlat_0 gridlon_0
         grid_type :	Lambert Conformal can be secant or tangent, conical or bipolar
         parameter_discipline_and_category :	Meteorological products, Temperature
         parameter_template_discipline_category_number :	[0, 0, 0, 0]
         level_type :	Ground or water surface
         level :	0
         forecast_time :	0
         forecast_time_units :	hours
         initial_time :	07/22/2021 (00:00)

Variable: TMP_P0_L100_GLC0
Type: float
Total Size: 381

In [43]:
file.variables

{'TMP_P0_L1_GLC0': <Nio.NioVariable object at 0x7f065d47e140>, 'TMP_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47e260>, 'TMP_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47e2c0>, 'DPT_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47e170>, 'DPT_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47e3e0>, 'UGRD_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47cca0>, 'UGRD_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47d180>, 'VGRD_P0_L100_GLC0': <Nio.NioVariable object at 0x7f065d47d030>, 'VGRD_P0_L103_GLC0': <Nio.NioVariable object at 0x7f065d47d0c0>, 'VIS_P0_L1_GLC0': <Nio.NioVariable object at 0x7f065d47d000>, 'lv_HTGL2': <Nio.NioVariable object at 0x7f065d47ce20>, 'lv_ISBL1': <Nio.NioVariable object at 0x7f065d47c370>, 'lv_ISBL0': <Nio.NioVariable object at 0x7f065d47cf10>, 'gridrot_0': <Nio.NioVariable object at 0x7f065d47ce80>, 'gridlat_0': <Nio.NioVariable object at 0x7f065d47cc70>, 'gridlon_0': <Nio.NioVariable object at 0x7f065d47d2a0>}