In [1]:
import pandas as pd
import requests
from fractions import Fraction
from tqdm import tqdm
from os import listdir
import cfgrib
from herbie.archive import Herbie
from herbie.tools import fast_Herbie_download
import xarray as xr
import Nio
import Ngl

In [2]:
metar_list = pd.read_csv('Data/BOS.csv')

In [3]:
#this assumes that visibility is always the first value reported in standard miles (ending in SM) in the metar
def find_visibility(metar):
    metar_list = metar.split(' ')
    for i,datapoint in enumerate(metar_list):
        if datapoint[-2:] == 'SM':
            if metar_list[i][0] == 'M':
                #special case to cover >1/4SM visibility
                return float(Fraction(metar_list[i][1:-2]))
            else:
                return float(Fraction(metar_list[i][:-2]))
#these two rely on broken and overcast being the first two things with the leading characters BKN and OVC. Both return None 
#if they don't feature in the METAR
def find_broken_height(metar):
    metar_list = metar.split(' ')
    for i,datapoint in enumerate(metar_list):
        if datapoint[:3] == 'BKN':
            return 100 * int(metar_list[i][3:6])
    return None

def find_overcast_height(metar):
    metar_list = metar.split(' ')
    for i,datapoint in enumerate(metar_list):
        if datapoint[:3] == 'OVC':
            return 100 * int(metar_list[i][3:6])
    return None

#Just combines the two above, handling all the NONE cases
def find_ceiling_height(metar):
    if find_overcast_height(metar) is None and find_broken_height(metar) is None:
        return None
    if find_overcast_height(metar) is None:
        return find_broken_height(metar)
    if find_broken_height(metar) is None:
        return find_overcast_height(metar)
    return min(find_overcast_height(metar), find_broken_height(metar))

#This uses the fact that the timestamp of the METAR is always ddttttZ, allowing for easy conversion to 24-hour Zulu time by trimming the ends
#Then this just looks at the last 6-hour mark preceding that timestamp
def GLAMPstamp(metar):
    metar_list = metar.split(' ')
    datapoint = metar_list[1]
    initalization_time = None
    timestamp = int(datapoint[2:-1])
    if timestamp < 600:
        initalization_time = '00:00'
    elif timestamp < 1200:
        initalization_time = '06:00'
    elif timestamp < 1800:
        initalization_time = '12:00'
    else: 
        initalization_time = '18:00'
    return initalization_time

def HRRRstamp(metar):
    metar_list = metar.split(' ')
    datapoint = metar_list[1]
    return f'{datapoint[2:4]}:00'

#This takes a date in mm/dd/yyyy and converts it to yyyy-mm-dd
def format_date(validString):
    dmy = validString.split(' ')[0]
    month, day, year = dmy.split('/')
    if len(day) == 1:
        day = '0' + day
    if len(month) == 1:
        month = '0' + month
    return f'{year}-{month}-{day}'

In [4]:
metar_list['visibility'] = [find_visibility(metar) for metar in metar_list['metar']]
metar_list['ceiling'] = [find_ceiling_height(metar) for metar in metar_list['metar']]
metar_list['ifr_event'] = (metar_list['ceiling'] <= 1000) | (metar_list['visibility'] <= 1)
metar_list['GLAMP timestamp'] = [f'{format_date(metar_list["valid"][i])}T{GLAMPstamp(metar)}Z'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR timestamp'] = [f'{format_date(metar_list["valid"][i])}T{HRRRstamp(metar)}Z'
                                 for i, metar in enumerate(metar_list['metar'])]

In [5]:
ifr_list = metar_list[metar_list['ifr_event']]
non_ifr_list = metar_list[~metar_list['ifr_event']]

In [6]:
for time in tqdm(ifr_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/IFR records/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  2%|█▊                                                                              | 68/2942 [00:00<00:09, 291.01it/s]

2020-09-28T06


  4%|███▍                                                                            | 128/2942 [00:01<00:30, 91.68it/s]

2020-09-28T06


 10%|████████                                                                       | 300/2942 [00:01<00:10, 259.04it/s]

2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00


 12%|█████████▉                                                                      | 366/2942 [00:07<01:30, 28.33it/s]

2020-11-27T00


100%|██████████████████████████████████████████████████████████████████████████████| 2942/2942 [00:14<00:00, 209.11it/s]


In [8]:
for time in tqdm(non_ifr_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/Non-IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/Non-IFR records/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  5%|███▊                                                                          | 875/17675 [00:06<02:01, 138.79it/s]

2020-09-28T06
2020-09-28T06


  5%|███▉                                                                           | 889/17675 [00:07<09:25, 29.66it/s]

2020-09-28T06
2020-09-28T06
2020-09-28T06
2020-09-28T06
2020-09-28T06


  5%|████                                                                           | 915/17675 [00:10<14:39, 19.06it/s]

2020-09-28T06


 30%|███████████████████████                                                      | 5291/17675 [00:41<01:25, 145.17it/s]

2021-03-30T06
2021-03-30T06
2021-03-30T06
2021-03-30T06
2021-03-30T06


 30%|███████████████████████▍                                                      | 5321/17675 [00:43<08:55, 23.09it/s]

2021-03-30T06


 57%|███████████████████████████████████████████▎                                | 10083/17675 [01:18<00:55, 137.15it/s]

2021-10-14T00
2021-10-14T00
2021-10-14T00
2021-10-14T00
2021-10-14T00


 57%|████████████████████████████████████████████                                 | 10111/17675 [01:20<05:36, 22.49it/s]

2021-10-14T00


 75%|████████████████████████████████████████████████████████▉                   | 13252/17675 [01:43<00:34, 127.97it/s]


KeyboardInterrupt: 

2020-09-28T06:00Z, 2020-11-27T00:00Z, 2021-03-30T06, 2021-10-14T00 and 2022-06-29T12 seem to be lost

In [None]:
base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
params = {'station': 'KBOS',
          'model': 'LAV',
          'runtime': '2020-11-27T00:00Z'}
response = requests.get(base_url, params=params)
print(response)
result = pd.DataFrame(response)
#result.to_csv(f'Data/GLAMP IFR data/{time[:-4]}Z.csv')

In [None]:
herbies = fast_Herbie_download(ifr_list['HRRR timestamp'].tolist(), searchstring='(VGRD|UGRD|TMP|DPT|VIS|)', 
                               model="hrrr", product="prs", save_dir='Data', verbose = True)
'''
for time in tqdm(ifr_list['GLAMP timestamp']):   
    H2 = Herbie(time[:-4], model="hrrr", product="prs", save_dir='Data')
    H2.download('(VGRD|UGRD|TMP|DPT|VIS|)')
'''

✅ Found ┊ model=hrrr ┊ [3mproduct=prs[0m ┊ [38;2;41;130;13m2020-Aug-23 21:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr ┊ [3mproduct=prs[0m ┊ [38;2;41;130;13m2020-Sep-02 22:00 UTC[92m F00[0m✅ Found ┊ model=hrrr ┊ [3mproduct=prs[0m ┊ [38;2;41;130;13m2020-Sep-02 21:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr ✅ Found ┊ model=hrrr┊ [3mproduct=prs[0m ✅ Found  ┊ [38;2;41;130;13m2020-Sep-02 22:00 UTC[92m F00[0m ┊ [3mproduct=prs[0m✅ Found ┊ model=hrrr ┊ [3mproduct=prs[0m ┊ [38;2;41;130;13m2020-Aug-23 21:00 UTC[92m F00[0m  ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr  ┊ model=hrrr ┊ [3mproduct=prs[0m ┊ [38;2;41;130;13m2020-Sep-02 17:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m┊ [3mproduct=prs[0m ┊ [38;2;41;130;13m2020-Aug-2

In [None]:
date="2021-07-22:6"
H2 = Herbie(date, model="hrrr", product="prs", save_dir='Data')
H2.download('(VGRD|UGRD|TMP|DPT|VIS|TCDC|LCDC|MCDC|HCDC)', save_dir = f'Data/hrrr/{date}')

ParserError: Unknown string format: 2021-07-22:6

In [61]:
H2_path = 'Data/hrrr/20210722/subset_0cb0ecbc19417cf0e45cf64e0953a7b245a6c4db__hrrr.t00z.wrfprsf00.grib2'

In [62]:
file =  Nio.open_file(H2_path,"r")

In [54]:
file.close()

In [63]:
print(file.variables)

{'TMP_P0_L1_GLC0': <Nio.NioVariable object at 0x7f7806191a50>, 'TMP_P0_L100_GLC0': <Nio.NioVariable object at 0x7f7806191810>, 'TMP_P0_L103_GLC0': <Nio.NioVariable object at 0x7f78061908e0>, 'DPT_P0_L100_GLC0': <Nio.NioVariable object at 0x7f7806191ba0>, 'DPT_P0_L103_GLC0': <Nio.NioVariable object at 0x7f7806191750>, 'UGRD_P0_L100_GLC0': <Nio.NioVariable object at 0x7f78061911e0>, 'UGRD_P0_L103_GLC0': <Nio.NioVariable object at 0x7f7806191150>, 'VGRD_P0_L100_GLC0': <Nio.NioVariable object at 0x7f7806191450>, 'VGRD_P0_L103_GLC0': <Nio.NioVariable object at 0x7f7806190eb0>, 'TCDC_P0_L10_GLC0': <Nio.NioVariable object at 0x7f78061912d0>, 'LCDC_P0_L214_GLC0': <Nio.NioVariable object at 0x7f7806191390>, 'MCDC_P0_L224_GLC0': <Nio.NioVariable object at 0x7f7806190e50>, 'HCDC_P0_L234_GLC0': <Nio.NioVariable object at 0x7f7806191210>, 'VIS_P0_L1_GLC0': <Nio.NioVariable object at 0x7f7806190fa0>, 'lv_HTGL1': <Nio.NioVariable object at 0x7f7806191cf0>, 'lv_ISBL0': <Nio.NioVariable object at 0x7f7

In [64]:
for key in file.variables:
    print(file.variables[key])

Variable: TMP_P0_L1_GLC0
Type: float
Total Size: 7620564 bytes
            1905141 values
Number of Dimensions: 2
Dimensions and sizes:	[ygrid_0 | 1059] x [xgrid_0 | 1799]
Coordinates: 
            ygrid_0: not a coordinate variable
            xgrid_0: not a coordinate variable
Number of Attributes: 14
         center :	US National Weather Service - NCEP (WMC)
         production_status :	Operational products
         long_name :	Temperature
         units :	K
         _FillValue :	1e+20
         coordinates :	gridlat_0 gridlon_0
         grid_type :	Lambert Conformal can be secant or tangent, conical or bipolar
         parameter_discipline_and_category :	Meteorological products, Temperature
         parameter_template_discipline_category_number :	[0, 0, 0, 0]
         level_type :	Ground or water surface
         level :	0
         forecast_time :	0
         forecast_time_units :	hours
         initial_time :	07/22/2021 (00:00)

Variable: TMP_P0_L100_GLC0
Type: float
Total Size: 304