In [1]:
import pandas as pd
import numpy as np
import requests
from fractions import Fraction
from tqdm import tqdm
from os import listdir, mkdir
import xarray as xr
from IOfuncs import *
import cartopy.crs as ccrs
import s3fs
import numcodecs as ncd
import pickle

#many thanks to this here https://mesowest.utah.edu/html/hrrr/zarr_documentation/html/ex_python_plot_zarr.html

fs = s3fs.S3FileSystem(anon=True)

metar_list = pd.read_csv('Data/BOS.csv')

In [5]:
metar_list['visibility'] = [find_visibility(metar) for metar in metar_list['metar']]
metar_list['ceiling'] = [find_ceiling_height(metar) for metar in metar_list['metar']]
metar_list['ifr_event'] = ((500 < metar_list['ceiling']) & (metar_list['ceiling'] <= 1000) | 
                           (1 < metar_list['visibility']) & (metar_list['ceiling'] <= 3))
metar_list['lifr_event'] = (metar_list['ceiling'] <= 500) | (metar_list['visibility'] <= 1)
metar_list['mvfr_event'] = ((1000 < metar_list['ceiling']) & (metar_list['ceiling'] <= 3000) | 
                            (3 < metar_list['visibility']) & (metar_list['ceiling'] <= 5))
metar_list['vfr_event'] = (3000 < metar_list['ceiling']) | (5 < metar_list['visibility'])
metar_list['GLAMP timestamp'] = [f'{format_date(metar_list["valid"][i])}T{GLAMPstamp(metar)}Z'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR day'] = [f'{format_date(metar_list["valid"][i], hrrrDate=True)}'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR time'] = [f'{HRRRstamp(metar_list["valid"][i])}' for i, metar in enumerate(metar_list['metar'])]

In [None]:
ifr_list = metar_list[metar_list['ifr_event']]
non_ifr_list = metar_list[~metar_list['ifr_event']]
np.random.seed(42)
non_ifr_draws = non_ifr_list.iloc[np.random.choice(len(non_ifr_list), 2942)]

In [None]:
#Latlon for Logan airport
point_lon, point_lat = -71.0096, 42.3656

# (Variable, level):
var_list = [('TMP', 'surface'),
            ('TMP', '500mb'),
            ('TMP', '700mb'),
            ('TMP', '850mb'),
            ('TMP', '925mb'),
            ('TMP', '1000mb'),
            ('VGRD', '10m_above_ground'),
            ('UGRD', '10m_above_ground'),
            ('VGRD', '250mb'),
            ('UGRD', '250mb'),
            ('VGRD', '300mb'),
            ('UGRD', '300mb'),
            ('VGRD', '500mb'),
            ('UGRD', '500mb'),
            ('VGRD', '700mb'),
            ('UGRD', '700mb'),
            ('VGRD', '850mb'),
            ('UGRD', '850mb'),
            ('VGRD', '925mb'),
            ('UGRD', '925mb'),
            ('VGRD', '1000mb'),
            ('UGRD', '1000mb'),
            ('DPT', '2m_above_ground'),
            ('DPT', '500mb'),
            ('DPT', '700mb'),
            ('DPT', '850mb'),
            ('DPT', '925mb'),
            ('DPT', '1000mb'),
            ('HGT', 'cloud_base'),
            ('HGT', 'cloud_ceiling'),
            ('VIS', 'surface')]

In [None]:
chunk_index = xr.open_zarr(s3fs.S3Map("s3://hrrrzarr/grid/HRRR_chunk_index.zarr", s3=fs))

In [None]:
projection = ccrs.LambertConformal(central_longitude=262.5, 
                                   central_latitude=38.5, 
                                   standard_parallels=(38.5, 38.5),
                                    globe=ccrs.Globe(semimajor_axis=6371229,
                                                     semiminor_axis=6371229))

x, y = projection.transform_point(point_lon, point_lat, ccrs.PlateCarree())

nearest_point = chunk_index.sel(x=x, y=y, method="nearest")
fcst_chunk_id = f"0.{nearest_point.chunk_id.values}"

In [None]:
print(len(ifr_list['HRRR time']))
for hr, date in tqdm(zip(ifr_list['HRRR time'], ifr_list['HRRR day'])):
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        if f'{date}{hr}' not in listdir('Data/hrrr'):
            mkdir(f'Data/hrrr/{date}{hr}')
        if f'{var}_{level}.pickle' in listdir(f'Data/hrrr/{date}{hr}'):
            continue
        data = retrieve_data(data_url + fcst_chunk_id)
        with open(f'Data/hrrr/{date}{hr}/{var}_{level}.pickle', 'wb') as handle:
            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
print(len(non_ifr_draws['HRRR time']))
for hr, date in tqdm(zip(non_ifr_draws['HRRR time'], non_ifr_draws['HRRR day'])):
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        if f'{date}{hr}' not in listdir('Data/hrrr'):
            mkdir(f'Data/hrrr/{date}{hr}')
        if f'{var}_{level}.pickle' in listdir(f'Data/hrrr/{date}{hr}'):
            continue
        data = retrieve_data(data_url + fcst_chunk_id)
        with open(f'Data/hrrr/{date}{hr}/{var}_{level}.pickle', 'wb') as handle:
            pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

2942


87it [27:29, 18.96s/it]

KeyboardInterrupt



In [6]:
for time in tqdm(ifr_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/IFR records/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  2%|█▊                                                                              | 68/2942 [00:00<00:09, 291.01it/s]

2020-09-28T06


  4%|███▍                                                                            | 128/2942 [00:01<00:30, 91.68it/s]

2020-09-28T06


 10%|████████                                                                       | 300/2942 [00:01<00:10, 259.04it/s]

2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00


 12%|█████████▉                                                                      | 366/2942 [00:07<01:30, 28.33it/s]

2020-11-27T00


100%|██████████████████████████████████████████████████████████████████████████████| 2942/2942 [00:14<00:00, 209.11it/s]


In [8]:
for time in tqdm(non_ifr_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/Non-IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/Non-IFR records/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  5%|███▊                                                                          | 875/17675 [00:06<02:01, 138.79it/s]

2020-09-28T06
2020-09-28T06


  5%|███▉                                                                           | 889/17675 [00:07<09:25, 29.66it/s]

2020-09-28T06
2020-09-28T06
2020-09-28T06
2020-09-28T06
2020-09-28T06


  5%|████                                                                           | 915/17675 [00:10<14:39, 19.06it/s]

2020-09-28T06


 30%|███████████████████████                                                      | 5291/17675 [00:41<01:25, 145.17it/s]

2021-03-30T06
2021-03-30T06
2021-03-30T06
2021-03-30T06
2021-03-30T06


 30%|███████████████████████▍                                                      | 5321/17675 [00:43<08:55, 23.09it/s]

2021-03-30T06


 57%|███████████████████████████████████████████▎                                | 10083/17675 [01:18<00:55, 137.15it/s]

2021-10-14T00
2021-10-14T00
2021-10-14T00
2021-10-14T00
2021-10-14T00


 57%|████████████████████████████████████████████                                 | 10111/17675 [01:20<05:36, 22.49it/s]

2021-10-14T00


 75%|████████████████████████████████████████████████████████▉                   | 13252/17675 [01:43<00:34, 127.97it/s]


KeyboardInterrupt: 

2020-09-28T06:00Z, 2020-11-27T00:00Z, 2021-03-30T06, 2021-10-14T00 and 2022-06-29T12 seem to be lost

In [None]:
base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
params = {'station': 'KBOS',
          'model': 'LAV',
          'runtime': '2020-11-27T00:00Z'}
response = requests.get(base_url, params=params)
print(response)
result = pd.DataFrame(response)
#result.to_csv(f'Data/GLAMP IFR data/{time[:-4]}Z.csv')

In [20]:
H2_path = 'Data/hrrr/20210722/subset_dfa361b29e2779abb789dd9ce7618f6cf8d1bd80__hrrr.t00z.wrfsfcf00.grib2'

In [21]:
file =  Nio.open_file(H2_path,"r")

In [16]:
file.close()

https://www.analyticsvidhya.com/blog/2021/06/understanding-random-forest/ good sklearn tutorial