In [1]:
import pandas as pd
import numpy as np
import requests
from fractions import Fraction
from tqdm import tqdm
from os import listdir, mkdir
import xarray as xr
from IOfuncs import *
import cartopy.crs as ccrs
import s3fs
import numcodecs as ncd
import pickle

#many thanks to this here https://mesowest.utah.edu/html/hrrr/zarr_documentation/html/ex_python_plot_zarr.html

fs = s3fs.S3FileSystem(anon=True)

metar_list = pd.read_csv('Data/BOS.csv')

In [2]:
metar_list['visibility'] = [find_visibility(metar) for metar in metar_list['metar']]
metar_list['ceiling'] = [find_ceiling_height(metar) for metar in metar_list['metar']]
metar_list['ifr_event'] = ((500 < metar_list['ceiling']) & (metar_list['ceiling'] <= 1000) | 
                           (1 < metar_list['visibility']) & (metar_list['ceiling'] <= 3))
metar_list['lifr_event'] = (metar_list['ceiling'] <= 500) | (metar_list['visibility'] <= 1)
metar_list['mvfr_event'] = ((1000 < metar_list['ceiling']) & (metar_list['ceiling'] <= 3000) | 
                            (3 < metar_list['visibility']) & (metar_list['ceiling'] <= 5))
metar_list['vfr_event'] = (3000 < metar_list['ceiling']) | (5 < metar_list['visibility'])
metar_list['GLAMP timestamp'] = [f'{format_date(metar_list["valid"][i])}T{GLAMPstamp(metar)}Z'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR day'] = [f'{format_date(metar_list["valid"][i], hrrrDate=True)}'
                                 for i, metar in enumerate(metar_list['metar'])]
metar_list['HRRR time'] = [f'{HRRRstamp(metar_list["valid"][i])}' for i, metar in enumerate(metar_list['metar'])]

In [3]:
ifr_list = metar_list[metar_list['ifr_event']]
lifr_list = metar_list[metar_list['lifr_event']]
mvfr_list = metar_list[metar_list['mvfr_event']]
vfr_list = metar_list[metar_list['vfr_event']]

In [4]:
#Latlon for Logan airport
point_lon, point_lat = -71.0096, 42.3656

# (Variable, level):
var_list = [('TMP', 'surface'),
            ('TMP', '500mb'),
            ('TMP', '700mb'),
            ('TMP', '850mb'),
            ('TMP', '925mb'),
            ('TMP', '1000mb'),
            ('VGRD', '10m_above_ground'),
            ('UGRD', '10m_above_ground'),
            ('VGRD', '250mb'),
            ('UGRD', '250mb'),
            ('VGRD', '300mb'),
            ('UGRD', '300mb'),
            ('VGRD', '500mb'),
            ('UGRD', '500mb'),
            ('VGRD', '700mb'),
            ('UGRD', '700mb'),
            ('VGRD', '850mb'),
            ('UGRD', '850mb'),
            ('VGRD', '925mb'),
            ('UGRD', '925mb'),
            ('VGRD', '1000mb'),
            ('UGRD', '1000mb'),
            ('DPT', '2m_above_ground'),
            ('DPT', '500mb'),
            ('DPT', '700mb'),
            ('DPT', '850mb'),
            ('DPT', '925mb'),
            ('DPT', '1000mb'),
            ('HGT', 'cloud_base'),
            ('HGT', 'cloud_ceiling'),
            ('VIS', 'surface')]

In [5]:
chunk_index = xr.open_zarr(s3fs.S3Map("s3://hrrrzarr/grid/HRRR_chunk_index.zarr", s3=fs))

In [6]:
projection = ccrs.LambertConformal(central_longitude=262.5, 
                                   central_latitude=38.5, 
                                   standard_parallels=(38.5, 38.5),
                                    globe=ccrs.Globe(semimajor_axis=6371229,
                                                     semiminor_axis=6371229))

x, y = projection.transform_point(point_lon, point_lat, ccrs.PlateCarree())

nearest_point = chunk_index.sel(x=x, y=y, method="nearest")
fcst_chunk_id = f"0.{nearest_point.chunk_id.values}"

  indexer = self.index.get_loc(
  indexer = self.index.get_loc(


In [None]:
event_list = lifr_list

print(len(event_list['HRRR time']))
for hr, date in tqdm(zip(event_list['HRRR time'], event_list['HRRR day'])):
    if f'{date}{hr}.csv' in listdir(f'Data/hrrr'):
            continue
    df = pd.DataFrame()
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        data = retrieve_data(data_url + fcst_chunk_id)
        gridpoint_forecast = data[:, nearest_point.in_chunk_x, nearest_point.in_chunk_y]
        df[f'{var}_{level}'] = gridpoint_forecast
    df.to_csv(f'Data/hrrr/{date}{hr}.csv')

In [None]:
event_list = ifr_list

print(len(event_list['HRRR time']))
for hr, date in tqdm(zip(event_list['HRRR time'], event_list['HRRR day'])):
    if f'{date}{hr}.csv' in listdir(f'Data/hrrr'):
        continue
    df = pd.DataFrame()
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        data = retrieve_data(data_url + fcst_chunk_id)
        gridpoint_forecast = data[:, nearest_point.in_chunk_x, nearest_point.in_chunk_y]
        df[f'{var}_{level}'] = gridpoint_forecast
    df.to_csv(f'Data/hrrr/{date}{hr}.csv')

In [29]:
event_list = mvfr_list

print(len(event_list['HRRR time']))
for hr, date in tqdm(zip(event_list['HRRR time'], event_list['HRRR day'])):
    print(date, hr)
    if f'{date}{hr}.csv' in listdir(f'Data/hrrr'):
        continue
    df = pd.DataFrame()
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        data = retrieve_data(data_url + fcst_chunk_id)
        gridpoint_forecast = data[:, nearest_point.in_chunk_x, nearest_point.in_chunk_y]
        df[f'{var}_{level}'] = gridpoint_forecast
    df.to_csv(f'Data/hrrr/{date}{hr}.csv')

2851


0it [00:00, ?it/s]

20200823 20
20200823 20
20200823 20
20200823 20
20200823 20
20200827 16
20200827 18
20200828 11
20200829 08
20200829 09


9it [00:25,  2.89s/it]


KeyboardInterrupt: 

In [None]:
event_list = vfr_list

print(len(event_list['HRRR time']))
for hr, date in tqdm(zip(event_list['HRRR time'], event_list['HRRR day'])):
    df = pd.DataFrame()
    if f'{date}{hr}.csv' in listdir(f'Data/hrrr'):
            continue
    for (var, level) in var_list:
        data_url = f'hrrrzarr/sfc/{date}/{date}_{hr}z_fcst.zarr/{level}/{var}/{level}/{var}/'
        data = retrieve_data(data_url + fcst_chunk_id)
        gridpoint_forecast = data[:, nearest_point.in_chunk_x, nearest_point.in_chunk_y]
        df[f'{var}_{level}'] = gridpoint_forecast
    df.to_csv(f'Data/hrrr/{date}{hr}.csv')

In [None]:
for time in tqdm(event_list['GLAMP timestamp']):   
    if f'{time[:-4]}Z.csv' in listdir('Data/GLAMP IFR data/IFR records'):
        continue
    base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
    params = {'station': 'KBOS',
              'model': 'LAV',
              'runtime': time}
    try:
        response = requests.get(base_url, params=params).json()['data']
        result = pd.DataFrame(response)
        result.to_csv(f'Data/GLAMP IFR data/{time[:-4]}Z.csv')
    except KeyError:
        print(time[:-4])

  2%|█▊                                                                              | 68/2942 [00:00<00:09, 291.01it/s]

2020-09-28T06


  4%|███▍                                                                            | 128/2942 [00:01<00:30, 91.68it/s]

2020-09-28T06


 10%|████████                                                                       | 300/2942 [00:01<00:10, 259.04it/s]

2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00
2020-11-27T00


 12%|█████████▉                                                                      | 366/2942 [00:07<01:30, 28.33it/s]

2020-11-27T00


100%|██████████████████████████████████████████████████████████████████████████████| 2942/2942 [00:14<00:00, 209.11it/s]


2020-09-28T06:00Z, 2020-11-27T00:00Z, 2021-03-30T06, 2021-10-14T00 and 2022-06-29T12 seem to be lost

In [48]:
base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
params = {'station': 'KBOS',
          'model': 'LAV',
          'runtime': '2020-11-26T06:00Z'}
response = requests.get(base_url, params=params)
print(response)
result = pd.DataFrame(response)
#result.to_csv(f'Data/GLAMP IFR data/{time[:-4]}Z.csv')

<Response [200]>


In [80]:
base_url = 'https://mesonet.agron.iastate.edu/api/1/mos.json'
params = {'station': 'KBOS',
          'model': 'LAV',
          'runtime': '2020-07-11T12:00Z'}
response = requests.get(base_url, params=params)
print(response)
result = pd.DataFrame(response)
#result.to_csv(f'Data/GLAMP IFR data/{time[:-4]}Z.csv')

<Response [404]>


In [20]:
H2_path = 'Data/hrrr/20210722/subset_dfa361b29e2779abb789dd9ce7618f6cf8d1bd80__hrrr.t00z.wrfsfcf00.grib2'

In [21]:
file =  Nio.open_file(H2_path,"r")

In [16]:
file.close()

https://www.analyticsvidhya.com/blog/2021/06/understanding-random-forest/ good sklearn tutorial

In [18]:
import datetime as dt

In [38]:
f'{str(dt.datetime.now())[:10]}T{str(dt.datetime.now())[11:13]}:00Z'

'2022-09-14T10:00Z'

'10'

In [35]:
str(dt.datetime.now())

'2022-09-14 10:07:45.830238'