In [1]:
import os
import pickle
import xarray as xr
import numpy as np
import pandas as pd
from pyproj import CRS, Transformer, Proj
from scipy.spatial.distance import cdist
from metpy.calc import dewpoint_from_relative_humidity
from metpy.units import units
import geopandas as gpd
import logging

logging.basicConfig(filename='logs.log', level=logging.INFO)


In [2]:
path_geojson = "/glade/p/cisl/aiml/jtti_tornado/gridrad_poly_geojson/"
path_rap = "/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/"
path_save = "/glade/p/cisl/aiml/jtti_tornado/gridrad_soundings/"
t

In [3]:
varsPressure = ['HGT', 'TMP', 'RH', 'UGRD', 'VGRD', 'VVEL']

def df_flatten(ds, x, y, varsP):
    
    df = ds.isel(x=x,y=y).to_dataframe()[varsP]
    idx0 = df.index.levels[0].astype(int).astype(str)
    idx1 = df.index.levels[1]
    df.index = df.index.set_levels([idx0, idx1])
    df = df.unstack(level='press').sort_index()
    df.columns = df.columns.map('_'.join)
    
    return df


In [4]:
def find_coord_indices(lon_array, lat_array, lon_points, lat_points, dist_proj='lcc_RAP'):
    """
    Find indices of nearest lon/lat pair on a grid. Supports rectilinear and curilinear grids.
    lon_points / lat_points must be received as a list.
    Args:
        lon_array (np.array): Longitude values of coarse grid you are matching against
        lat_array (np.array): Latitude values of coarse grid you are matching against
        lon_points (list): List of Longitude points from orginal grid/object
        lat_points (list): List of Latitude points from original grid/object
        dist_proj (str): Name of projection for pyproj to calculate distances
    Returns (list):
        List of i, j (Lon/Lat) indices for coarse grid.
    """
    if dist_proj == 'lcc_WRF':
        proj = Proj(proj='lcc', R=6371229, lat_0=38, lon_0=-97.5, lat_1=32, lat_2=46)  ## from WRF HWT data
    if dist_proj == 'lcc_RAP':
        proj = Proj(proj='lcc', R=6371229, lat_0=25, lon_0=265, lat_1=25, lat_2=25)

    proj_lon, proj_lat = np.array(proj(lon_array, lat_array))  # transform to distances using specified projection
    lonlat = np.column_stack(
        (proj_lon.ravel(), proj_lat.ravel()))  # Stack all coarse x, y distances for array shape (n, 2)
    ll = np.array(proj(lon_points, lat_points)).T  # transform lists of fine grid x, y to match shape (n, 2)
    idx = cdist(lonlat, ll).argmin(0)  # Calculate all distances and get index of minimum

    return np.column_stack((np.unravel_index(idx, lon_array.shape))).tolist()


In [5]:
def calc_dewpoint(df): # Create T_DEWPOINT columns from RH and TMP
    if df.isnull().any().any():
        print(f"DEWPOINT CONVERSION: {df['date_rap'][0]}")
        print(df[df.isnull().any(axis=1)][['datetime'] + list(df.columns[df.isna().any()])])
        df = df[~df.isnull().any(axis=1)]
    for p in list(range(100, 1025, 25)):
        df_RH = units.Quantity(np.array(df[f'RH_{p}'])/100., "dimensionless")
        df_TMP =  units.Quantity(np.array(df[f'TMP_{p}']), "K")
        df[f'T_DEWPOINT_{p}'] = dewpoint_from_relative_humidity(df_TMP, df_RH) 
    return df


In [6]:
def convert_KtoC(df, varsUnits_dict):
    # change variables from Kelvin to Celsius
    for var, units in varsUnits_dict.items():
        if units == 'K':
            try:
                df[var] = df[var] - 273.15            
                varsUnits_dict[var] = 'C'
            except:
                continue
    return df


In [7]:
def add_units(df, varsUnits_dict):
    # Rename columns to include units
    for column in list(df.columns):
        if column in list(varsUnits_dict.keys()):
            df.rename(columns={column: column + '_' + varsUnits_dict[column]}, inplace=True)
    return df


In [9]:
def replace_RH0(df):
    
    pressure_levels = list(range(100, 1025, 25))
    dewpoint_levels = [f'T_DEWPOINT_{p}_C' for p in pressure_levels]

    RH0_count = 0
    if df.isna().sum().sum() > 0:
        idx, idx_col = np.where(df[dewpoint_levels].isnull())
        RH0_count += len(idx)
        for ix, ixc in zip(idx, idx_col):
            press = pressure_levels[ixc]
            if df.at[ix, f'RH_{press}_percent'] == 0.0:
                df.at[ix, f'RH_{press}_percent'] == 1.0
                RH = units.Quantity(1.0/100., "dimensionless")
            else:
                print(f"RH not 0.0% at {ix, ixc} in file {f}")
                logging.info(f"RH not 0.0% at {ix, ixc} in file {f}")
            TMP =  units.Quantity(df.at[ix, f'TMP_{press}_C'] + 273.15, "K")
            df.at[ix, dewpoint_levels[ixc]] = np.array(dewpoint_from_relative_humidity(TMP, RH))
    print(f"Elements affected by 0% RH: {RH0_count}")
    logging.info(f"Elements affected by 0% RH: {RH0_count}")
    return df


In [16]:
dates_geojson = [x[-16:-8] for x in os.listdir(os.path.join(path_geojson)) if x.endswith('.geojson')]
dates_done = [x[-12:-4] for x in os.listdir(os.path.join(path_save, "final_rap")) if x.endswith('.csv')]
dates_missing = list(set(dates_geojson) - set(dates_done))
dates_missing


['20120413',
 '20111008',
 '20151223',
 '20111007',
 '20110529',
 '20120409',
 '20110608',
 '20130826',
 '20120414',
 '20110305',
 '20110610',
 '20110612',
 '20110627']

In [18]:
lat = 'centroid_latitude_deg'
lon = 'centroid_longitude_deg'
with open(os.path.join("/glade/p/cisl/aiml/ai2es/winter_ptypes/precip_rap/", "varsUnits_dict.pkl"), 'rb') as f:
    varsUnits_dict = pickle.load(f)
    
datetimes_missing = []
# for f in os.listdir(os.path.join(path_geojson)):
for d in dates_missing:
    f = f"storm_poly_{d}.geojson"
    df = gpd.read_file(os.path.join(path_geojson, f))
    df = df.drop('geometry', 1)
    df['datetime'] = pd.to_datetime(df['valid_time_unix_sec'], unit='s')
    df['datetime_rap'] = df['datetime'].dt.floor(freq='H')
    df['date_rap'] = df['datetime_rap'].dt.strftime('%Y%m%d')
    df['hour_rap'] = df['datetime_rap'].dt.strftime('%X').str[:2]
    date = df['date_rap'][0]
    logging.info(f'{f}')
    dfs = []
    for name, datetime_chunk in df.groupby('datetime_rap'):
        hour = name.strftime('%H')
        # try to open a dataset if one is available and not corrupted
        try:
            ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
        except FileNotFoundError:
            try:
                ds= xr.open_dataset(os.path.join(path_rap, date, f"ruc2anl_130_{date}_{hour}00_000.nc"))
            except Exception as e:
                print(date, hour, e)
                logging.info(date, hour, e)
                datetimes_missing.append(name)
                continue

        # calculate projected indices
        datetime_chunk['idx'] = find_coord_indices(ds['longitude'].values, ds['latitude'].values,
                                                   list(datetime_chunk[lon]), list(datetime_chunk[lat]))

        # create new merged dataframe
        for index, row in datetime_chunk.iterrows():
            try:
                ds_temp = df_flatten(ds, row['idx'][1], row['idx'][0], varsPressure)
            except Exception as e:
                print("\t- ", date, hour, e)
                logging.info("\t- ", date, hour, e)
                continue
            df_temp = pd.DataFrame(row).T.merge(ds_temp.rename(index={0:row.name}), left_on='datetime_rap', right_on='time')
            dfs.append(df_temp)
            
    if len(dfs) > 0:
        df_save = pd.concat(dfs, ignore_index=True)
        # add dewpoint, convert K to C, rename columns to add units, sort by datetime, and save
        df_save = calc_dewpoint(df_save)
        df_save = convert_KtoC(df_save, varsUnits_dict)
        df_save = add_units(df_save, varsUnits_dict)
        df_save = replace_RH0(df_save)
        df_save = df_save.sort_values(by="datetime_rap")
        print(f"For {date}, was able to load {df_save.shape[0]} rows out of {df.shape[0]}")
        logging.info(f"For {date}, was able to load {df_save.shape[0]} rows out of {df.shape[0]}")
        if 0 in df_save.shape:
            print(f"Nothing to save for {date}")
            logging.info(f"Nothing to save for {date}")
        else:
            df_save.to_parquet(os.path.join(path_save, "final_rap", f"{f.replace('geojson', 'parquet')}"))
            df_save.to_csv(os.path.join(path_save, "final_rap", f"{f.replace('geojson', 'csv')}"))


20120413 19 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_1900_000.nc'
20120413 20 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_2000_000.nc'
20120413 21 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_2100_000.nc'
20120413 22 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_2200_000.nc'


--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/rap_130_20120413_1900_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20120413 23 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_2300_000.nc'
20120413 00 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_0000_000.nc'
20120413 01 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_0100_000.nc'
20120413 02 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_0200_000.nc'
20120413 03 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_0300_000.nc'
20120413 04 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_0400_000.nc'
20120413 05 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120413/ruc2anl_130_20120413_0500_000.nc'
20120413 06 [

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20151223/rap_130_20151223_1300_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20111007 21 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_2100_000.nc'
20111007 22 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_2200_000.nc'
20111007 23 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_2300_000.nc'


--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/rap_130_20111007_2100_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20111007 08 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_0800_000.nc'
20111007 09 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_0900_000.nc'
20111007 10 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_1000_000.nc'
20111007 11 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/ruc2anl_130_20111007_1100_000.nc'


--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20111007/rap_130_20111007_0800_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

Elements affected by 0% RH: 0
For 20111007, was able to load 0 rows out of 3010
Nothing to save for 20111007
20110529 15 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/ruc2anl_130_20110529_1500_000.nc'
20110529 16 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/ruc2anl_130_20110529_1600_000.nc'
20110529 17 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/ruc2anl_130_20110529_1700_000.nc'
20110529 18 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/ruc2anl_130_20110529_1800_000.nc'
20110529 19 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/ruc2anl_130_20110529_1900_000.nc'
20110529 20 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/ruc2anl_130_20110529_2000_000.nc'
20110529 21 [Errno 2] No such file or directo

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110529/rap_130_20110529_1500_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20120409 00 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120409/ruc2anl_130_20120409_0000_000.nc'
20120409 01 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120409/ruc2anl_130_20120409_0100_000.nc'
20120409 02 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120409/ruc2anl_130_20120409_0200_000.nc'
20120409 03 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120409/ruc2anl_130_20120409_0300_000.nc'
20110608 21 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110608/ruc2anl_130_20110608_2100_000.nc'
20110608 22 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110608/ruc2anl_130_20110608_2200_000.nc'
20110608 23 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110608/ruc2anl_130_20110608_2300_000.nc'
Elements affe

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20130826/rap_130_20130826_2100_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20130826 22 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20130826/ruc2anl_130_20130826_2200_000.nc'
20130826 23 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20130826/ruc2anl_130_20130826_2300_000.nc'
Elements affected by 0% RH: 0
For 20130826, was able to load 0 rows out of 1051
Nothing to save for 20130826
20120414 15 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120414/ruc2anl_130_20120414_1500_000.nc'
20120414 16 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120414/ruc2anl_130_20120414_1600_000.nc'
20120414 17 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120414/ruc2anl_130_20120414_1700_000.nc'
20120414 18 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120414/ruc2anl_130_20120414_1800_000.nc'
20120414 19 [Errno 2] No such file or directo

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20120414/rap_130_20120414_1500_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20110305 12 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1200_000.nc'
20110305 13 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1300_000.nc'
20110305 14 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1400_000.nc'
20110305 15 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1500_000.nc'
20110305 16 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1600_000.nc'
20110305 17 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1700_000.nc'
20110305 18 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/ruc2anl_130_20110305_1800_000.nc'
20110305 19 [

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110305/rap_130_20110305_1200_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

20110610 20 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110610/ruc2anl_130_20110610_2000_000.nc'
20110610 21 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110610/ruc2anl_130_20110610_2100_000.nc'
20110610 22 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110610/ruc2anl_130_20110610_2200_000.nc'
20110610 23 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110610/ruc2anl_130_20110610_2300_000.nc'


--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110610/rap_130_20110610_2000_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

Elements affected by 0% RH: 0
For 20110610, was able to load 0 rows out of 7809
Nothing to save for 20110610
20110612 15 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/ruc2anl_130_20110612_1500_000.nc'
20110612 16 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/ruc2anl_130_20110612_1600_000.nc'
20110612 17 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/ruc2anl_130_20110612_1700_000.nc'
20110612 18 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/ruc2anl_130_20110612_1800_000.nc'
20110612 19 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/ruc2anl_130_20110612_1900_000.nc'
20110612 20 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/ruc2anl_130_20110612_2000_000.nc'
20110612 21 [Errno 2] No such file or directo

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110612/rap_130_20110612_1500_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

Elements affected by 0% RH: 0
For 20110612, was able to load 0 rows out of 4172
Nothing to save for 20110612
20110627 21 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/ruc2anl_130_20110627_2100_000.nc'
20110627 22 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/ruc2anl_130_20110627_2200_000.nc'
20110627 23 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/ruc2anl_130_20110627_2300_000.nc'
20110627 00 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/ruc2anl_130_20110627_0000_000.nc'
20110627 01 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/ruc2anl_130_20110627_0100_000.nc'
20110627 02 [Errno 2] No such file or directory: b'/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/ruc2anl_130_20110627_0200_000.nc'
20110627 03 [Errno 2] No such file or directo

--- Logging error ---
Traceback (most recent call last):
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/file_manager.py", line 199, in _acquire_with_cache_info
    file = self._cache[self._key]
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/lru_cache.py", line 53, in __getitem__
    value = self._cache[key]
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/glade/p/cisl/aiml/conv_risk_intel/rap_ncei_nc/20110627/rap_130_20110627_2100_000.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-18-446fe6b4482f>", line 23, in <module>
    ds = xr.open_dataset(os.path.join(path_rap, date, f"rap_130_{date}_{hour}00_000.nc"))
  File "/glade/work/ggantos/miniconda2/envs/holo_torch/lib/python3.6/site-packages/xarray/backends/ap

In [None]:
with open("logs.log") as log:
    print(log.read())

In [None]:
for f in tqdm(os.listdir(os.path.join(path_geojson))):
    df = gpd.read_file(os.path.join(path_geojson, f))
    df = df.drop('geometry', 1)
    df['datetime'] = pd.to_datetime(df['valid_time_unix_sec'], unit='s')
    df['datetime_rap'] = df['datetime'].dt.floor(freq='H')
    df['date_rap'] = df['datetime_rap'].dt.strftime('%Y%m%d')
    df['hour_rap'] = df['datetime_rap'].dt.strftime('%X').str[:2]
    
    try:
        df_rap = pd.read_parquet(os.path.join(path_save, "final_rap", f"{f.replace('geojson', 'parquet')}"))
        df_merged = df.merge(df_rap, how='left', on=list(set(df_rap.columns).intersection(set(df.columns))))
        df_merged.to_parquet(os.path.join(path_save, "final_rap_all", f"{f.replace('geojson', 'parquet')}"))
        df_merged.to_csv(os.path.join(path_save, "final_rap_all", f"{f.replace('geojson', 'csv')}"))
    except Exception as e:
        print(f, e)
        df.to_parquet(os.path.join(path_save, "final_rap_all", f"{f.replace('geojson', 'parquet')}"))
        df.to_csv(os.path.join(path_save, "final_rap_all", f"{f.replace('geojson', 'csv')}"))
