In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from cdo import Cdo
import subprocess
from helpers import *

In [3]:
# Directory holding the ERA5 NetCDF inputs; other cells read files named
# 'era5_<variable>.nc' (and 'era5_<variable>_regrid.nc') from here.
data_dir = '../../precip_data/era5_data/'
# Output root for the predictor files; the extraction loop writes into the
# 'train/' and 'test/' sub-folders beneath it.
data_save = '../../precip_data/predictors/'
# Third argument passed to con_grid when regridding. Relative path, so it is
# presumably resolved against the notebook's working directory -- TODO confirm.
target_grid_file = 'targetgrid.txt'

In [5]:
# Regrid the three column/cloud fields. For each one, con_grid (presumably
# provided by the star-import from helpers) is handed the source path
# 'era5_<name>.nc', the destination path 'era5_<name>_regrid.nc' (both inside
# data_dir) and the target grid description file.
for variable in ('total_column_water_vapour',
                 'total_column_cloud_liquid_water',
                 'total_cloud_cover'):
    input_file = f'{data_dir}era5_{variable}.nc'
    output_file = f'{data_dir}era5_{variable}_regrid.nc'
    con_grid(input_file, output_file, target_grid_file)

In [4]:
# Predictor variables handled by the extraction loop, in processing order.
# Each name is either the '<variable>' part of an 'era5_<variable>.nc' file
# or a key that the loop routes to a dedicated helper.
variables = [
    'k_index',
    # near-surface fields
    '2m_temperature',
    '2m_dewpoint_temperature',
    # column / cloud fields (read from the '*_regrid.nc' files)
    'total_column_water_vapour',
    'total_column_cloud_liquid_water',
    'total_cloud_cover',
    'vertically_integrated_moisture_divergence',
    'convective_available_potential_energy',
    'convective_inhibition',
    # derived through helper functions rather than a single file
    'shear',
    'pressure_tendency',
    # 'stream',  # currently disabled; the loop still has a branch for it
    # fields suffixed with a level value
    'temperature_500',
    'temperature_850',
    'specific_humidity_500',
    'specific_humidity_600',
    'specific_humidity_700',
    'specific_humidity_925',
    'relative_humidity_300',
    'relative_humidity_500',
]

In [None]:
# Reference timestamps for the train and test periods. pd.date_range is called
# without `freq`, so it yields one timestamp per day.
# NOTE(review): the 'MM/DD/YYYYTHH' strings rely on pandas' month-first
# parsing -- confirm they resolve to 2 Dec / 1 Dec as intended.
train_t = pd.date_range(start='12/02/2000T06', end='12/01/2018T06')
test_t = pd.date_range(start='12/02/2018T06', end='12/01/2019T06')
# Predictors are selected 30 hours before each reference timestamp.
train_time = train_t - pd.Timedelta(hours=30)
test_time = test_t - pd.Timedelta(hours=30)

# Variables that are read from the 'era5_<variable>_regrid.nc' files produced
# by the regridding cell. The spelling must match the entries in `variables`
# ('vapour', not 'vapor'); otherwise the regridded file is silently skipped
# and the raw file is read through the generic branch instead.
regridded_variables = (
    'total_column_water_vapour',
    'total_column_cloud_liquid_water',
    'total_cloud_cover',
)

# Variables whose datasets are squeezed and stripped of the 'number' and
# 'expver' variables before selection. Presumably these files carry a
# singleton level dimension -- TODO confirm against the downloaded files.
squeezed_variables = (
    'temperature_500',
    'temperature_850',
    'specific_humidity_500',
    'specific_humidity_600',
    'specific_humidity_700',
    'specific_humidity_925',
    'relative_humidity_300',
    'relative_humidity_500',
)

for variable in variables:
    # --- load -------------------------------------------------------------
    if variable in regridded_variables:
        data = xr.open_dataset(f'{data_dir}era5_{variable}_regrid.nc')
    elif variable == 'pressure_tendency':
        data = get_pressure_tendency(data_dir)
    elif variable == 'shear':
        data = get_shear(data_dir)
    elif variable == 'stream':
        data = get_stream(data_dir)
    elif variable == 'vertically_integrated_moisture_divergence':
        data = accum_vimd(data_dir)
    else:
        data = xr.open_dataset(f'{data_dir}era5_{variable}.nc')
        # select 1x1
        data = sel_grid(data)

    # --- clean ------------------------------------------------------------
    if variable in squeezed_variables:
        data = data.squeeze()
        data = data.drop_vars(['number', 'expver'])

    if variable == 'convective_inhibition':
        # Missing values are replaced with 0 for this variable only.
        data = data.fillna(0)

    # --- split and save ---------------------------------------------------
    # Exact-label selection on 'valid_time': every requested timestamp must
    # exist in the dataset, otherwise .sel raises a KeyError.
    data_train = data.sel(valid_time=train_time)
    data_test = data.sel(valid_time=test_time)

    var_name = get_name(variable)

    data_train.to_netcdf(f'{data_save}train/{var_name}_2000_2018.nc')
    data_test.to_netcdf(f'{data_save}test/{var_name}_2019.nc')