In [1]:
import hsa
import numpy as np
import xarray as xr
import xarray.ufuncs as xu
from dask.diagnostics import ProgressBar
import scipy.stats as ss
import paths as ps
from datetime import datetime, timedelta

In [3]:
# Target grid for the forecast subset: 0.5-spaced points with both endpoints
# included (261 longitudes, 121 latitudes). linspace states the inclusive stop
# and the point count directly, instead of padding arange's exclusive stop by
# 0.1 to keep the final value; the generated values are the same.
lons = np.linspace(180, 310, 261)
lats = np.linspace(20, 80, 121)
# Arguments are presumably (statistic, variable, forecast hour) -- confirm against hsa.
nfa = hsa.NewForecastArray('mean','slp',24)
gefs = nfa.load_forecast(subset_lat=lats,subset_lon=lons)


In [4]:

variable = 'slp'
# The model date is parsed from characters 5-12 (YYYYMMDD) of the last line
# of the current-run log file.
with open(ps.log_directory + 'current_run.txt', "r") as run_log:
    last_entry = run_log.readlines()[-1]
model_date = datetime.strptime(last_entry[5:13], '%Y%m%d')
mc = hsa.MClimate(model_date, variable, 24, percentage=10)

In [5]:
def _coord_name(obj, key):
    """Return the first coordinate name of ``obj`` whose name contains ``key``."""
    for name in obj.coords:
        # Coordinate names are not guaranteed to be strings; skip the ones that
        # are not instead of failing on the substring test.
        if isinstance(name, str) and key in name:
            return name
    # IndexError is kept so callers see the same exception type as before,
    # now with a message that says what was actually missing.
    raise IndexError(f'no coordinate with {key!r} in its name was found')


def xarr_interpolate(original, new, on='latlon'):
    """Interpolate ``original`` onto the latitude/longitude values of ``new``.

    The latitude and longitude coordinates of both objects are located by
    name: the first coordinate whose name contains ``'lat'`` / ``'lon'``.

    Parameters
    ----------
    original : object providing ``.coords`` and ``.interp(mapping)``
        Data to interpolate (e.g. an xarray Dataset).
    new : object providing ``.coords`` and ``new[name].values``
        Object whose latitude/longitude values define the target grid.
    on : str, optional
        Interpolation mode. Only ``'latlon'`` is supported.

    Returns
    -------
    The result of interpolating ``original`` along its latitude coordinate
    and then along its longitude coordinate.

    Raises
    ------
    ValueError
        If ``on`` is anything other than ``'latlon'``.
    IndexError
        If either object has no coordinate containing ``'lat'`` or ``'lon'``.
    """
    if on != 'latlon':
        raise ValueError('latlon interpolation only works as of now...')

    new_lat = _coord_name(new, 'lat')
    new_lon = _coord_name(new, 'lon')
    old_lat = _coord_name(original, 'lat')
    old_lon = _coord_name(original, 'lon')

    # Two separate passes (latitude first, then longitude), as before.
    lat_interpolated = original.interp({old_lat: new[new_lat].values})
    return lat_interpolated.interp({old_lon: new[new_lon].values})

In [6]:
# Generate the 'mean' field from `mc` (dask=True) and interpolate it onto the
# latitude/longitude values of the loaded forecast `gefs`.
mc_mu = xarr_interpolate(mc.generate(type='mean',dask=True),gefs)

In [12]:
# Display the interpolated dataset (dimensions, coordinates and variables).
mc_mu

<xarray.Dataset>
Dimensions:   (lat: 121, lon: 261, time: 630)
Coordinates:
    fhour     timedelta64[ns] 1 days
  * time      (time) datetime64[ns] 2012-09-30 2012-09-30 ... 2012-10-20
  * lat       (lat) float64 80.0 79.5 79.0 78.5 78.0 ... 21.5 21.0 20.5 20.0
  * lon       (lon) float64 180.0 180.5 181.0 181.5 ... 308.5 309.0 309.5 310.0
Data variables:
    Pressure  (time, lat, lon) float32 dask.array<chunksize=(1, 121, 261), meta=np.ndarray>
Attributes:
    Conventions:  CF-1.0
    title:        Subset of data from 2nd-generation multi-decadal ensemble r...
    institution:  NOAA Earth System Research Laboratory (ESRL)
    source:       NCEP GFS v 9.01, T254L42.  Control initial conditions from ...
    references:   http://www.esrl.noaa.gov/psd/forecasts/reforecast2/index.html
    history:      Subset created 2019-10-03 03:57:07 UTC
    comment:      Original dataset generated on DOE's supercomputers at Lawre...

In [7]:

def hsa_n(variable):
    """Time the forecast load and the mean/spread interpolation for hours 0 and 6.

    The model date is parsed from characters 5-12 (YYYYMMDD) of the last line
    of ``ps.log_directory + 'current_run.txt'``. For each forecast hour the
    function loads the 'mean' forecast for ``variable`` on the notebook-level
    ``lats`` / ``lons`` grid, interpolates the ``MClimate`` 'mean' and 'sprd'
    fields onto that forecast, and prints the elapsed seconds of each step.

    Parameters
    ----------
    variable : str
        Variable name passed to ``hsa.NewForecastArray`` and ``hsa.MClimate``.

    Returns
    -------
    tuple
        ``(mc_mu, gefs)`` from the last forecast hour processed (6); results of
        earlier iterations are overwritten.
    """
    with open(ps.log_directory + 'current_run.txt', "r") as run_log:
        model_date = datetime.strptime(run_log.readlines()[-1][5:13], '%Y%m%d')

    for fhour in range(0, 7, 6):
        start = datetime.now()
        # Use the requested variable rather than a hard-coded 'slp'
        # (assumes the second argument is the variable name, as in the
        # MClimate call below -- confirm against hsa).
        nfa = hsa.NewForecastArray('mean', variable, fhour)
        gefs = nfa.load_forecast(subset_lat=lats, subset_lon=lons)
        print(f'gefs took {(datetime.now()-start).total_seconds():.2f}')

        start = datetime.now()
        mc = hsa.MClimate(model_date, variable, fhour, percentage=10)
        mc_mu = xarr_interpolate(mc.generate(type='mean', dask=True), gefs)
        print(f'mu interpolate took {(datetime.now()-start).total_seconds():.2f}')

        start = datetime.now()
        # The spread field is only timed here; it is not part of the return value.
        mc_std = xarr_interpolate(mc.generate(type='sprd', dask=True), gefs)
        print(f'sigma interpolate took {(datetime.now()-start).total_seconds():.2f}')
    return mc_mu, gefs

In [8]:
# hsa_n reads the notebook-level `lons` / `lats`, so they are (re)defined here.
# linspace gives the same 0.5-spaced values as the padded arange call, with the
# inclusive stop and the point count stated explicitly.
lons = np.linspace(180, 310, 261)
lats = np.linspace(20, 80, 121)
mc_mu,gefs=hsa_n('slp')

gefs took 3.07
mu interpolate took 0.23
sigma interpolate took 0.23
gefs took 4.78
mu interpolate took 0.31
sigma interpolate took 0.23


In [22]:
# Rename the forecast's 'latitude'/'longitude' dimensions and variables to
# 'lat'/'lon', the names mc_mu uses, so both share coordinate names.
# NOTE(review): a single Dataset.rename(...) call may cover both steps -- confirm
# it treats the indexes identically on the xarray version in use.
gefs_renamed = gefs.rename_dims({'latitude':'lat','longitude':'lon'}).rename_vars({'latitude':'lat','longitude':'lon'})

In [24]:
# Merge mc_mu and the renamed forecast into a single Dataset; .chunk() is
# applied to the forecast first so its variables are dask-backed as well.
merged = xr.merge([mc_mu,gefs_renamed.chunk()])

In [25]:
# Display the merged dataset (dimensions, coordinates and variables).
merged

<xarray.Dataset>
Dimensions:     (lat: 121, lon: 261, time: 630)
Coordinates:
    fhour       timedelta64[ns] 06:00:00
  * time        (time) datetime64[ns] 2012-10-01 2012-10-01 ... 2012-10-21
  * lat         (lat) float64 80.0 79.5 79.0 78.5 78.0 ... 21.5 21.0 20.5 20.0
  * lon         (lon) float64 180.0 180.5 181.0 181.5 ... 309.0 309.5 310.0
    step        timedelta64[ns] 06:00:00
    meanSea     int64 0
    valid_time  datetime64[ns] 2019-10-11T12:00:00
Data variables:
    Pressure    (time, lat, lon) float32 dask.array<chunksize=(1, 121, 261), meta=np.ndarray>
    prmsl       (lat, lon) float32 dask.array<chunksize=(121, 261), meta=np.ndarray>

In [14]:
import dask.array as dask_array

def _percentile_of_score(block, score, axis=0):
    """Percentile rank of ``score`` within ``block`` along ``axis`` (NumPy input).

    Applies the ``kind='rank'`` formula of ``scipy.stats.percentileofscore``
    along one axis: with ``below`` values strictly less than the score and
    ``at_or_below`` values less than or equal to it, the result is
    ``(below + at_or_below + (at_or_below > below)) * 50 / n``.

    ``score`` may be a scalar, an array with the shape of ``block`` minus
    ``axis`` (one score per reduced position), or anything that already
    broadcasts against ``block``. For floating-point blocks, positions with a
    NaN along ``axis`` yield NaN.
    """
    block = np.asarray(block)
    ref = np.asarray(score)
    if ref.ndim == block.ndim - 1:
        # One score per reduced position: re-insert the reduced axis so the
        # score broadcasts against the block.
        ref = np.expand_dims(ref, axis)

    n = block.shape[axis]
    below = np.count_nonzero(block < ref, axis=axis)
    at_or_below = np.count_nonzero(block <= ref, axis=axis)
    # An empty axis has no defined percentile; yield NaN instead of dividing by 0.
    scale = 50.0 / n if n else np.nan
    result = (below + at_or_below + (at_or_below > below)) * scale

    if np.issubdtype(block.dtype, np.floating):
        # NaN compares False everywhere and would silently skew the counts.
        result = np.where(np.isnan(block).any(axis=axis), np.nan, result)
    return result


def dask_percentile(arr, axis=0, score=None):
    """Percentile rank of ``score`` along ``axis`` of a dask array, block by block.

    ``scipy.stats.percentileofscore`` cannot be mapped over blocks directly: it
    requires a ``score`` argument and has no ``axis`` parameter. An axis-aware
    helper using the same 'rank' formula is mapped instead.

    Parameters
    ----------
    arr : dask array
        Input data; must consist of a single chunk along ``axis``.
    axis : int, optional
        Axis to reduce over.
    score : scalar or array-like
        Value(s) to rank. Required (keyword). It is handed unchanged to every
        block, so an array score has to broadcast against each block.
        NOTE(review): for array scores keep ``arr`` unchunked along the other
        axes too, or the per-block shapes will not match -- confirm for the
        intended inputs.

    Raises
    ------
    ValueError
        If ``arr`` has more than one chunk along ``axis``.
    TypeError
        If ``score`` is not supplied.
    """
    if len(arr.chunks[axis]) > 1:
        msg = ('Input array cannot be chunked along the percentile '
               'dimension.')
        raise ValueError(msg)
    if score is None:
        raise TypeError("dask_percentile() requires the 'score' argument")
    return dask_array.map_blocks(_percentile_of_score, arr, score=score,
                                 axis=axis, drop_axis=axis, dtype=np.float64)


def percentile(arr, axis=0, score=None):
    """Percentile rank of ``score`` within ``arr`` along ``axis``.

    Dask arrays are delegated to ``dask_percentile``; any other input is
    converted with ``np.asarray`` and evaluated directly.

    Parameters
    ----------
    arr : dask array or array-like
        Input data.
    axis : int, optional
        Axis to reduce over.
    score : scalar or array-like
        Value(s) to rank against ``arr``. Required (keyword).

    Returns
    -------
    Percentile ranks in the range 0-100 with ``axis`` removed (a lazy dask
    array for dask input).

    Raises
    ------
    TypeError
        If ``score`` is not supplied.
    """
    if isinstance(arr, dask_array.Array):
        return dask_percentile(arr, axis=axis, score=score)
    if score is None:
        raise TypeError("percentile() requires the 'score' argument")
    return _percentile_of_score(arr, score, axis=axis)

In [26]:
# NumPy ufuncs operate directly on xarray objects (dask-backed variables stay
# lazy), so the xarray.ufuncs compatibility wrappers are not needed here.
np.sin(merged)

<xarray.Dataset>
Dimensions:     (lat: 121, lon: 261, time: 630)
Coordinates:
    fhour       timedelta64[ns] 06:00:00
  * time        (time) datetime64[ns] 2012-10-01 2012-10-01 ... 2012-10-21
  * lat         (lat) float64 80.0 79.5 79.0 78.5 78.0 ... 21.5 21.0 20.5 20.0
  * lon         (lon) float64 180.0 180.5 181.0 181.5 ... 309.0 309.5 310.0
    step        timedelta64[ns] 06:00:00
    meanSea     int64 0
    valid_time  datetime64[ns] 2019-10-11T12:00:00
Data variables:
    Pressure    (time, lat, lon) float32 dask.array<chunksize=(1, 121, 261), meta=np.ndarray>
    prmsl       (lat, lon) float32 dask.array<chunksize=(121, 261), meta=np.ndarray>