In [2]:
# %load test_biascorrect.py
from sparkxarray.downscale import biascorrect as bc
import numpy as np
import xarray as xr
import numpy as np
import pandas as pd
import seaborn as sns # pandas aware plotting library

# Seed NumPy's global generator so every fake sample below is reproducible.
np.random.seed(123)

# Length of the fake observed and model series.
n = 1000

# The draw order (raw, observed, model) determines the values produced under
# the fixed seed, so it must not be rearranged.
raw_data = np.random.uniform(0, 40, size=10)
obs = np.random.uniform(0.5, 13.3, size=n)
mod = np.random.uniform(1.5, 19.3, size=n)

a = bc.Biascorrect(obs_data=obs, model_data=mod, raw_data=raw_data)

# Echo the inputs as stored on the Biascorrect instance.
print("Fake observed data: \n{} \n".format(a.obs_data))
print("Fake model data: \n{} \n".format(a.model_data))

bc_data = a.qpqm()
print(bc_data.shape)

# Shape-only check: the corrected output must line up with the raw input.
# No comparison of the values themselves is made.
assert bc_data.shape == raw_data.shape





In [6]:
# Daily time axis covering 2000-01-01 through 2001-12-31.
times = pd.date_range('2000-01-01', '2001-12-31', name='time')
# One sinusoid per year, driven by day-of-year, shared by both fake datasets.
annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28))


def _fake_temperature_dataset(times, annual_cycle, base_offset, tmin_noise,
                              tmax_offset, tmax_noise,
                              locations=('IA', 'IN', 'IL')):
    """Build a fake (time, location) dataset with 'tmin' and 'tmax' variables.

    Parameters
    ----------
    times : pandas.DatetimeIndex
        Values for the 'time' coordinate.
    annual_cycle : numpy.ndarray
        1-D seasonal signal with one value per entry of ``times``.
    base_offset : float
        Constant added to ``15 * annual_cycle`` to form the shared baseline.
    tmin_noise : float
        Scale applied to the standard-normal noise added to 'tmin'.
    tmax_offset : float
        Constant added to the baseline for 'tmax'.
    tmax_noise : float
        Scale applied to the standard-normal noise added to 'tmax'.
    locations : sequence of str, optional
        Labels for the 'location' coordinate; one noise column is drawn per
        label.

    Returns
    -------
    xarray.Dataset
        Dataset with 'tmin' and 'tmax' on dimensions ('time', 'location').

    Notes
    -----
    Noise comes from NumPy's global random state, 'tmin' first and then
    'tmax', so results depend on the seed and on the order of calls.
    """
    # Column vector so the baseline broadcasts across the location columns.
    base = base_offset + 15 * annual_cycle.reshape(-1, 1)
    shape = (annual_cycle.size, len(locations))
    tmin_values = base + tmin_noise * np.random.randn(*shape)
    tmax_values = base + tmax_offset + tmax_noise * np.random.randn(*shape)
    return xr.Dataset({'tmin': (('time', 'location'), tmin_values),
                       'tmax': (('time', 'location'), tmax_values)},
                      {'time': times, 'location': list(locations)})


# Make some fake observation dataset.
obs_ds = _fake_temperature_dataset(times, annual_cycle, base_offset=10,
                                   tmin_noise=3, tmax_offset=10, tmax_noise=3)

# Make some fake model dataset: higher baseline, noisier tmin, larger tmax offset.
mod_ds = _fake_temperature_dataset(times, annual_cycle, base_offset=12,
                                   tmin_noise=5, tmax_offset=12, tmax_noise=3)

In [7]:
# Display the fake observation dataset (daily tmin/tmax at three locations).
obs_ds

<xarray.Dataset>
Dimensions:   (location: 3, time: 731)
Coordinates:
  * time      (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
  * location  (location) <U2 'IA' 'IN' 'IL'
Data variables:
    tmin      (time, location) float64 -5.459 -6.895 -8.714 0.4239 -3.835 ...
    tmax      (time, location) float64 2.658 6.414 3.473 6.414 9.572 5.895 ...

In [8]:
# Display the fake model dataset; it uses the same time and location coordinates as obs_ds.
mod_ds

<xarray.Dataset>
Dimensions:   (location: 3, time: 731)
Coordinates:
  * time      (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
  * location  (location) <U2 'IA' 'IN' 'IL'
Data variables:
    tmin      (time, location) float64 -3.641 4.729 -0.08711 0.6363 -3.061 ...
    tmax      (time, location) float64 9.162 11.54 11.62 7.307 8.869 5.658 ...

In [47]:
@xr.register_dataset_accessor('bias')
class Bias(object):
    """Hold an observation dataset (and optionally a model dataset) and cache
    summaries of the observations.

    The class can be used in two ways:

    * directly, as ``Bias(obs_ds, mod_ds)``;
    * through the registered accessor, as ``obs_ds.bias``. xarray builds
      accessors with the dataset as the only argument, which is why the
      second constructor parameter is optional.

    Parameters
    ----------
    xarray_ob1 : xarray.Dataset
        Observation dataset. ``mean`` reads its ``tmax`` variable and
        ``bias_correct`` averages it over its ``location`` dimension.
    xarray_ob2 : xarray.Dataset, optional
        Model dataset. It is stored on ``_model`` but is not read by any
        method of this class. Defaults to ``None``.
    """

    def __init__(self, xarray_ob1, xarray_ob2=None):
        self._obs = xarray_ob1
        self._model = xarray_ob2
        # Lazily computed caches; None means "not computed yet".
        self._bc = None
        self._mean = None

    @property
    def mean(self):
        """Scalar mean of every observed ``tmax`` value, computed once and cached."""
        if self._mean is None:
            self._mean = np.mean(self._obs.tmax.values)
        return self._mean

    @property
    def bias_correct(self):
        """Observation dataset averaged over ``location``, computed once and cached.

        NOTE(review): despite the name, the model dataset is not used here and
        no correction is applied; this only reduces the observations.
        """
        # Use `is None`, not `== None`: once the cache holds an xarray object,
        # `==` is an element-wise comparison rather than an identity test, so
        # the cached value would not be reliably reused.
        if self._bc is None:
            self._bc = self._obs.mean(dim='location')
        return self._bc
 

  


In [48]:
# Build a Bias helper directly from the observation and model datasets.
ds = Bias(obs_ds, mod_ds)

In [49]:
# tmax of the observations after averaging over the 'location' dimension.
ds.bias_correct.tmax

<xarray.DataArray 'tmax' (time: 731)>
array([ 4.181262,  7.2937  ,  7.16931 , ...,  4.843581,  4.642949,  3.297611])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...

In [50]:
# Un-averaged observed tmax for comparison, read through the private _obs attribute.
ds._obs.tmax

<xarray.DataArray 'tmax' (time: 731, location: 3)>
array([[  2.657568,   6.413621,   3.472597],
       [  6.414292,   9.571507,   5.895302],
       [  3.269672,   6.468234,  11.770023],
       ..., 
       [  3.097954,   7.325948,   4.10684 ],
       [  8.455372,   3.131549,   2.341927],
       [  6.795273,   2.879086,   0.218475]])
Coordinates:
  * time      (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
  * location  (location) <U2 'IA' 'IN' 'IL'

In [32]:
# Model dataset stored on the helper, read through the private _model attribute.
ds._model

<xarray.Dataset>
Dimensions:   (location: 3, time: 731)
Coordinates:
  * time      (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 ...
  * location  (location) <U2 'IA' 'IN' 'IL'
Data variables:
    tmin      (time, location) float64 -3.641 4.729 -0.08711 0.6363 -3.061 ...
    tmax      (time, location) float64 9.162 11.54 11.62 7.307 8.869 5.658 ...

In [33]:
# Bias defines no __repr__, so this displays the default object repr.
ds

<__main__.Bias at 0x7f84b79d2dd8>

In [35]:
# Flatten the model dataset to a DataFrame and summarise tmin/tmax with descriptive statistics.
ds._model.to_dataframe().describe()

Unnamed: 0,tmin,tmax
count,2193.0,2193.0
mean,12.199734,23.992827
std,11.771917,10.962656
min,-18.61708,1.202007
25%,2.27292,13.789882
50%,12.341318,23.897565
75%,22.189517,34.06722
max,42.139493,46.963557
