In [31]:
import xarray as xr
import pandas as pd
import glob
import numpy as np

In [2]:
xr.set_options(enable_cftimeindex=False)



<xarray.core.options.set_options at 0x7fe6c2215198>

In [3]:
import sys
sys.path.append('../scripts/')
from Model import *

In [3]:
from helper_funcs import get_model_files, get_EnsMean_year_files

In [4]:
# Root directory of the NEX-GDDP output tree; the data paths in this notebook are built from it.
# NOTE(review): absolute, machine-specific mount point -- adjust before running on another machine.
DATA_DIR = '/media/abhi/My_Passport/NEX-GDDP-NASA-OUTPUT'

In [5]:
def get_region_year_files(model, variable, scen, region='Amravati'):
    """List the year-wise files for one model / variable / scenario, in sorted order.

    Parameters
    ----------
    model : str
        Model name. A name containing 'EnsMean' must have the form
        '<dir1>_<dir2>' and is split on the underscore to obtain the two
        directory levels; any other name is looked up under 'Models/<model>'.
        'BCC-CSM1-1' and 'INMCM4' are lower-cased before the lookup.
    variable : str
        Variable name; used both as a directory name and as the file-name prefix.
    scen : str
        Scenario string that must appear in the file name.
    region : str, optional
        Region directory under '<DATA_DIR>/region-wise'.

    Returns
    -------
    list of str
        Matching paths ordered by the text after the last underscore of the
        path (presumably the year), with the full path as tie-breaker.
        Empty when nothing matches.
    """
    if 'EnsMean' not in model:
        # These two models are stored under lower-cased directory names.
        if model in ('BCC-CSM1-1', 'INMCM4'):
            model = model.lower()
        top_dir, sub_dir = 'Models', model
    else:
        top_dir, sub_dir = model.split('_')

    pattern = (
        f'{DATA_DIR}/region-wise/{region}/year-wise/{top_dir}/{sub_dir}/{variable}/'
        f'{variable}*{scen}*'
    )
    matches = glob.glob(pattern)

    # Sort on (suffix, path) so equal suffixes fall back to the path itself.
    return sorted(matches, key=lambda path: (path.split('_')[-1], path))

In [31]:
files = get_region_year_files('ACCESS1-0', 'pr', 'historical')

In [76]:
# Load every bcc-csm1-1 precipitation file whose name matches '*hist*.nc' into memory.
# This object is used further down as the target of `reindex_like`.
m_correct = xr.open_mfdataset(f'{DATA_DIR}/region-wise/Amravati/year-wise/Models/bcc-csm1-1/pr/*hist*.nc').compute()

In [78]:
# Swap the time index for a pandas DatetimeIndex. `assign_coords` returns a new
# object; writing into `.values` of a dimension coordinate in place is rejected
# by recent xarray versions.
m_correct = m_correct.assign_coords(time=m_correct.indexes['time'].to_datetimeindex())

  """Entry point for launching an IPython kernel.


In [79]:
m_correct = m_correct.rename({'pr': 'value'})

In [80]:
m_correct.resample(time='D').mean(dim='time')

<xarray.Dataset>
Dimensions:  (time: 20454)
Coordinates:
  * time     (time) datetime64[ns] 1950-01-01 1950-01-02 ... 2005-12-31
Data variables:
    value    (time) float32 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0

In [13]:
# Named pandas indexes passed as `dim` to xr.concat further down; the outputs
# below show 'variable' and 'model' appearing as the resulting dimensions.
variableIndex = pd.Index(['pr', 'tasmax', 'tasmin'], name='variable')
# NOTE(review): `models` is not defined in any visible cell -- presumably it
# comes from `from Model import *`; confirm.
modelIndex = pd.Index(models, name='model')

In [86]:
def correct_time_index(ds):
    """Return `ds` with a datetime time index and duplicate time stamps removed.

    Parameters
    ----------
    ds : xarray.Dataset or xarray.DataArray
        Object with a `time` dimension coordinate.

    Returns
    -------
    Same type as `ds`
        A new object. If the time index offers `to_datetimeindex` (as xarray's
        CFTimeIndex does) it is replaced by the converted index; otherwise the
        index is kept as it is. Only the first occurrence of each time stamp is
        retained, and the result is ordered by time stamp. The input object is
        not modified.
    """
    time_index = ds.indexes['time']

    # Convert only when the index actually supports it, instead of attempting
    # the conversion and swallowing AttributeError (which could hide unrelated
    # failures). assign_coords is used because assigning to `.values` of a
    # dimension coordinate is rejected by recent xarray versions.
    if hasattr(time_index, 'to_datetimeindex'):
        ds = ds.assign_coords(time=time_index.to_datetimeindex())

    # np.unique yields, for each distinct time stamp in sorted order, the
    # position of its first occurrence; selecting those drops the duplicates.
    _, first_seen = np.unique(ds['time'], return_index=True)
    return ds.isel(time=first_seen)

In [87]:
def make_combined_ds(concat_over, **kwargs):
    """Load several year-wise series and stack them along a new dimension.

    Parameters
    ----------
    concat_over : str
        'variable' stacks every variable in `variableIndex` for one model and
        scenario. 'model' is reserved but not implemented.
    **kwargs
        Forwarded to `get_region_year_files` (for example `model=`, `scen=`);
        `variable` is supplied by this function and must not be passed.

    Returns
    -------
    xarray.Dataset
        A dataset with a single data variable named 'value' and the extra
        dimension taken from `variableIndex`.

    Raises
    ------
    NotImplementedError
        If `concat_over` is 'model'.
    ValueError
        If `concat_over` is any other unsupported value.
    """
    # Fail explicitly: previously these cases fell through to `return ds`
    # with `ds` never assigned.
    if concat_over == 'model':
        raise NotImplementedError("concat_over='model' is not implemented yet")
    if concat_over != 'variable':
        raise ValueError(
            f"concat_over must be 'variable' or 'model', got {concat_over!r}")

    per_variable = []
    for variable in variableIndex:
        year_files = get_region_year_files(**kwargs, variable=variable)
        # The data are pulled into memory with compute(), so the underlying
        # files can be closed as soon as the block exits.
        with xr.open_mfdataset(year_files) as opened:
            loaded = opened[variable].compute()
        per_variable.append(correct_time_index(loaded))

    return xr.concat(per_variable, dim=variableIndex).to_dataset(name='value')

In [51]:
ds = make_combined_ds('variable', model='ACCESS1-0', scen='historical')

In [52]:
_, index  = np.unique(ds['time'], return_index=True)

In [53]:
ds = ds.isel(time=index)

In [55]:
ds.sel(variable='pr').reindex_like(m_correct)

<xarray.Dataset>
Dimensions:   (time: 20440)
Coordinates:
  * time      (time) object 1950-01-01 12:00:00 ... 2005-12-31 12:00:00
    variable  <U2 'pr'
Data variables:
    value     (time) float32 nan nan nan nan nan nan ... nan nan nan nan nan nan

In [63]:
ds.mean(dim='variable')

<xarray.Dataset>
Dimensions:  (time: 20454)
Coordinates:
  * time     (time) datetime64[ns] 1950-01-01T12:00:00 ... 2005-12-31T12:00:00
Data variables:
    value    (time) float32 196.0362 195.90332 196.20183 ... 195.9281 195.93474

In [88]:
dsets = {}

In [89]:
%%time

# Build one combined dataset per model for the historical scenario and store
# it in `dsets`, keyed by model name.
scen_name = 'historical'
for model in models:
    dsets[model] = make_combined_ds('variable', model=model, scen=scen_name)

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


CPU times: user 49.4 s, sys: 1.48 s, total: 50.9 s
Wall time: 1min 5s


In [90]:
dsets

{'ACCESS1-0': <xarray.Dataset>
 Dimensions:   (time: 20454, variable: 3)
 Coordinates:
   * time      (time) datetime64[ns] 1950-01-01T12:00:00 ... 2005-12-31T12:00:00
   * variable  (variable) object 'pr' 'tasmax' 'tasmin'
 Data variables:
     value     (variable, time) float32 0.0 0.0 0.0 ... 286.10254 286.3155,
 'BCC-CSM1-1': <xarray.Dataset>
 Dimensions:   (time: 20440, variable: 3)
 Coordinates:
   * time      (time) datetime64[ns] 1950-01-01T12:00:00 ... 2005-12-31T12:00:00
   * variable  (variable) object 'pr' 'tasmax' 'tasmin'
 Data variables:
     value     (variable, time) float32 0.0 0.0 0.0 ... 285.4476 284.57675,
 'BNU-ESM': <xarray.Dataset>
 Dimensions:   (time: 20440, variable: 3)
 Coordinates:
   * time      (time) datetime64[ns] 1950-01-01T12:00:00 ... 2005-12-31T12:00:00
   * variable  (variable) object 'pr' 'tasmax' 'tasmin'
 Data variables:
     value     (variable, time) float32 0.0 0.0 0.0 ... 286.3936 285.72546,
 'CanESM2': <xarray.Dataset>
 Dimensions:   (time:

In [102]:
dsets_list = list(dsets.values())

In [103]:
m1, m2 = dsets_list[0:2]

In [106]:
# Align every model's coordinates with those of m2 (the second entry of
# dsets_list). Only existing keys are reassigned, so the dict does not change
# size while it is being iterated.
for m_name, m in dsets.items():
    dsets[m_name] = m.reindex_like(m2)

In [108]:
ds = xr.concat(dsets.values(), dim=modelIndex)

In [110]:
import _pickle as cpickle

In [111]:
with open('../../pickles/Amravati_ALL21_ds.pkl', 'wb') as f:
    cpickle.dump(ds, f)

In [137]:
# Long-format table: the dataset's coordinates become ordinary columns.
df = ds.to_dataframe().reset_index()
df['scen'] = 'historical'

# Rows holding precipitation; every other row is handled by the second update.
is_pr = df.variable == 'pr'

# 86400 = seconds per day (presumably turns a per-second rate into a daily total).
df.loc[is_pr, 'value'] = df.loc[is_pr, 'value'] * 86400

# 273.15 offset (presumably Kelvin to degrees Celsius for tasmax / tasmin).
df.loc[~is_pr, 'value'] = df.loc[~is_pr, 'value'] - 273.15

In [141]:
df.to_pickle('../../pickles/Amravati_ALL21_df.pkl')

In [142]:
df.to_csv('../../pickles/Amravati_ALL21_df.csv')

In [143]:
df

Unnamed: 0,model,time,variable,value,scen
0,ACCESS1-0,1950-01-01 12:00:00,pr,0.000000,historical
1,ACCESS1-0,1950-01-01 12:00:00,tasmax,29.010162,historical
2,ACCESS1-0,1950-01-01 12:00:00,tasmin,12.798431,historical
3,ACCESS1-0,1950-01-02 12:00:00,pr,0.000000,historical
4,ACCESS1-0,1950-01-02 12:00:00,tasmax,29.049164,historical
5,ACCESS1-0,1950-01-02 12:00:00,tasmin,12.360809,historical
6,ACCESS1-0,1950-01-03 12:00:00,pr,0.000000,historical
7,ACCESS1-0,1950-01-03 12:00:00,tasmax,29.375641,historical
8,ACCESS1-0,1950-01-03 12:00:00,tasmin,12.929871,historical
9,ACCESS1-0,1950-01-04 12:00:00,pr,0.000000,historical


In [150]:
def make_scen_datasets(scen, save=True):
    """Build the all-model dataset and long-format table for one scenario.

    For every entry of `models` the three variables are combined with
    `make_combined_ds`, each result is reindexed like the 'BCC-CSM1-1' one,
    and the results are concatenated along `modelIndex`.

    Parameters
    ----------
    scen : str
        Scenario name; forwarded to `make_combined_ds` and written to the
        'scen' column and the output file names.
    save : bool, optional
        When true, write '<scen>_ds.pkl', '<scen>_df.pkl' and '<scen>_df.csv'
        (prefixed 'Amravati_ALL21_') into '../../pickles/Amravati', creating
        the directory if needed.

    Returns
    -------
    tuple
        `(ds, df)`: the concatenated xarray dataset (values as loaded) and the
        pandas DataFrame with converted values.

    Raises
    ------
    KeyError
        If 'BCC-CSM1-1' is not among `models`.
    """
    import os  # local import: only needed to create the output directory

    # Collect into a local dict in `models` order so that the concat below
    # lines up with `modelIndex` regardless of what the notebook-level
    # `dsets` already contains.
    scen_dsets = {
        model: make_combined_ds('variable', model=model, scen=scen)
        for model in models
    }

    # BCC-CSM1-1 supplies the reference coordinates for all models.
    m_correct = scen_dsets['BCC-CSM1-1']
    scen_dsets = {
        m_name: member.reindex_like(m_correct)
        for m_name, member in scen_dsets.items()
    }

    # Kept for backward compatibility: the notebook-level `dsets` has always
    # been refreshed with the reindexed per-model datasets of the last call.
    dsets.update(scen_dsets)

    ds = xr.concat(list(scen_dsets.values()), dim=modelIndex)

    df = ds.to_dataframe().reset_index()
    df['scen'] = scen

    is_pr = df.variable == 'pr'
    # 86400 = seconds per day (presumably per-second rate -> daily total).
    df.loc[is_pr, 'value'] = df.loc[is_pr, 'value'] * 86400
    # 273.15 offset (presumably Kelvin -> degrees Celsius).
    df.loc[~is_pr, 'value'] = df.loc[~is_pr, 'value'] - 273.15

    if save:
        out_dir = '../../pickles/Amravati'
        os.makedirs(out_dir, exist_ok=True)
        stem = f'{out_dir}/Amravati_ALL21_{scen}'

        for dset_name, dset in (('ds', ds), ('df', df)):
            with open(f'{stem}_{dset_name}.pkl', 'wb') as f:
                cpickle.dump(dset, f)

        # Spell out the 'df' suffix rather than relying on the loop variable
        # still holding it after the loop.
        df.to_csv(f'{stem}_df.csv')

    return ds, df

In [154]:
make_scen_datasets('historical')

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


(<xarray.Dataset>
 Dimensions:   (model: 21, time: 20440, variable: 3)
 Coordinates:
   * time      (time) datetime64[ns] 1950-01-01T12:00:00 ... 2005-12-31T12:00:00
   * variable  (variable) object 'pr' 'tasmax' 'tasmin'
   * model     (model) object 'ACCESS1-0' 'BCC-CSM1-1' ... 'NorESM1-M'
 Data variables:
     value     (model, variable, time) float32 0.0 0.0 0.0 ... 290.15585 289.4505,
              model                time variable      value        scen
 0        ACCESS1-0 1950-01-01 12:00:00       pr   0.000000  historical
 1        ACCESS1-0 1950-01-01 12:00:00   tasmax  29.010162  historical
 2        ACCESS1-0 1950-01-01 12:00:00   tasmin  12.798431  historical
 3        ACCESS1-0 1950-01-02 12:00:00       pr   0.000000  historical
 4        ACCESS1-0 1950-01-02 12:00:00   tasmax  29.049164  historical
 5        ACCESS1-0 1950-01-02 12:00:00   tasmin  12.360809  historical
 6        ACCESS1-0 1950-01-03 12:00:00       pr   0.000000  historical
 7        ACCESS1-0 1950-01-03 

In [153]:
# Build and save the datasets for every scenario except the first entry of `scen`.
# NOTE(review): `scen` is not defined in any visible cell -- presumably a list of
# scenario names from `from Model import *`, with 'historical' first; confirm.
for scen_name in scen[1:]:
    make_scen_datasets(scen_name)

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until
