<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Preamble" data-toc-modified-id="Preamble-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preamble</a></span></li><li><span><a href="#Xarrays-for-Different-Phases" data-toc-modified-id="Xarrays-for-Different-Phases-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Xarrays for Different Phases</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Indivual-Phases" data-toc-modified-id="Indivual-Phases-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Indivual Phases</a></span></li><li><span><a href="#Bootstrap" data-toc-modified-id="Bootstrap-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Bootstrap</a></span></li></ul></div>

# Preamble

In [1]:
# In[1]:


##############################

import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import dask.array
import cartopy.crs as ccrs
import pickle
import matplotlib.colors as colors
import datetime as dt
rb = plt.cm.RdBu
bm = plt.cm.Blues

import warnings
warnings.filterwarnings('ignore')

# In[2]:

In [2]:
# Load the pickled RMM (MJO index) table.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
path = 'RMM.pickle'
# The context manager closes the file handle as soon as the object is loaded.
with open(path, 'rb') as pickle_in:
    RMM = pickle.load(pickle_in)

In [3]:
# %load /home/563/ab2313/MJO/get_awap.py
import sys

def get_platform():
    """Return a human-readable name for the operating system.

    Returns:
        'Linux', 'OS X' or 'Windows' for the known values of ``sys.platform``;
        otherwise the raw ``sys.platform`` string is returned unchanged.
    """
    platforms = {
        # Python 3 reports plain 'linux'; 'linux1'/'linux2' are kept for
        # backward compatibility with the original table.
        'linux' : 'Linux',
        'linux1' : 'Linux',
        'linux2' : 'Linux',
        'darwin' : 'OS X',
        'win32' : 'Windows'
    }

    # Unknown platforms fall back to the raw identifier.
    return platforms.get(sys.platform, sys.platform)


platform = get_platform()

# Directory holding the large data files; it depends on which machine is in use.
# (A previously used, now disabled, alternative for the non-Mac case was
#  '/home/student.unimelb.edu.au/aborowiak/Desktop/Code/Scripts/big_files/'.)
if platform != 'OS X':
    path = '/home/563/ab2313/big_files/'
else:
    path = '/Users/alexborowiak/Desktop/large_files/'


# precip = xr.open_dataset(path + 'AWAP_w.nc')


In [4]:
# Open the AWAP file from the data directory chosen above and keep only its
# `precip` variable. Passing `chunks` makes the array dask-backed: blocks of
# 50 x 50 in lat/lon, with the time axis chunk size given as -1.
precip = xr.open_dataset(path + 'AWAP_W.nc', chunks={'time':-1, 'lat': 50, 'lon': 50}).precip

In [5]:
# Shift every RMM date forward by 9 hours and put 'Date' back as the index.
# NOTE: this rebinds RMM, so re-running the cell adds another 9 hours.
RMM = (
    RMM.reset_index()
       .assign(Date=lambda frame: frame['Date'] + pd.to_timedelta('9h'))
       .set_index('Date')
)

In [6]:
import os 
# Current working directory, with a trailing slash so names can be appended.
cwd = os.getcwd() + '/'

# Directory that receives the intermediate netCDF output of this notebook.
save_dir = cwd + 'int_data/'
# Create it up front so the later to_netcdf calls do not fail on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

# Different Phases

The rainfall is divided into different phases

Returns:
* <b> phase_precip </b> All the rainfall data split into the MJO phases

Note: the 90th and 95th percentiles are not needed as inputs here; they are what I am calculating.

In [7]:

# PURPOSE: create xarrays that contain the rainfall data only for a specific phase of the MJO.

# Longitude slices of the three regions, indexed 0..2.
regions = [slice(110, 120), slice(120.25, 140), slice(140.25, 156.25)]

# RMM phases counted as "enhanced" for each region (same order as `regions`).
# Kept as a plain nested list: the rows have different lengths, and building a
# numpy array from such a ragged list raises ValueError on NumPy >= 1.24.
mjo_enhanced = [[4, 5], [4, 5, 6], [4, 5, 6, 7]]


# In[10]:


def _precip_for_rmm_dates(dates_for_region):
    """Keep rainfall only on selected RMM dates, region by region.

    Args:
        dates_for_region: callable taking the region number ``i`` and returning
            the RMM dates (an index) to keep for ``regions[i]``.

    Returns:
        The per-region results merged with ``combine_first``. Within each
        region, time steps that are not in the selected dates are masked by
        ``where`` rather than dropped.
    """
    pieces = []
    for i in range(len(regions)):
        rmm_dates = np.array(dates_for_region(i))
        region_precip = precip.sel(lon=regions[i])
        pieces.append(region_precip.where(region_precip.time.isin(rmm_dates)))

    combined = pieces[0]
    for piece in pieces[1:]:
        combined = combined.combine_first(piece)
    return combined


# Days on which the MJO is active (amplitude of at least 1).
_mjo_active = RMM['Amplitude'] >= 1

# Active MJO, in one of the region's "enhanced" phases.
enhanced_precip = _precip_for_rmm_dates(
    lambda i: RMM[np.logical_and(_mjo_active, RMM['Phase'].isin(mjo_enhanced[i]))].index
)

# Active MJO, in any phase that is NOT one of the region's "enhanced" phases.
suppressed_precip = _precip_for_rmm_dates(
    lambda i: RMM[np.logical_and(_mjo_active, ~RMM['Phase'].isin(mjo_enhanced[i]))].index
)

# Inactive MJO (amplitude below 1); the same dates are used for every region.
inactive_precip = _precip_for_rmm_dates(
    lambda i: RMM[RMM['Amplitude'] < 1].index
)

In [8]:
# Stack the three subsets along a new 'mjo' dimension labelled by phase type.
mjo_labels = pd.Index(['enhanced', 'suppressed', 'inactive'], name = 'mjo')
phase_precip = xr.concat([enhanced_precip, suppressed_precip, inactive_precip], mjo_labels)

# Functions

In [9]:
def into_xr(data, orgininal, name = 'precip'):
    """Wrap a (lat, lon) field in a named DataArray.

    Args:
        data: values laid out with dimensions ('lat', 'lon').
        orgininal: object whose ``lat`` and ``lon`` attributes supply the coordinates.
        name: name given to the resulting DataArray.

    Returns:
        An ``xr.DataArray`` with dims ['lat', 'lon'] and the coordinates above.
    """
    grid = {'lat': orgininal.lat, 'lon': orgininal.lon}
    return xr.DataArray(data, dims=['lat', 'lon'], coords=grid, name=name)

In [10]:

def calculate_intfrac(data_sub,data_all,q ):
    """Ratio of the q-th percentile of a subset to that of all the data.

    Args:
        data_sub: data for one phase; reduced over its 'time' dimension.
        data_all: all the data; reduced over its 'time' dimension.
        q: percentile passed to ``np.nanpercentile``.

    Returns:
        The ratio (subset percentile / overall percentile) passed through
        ``into_xr`` with the coordinates of ``data_all`` and the name 'precip'.
    """
    def _time_percentile(data):
        # q-th percentile along time, ignoring NaNs
        return data.reduce(np.nanpercentile, q = q, dim = 'time')

    # How the phase percentile compares with the percentile of all rainfall
    ratio = _time_percentile(data_sub) / _time_percentile(data_all)

    return into_xr(ratio, data_all, 'precip')

In [11]:
# In[8]:


def percentile_all_months_and_regins(phase_precip, all_precip, q):
    """Month-by-month percentile ratio of phase rainfall to all rainfall.

    Args:
        phase_precip: rainfall for one phase, with a 'time' coordinate.
        all_precip: all rainfall, with a 'time' coordinate.
        q: percentile handed on to ``calculate_intfrac``.

    Returns:
        The per-month results of ``calculate_intfrac`` concatenated along a new
        'month' dimension, for the months 10, 11, 12, 1, 2, 3 in that order.
    """
    months = [10,11,12,1,2,3]

    def _only_month(data, month):
        # Drop every time step outside the requested calendar month
        return data.where(data.time.dt.month == month, drop = True)

    # (data_sub, data_all, q) for each month in turn
    month_storage = [
        calculate_intfrac(_only_month(phase_precip, month), _only_month(all_precip, month), q)
        for month in months
    ]

    return xr.concat(month_storage, pd.Index(months, name = 'month'))

# ENSO
Splitting my data into the various phases of ENSO

Returns:

* <b> phase_precip_enso </b> : All rainfall that has been split into the different phases of the MJO and ENSO

In [12]:
import pickle

In [13]:
# Load the pickled daily Nino 3.4 table.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
path = '/home/563/ab2313/MJO/w_ENSO/nino_daily.pickle'
# The context manager closes the file handle as soon as the object is loaded.
with open(path,'rb') as pickle_in:
    nino34 = pickle.load(pickle_in)

# Changing the name of the index and adding 9 hours so that it matches with the AWAP times
nino34 = nino34.reset_index()
nino34['Date'] = nino34['Date'] + pd.to_timedelta('9h')
nino34 = nino34.set_index('Date')

# Needs sorting. Sort BEFORE slicing: label-based slicing of a DatetimeIndex
# is only reliable when the index is monotonic.
nino34 = nino34.sort_index()

# Only need this date range
nino34 = nino34.loc['1974':'2018']

nino34.tail(3)

Unnamed: 0_level_0,nino34
Date,Unnamed: 1_level_1
2018-10-29 09:00:00,0.84
2018-10-30 09:00:00,0.84
2018-10-31 09:00:00,0.84


In [14]:
# This function here splits an xarray file into all the data that is in El Nino and La Nina

def split_via_enso(data):
    """Split data into El Nino and La Nina days using the global ``nino34`` table.

    A date counts as 'el nino' when its ``nino34`` value is > 0 and as
    'la nina' when it is <= 0.

    Args:
        data: xarray object with a 'time' coordinate.

    Returns:
        The two masked copies of ``data`` (time steps outside the selected
        dates are masked by ``where``) concatenated along a new 'nino'
        dimension labelled 'el nino' and 'la nina'.
    """
    index_values = nino34['nino34']
    dates_by_state = {
        'el nino': nino34[index_values > 0].index,
        'la nina': nino34[index_values <= 0].index,
    }

    # Keep the data only on the dates belonging to each ENSO state
    subsets = [data.where(data.time.isin(dates)) for dates in dates_by_state.values()]

    # Putting them both into the one xarray
    return xr.concat(subsets, pd.Index(list(dates_by_state), name = 'nino'))

In [15]:
# Split the MJO-phase rainfall (phase_precip) further by ENSO state.
# split_via_enso concatenates its two subsets along a new 'nino' dimension.
phase_precip_enso = split_via_enso(phase_precip)

In [16]:
phase_precip_enso

<xarray.DataArray 'precip' (nino: 2, mjo: 3, time: 7822, lat: 53, lon: 178)>
dask.array<shape=(2, 3, 7822, 53, 178), dtype=float32, chunksize=(1, 1, 7822, 50, 50)>
Coordinates:
  * lon      (lon) float64 112.0 112.2 112.5 112.8 ... 155.5 155.8 156.0 156.2
  * time     (time) datetime64[ns] 1974-10-01T09:00:00 ... 2017-12-31T09:00:00
  * lat      (lat) float64 -23.0 -22.75 -22.5 -22.25 ... -10.5 -10.25 -10.0
  * mjo      (mjo) object 'enhanced' 'suppressed' 'inactive'
  * nino     (nino) object 'el nino' 'la nina'
Attributes:
    long_name:  Daily Precipitation
    units:      mm

# Indivual Phases


The idea of the calculation here is to compare a given percentile (50th, 90th or 95th) of rainfall during an MJO phase
to the same percentile of all rainfall, to see if there is an increase

In:
* <b>  phase_precip_enso </b> 

Saves:
* <b> int_50_nino </b>
* <b> int_90_nino </b>
* <b> int_95_nino </b>

### 50th Percentile

In [17]:
# 50th percentile: intensity fraction for each ENSO state and each MJO phase
q = 50

mjo_phases = ['enhanced', 'suppressed', 'inactive']

enso_results = {}
for nino in ['el nino', 'la nina']:
    # Selecting just a single phase of ENSO
    data = phase_precip_enso.sel(nino = nino)

    # One result per MJO phase, each compared against all rainfall (precip)
    mjo_storage = [
        percentile_all_months_and_regins(data.sel(mjo = phase), precip , q )
        for phase in mjo_phases
    ]
    enso_results[nino] = xr.concat(mjo_storage, pd.Index(['enhanced', 'suppressed', 'inactive'], name = 'mjo'))

el_nino_data = enso_results['el nino']
la_nina_data = enso_results['la nina']

# Merging back together (the new dimension is called 'enso' here)
int_50_nino  = xr.concat([el_nino_data,la_nina_data], pd.Index(['el nino', 'la nina'], name = 'enso'))

### 90th Percentile

In [18]:
# 90th percentile: intensity fraction for each ENSO state and each MJO phase
q = 90

mjo_phases = ['enhanced', 'suppressed', 'inactive']

enso_results = {}
for nino in ['el nino', 'la nina']:
    # Selecting just a single phase of ENSO
    data = phase_precip_enso.sel(nino = nino)

    # One result per MJO phase, each compared against all rainfall (precip)
    mjo_storage = [
        percentile_all_months_and_regins(data.sel(mjo = phase), precip , q )
        for phase in mjo_phases
    ]
    enso_results[nino] = xr.concat(mjo_storage, pd.Index(['enhanced', 'suppressed', 'inactive'], name = 'mjo'))

el_nino_data = enso_results['el nino']
la_nina_data = enso_results['la nina']

# Merging back together (the new dimension is called 'enso' here)
int_90_nino  = xr.concat([el_nino_data,la_nina_data], pd.Index(['el nino', 'la nina'], name = 'enso'))

### 95th Percentile

In [19]:
# 95th percentile: intensity fraction for each ENSO state and each MJO phase
q = 95

mjo_phases = ['enhanced', 'suppressed', 'inactive']

enso_results = {}
for nino in ['el nino', 'la nina']:
    # Selecting just a single phase of ENSO
    data = phase_precip_enso.sel(nino = nino)

    # One result per MJO phase, each compared against all rainfall (precip)
    mjo_storage = [
        percentile_all_months_and_regins(data.sel(mjo = phase), precip , q )
        for phase in mjo_phases
    ]
    enso_results[nino] = xr.concat(mjo_storage, pd.Index(['enhanced', 'suppressed', 'inactive'], name = 'mjo'))

el_nino_data = enso_results['el nino']
la_nina_data = enso_results['la nina']

# Merging back together (the new dimension is called 'enso' here)
int_95_nino  = xr.concat([el_nino_data,la_nina_data], pd.Index(['el nino', 'la nina'], name = 'enso'))

### Saving

In [20]:

# Switch: set to a truthy value to write the three per-percentile files.
save = 0

if save:
    # Same files, written in the same order as before (50th, 95th, 90th)
    for file_name, array in (
        ('int_50_nino.nc', int_50_nino),
        ('int_95_nino.nc', int_95_nino),
        ('int_90_nino.nc', int_90_nino),
    ):
        array.to_netcdf(save_dir + file_name)

In [23]:
# Combine the three results along a new 'percentile' dimension and save them.
# Unlike the guarded saves above, this file is always written.
percentile_labels = pd.Index([50,90,95], name  = 'percentile')
int_nino = xr.concat([int_50_nino, int_90_nino, int_95_nino], percentile_labels)
int_nino.to_netcdf(save_dir + 'int_nino.nc')

# Bootstrap

## Functions

* <b>forward_in_time</b>: moves the data forward in time so that the same year can be sampled multiple times
* <b>select_additional</b>: finds the years that are repeated
* <b>calculate_intfrac</b>: calculates the intensity of the data it is given
* <b>bootstrap_phase_int</b> (data for a phase, all data, the percentile, number of loops): the master function for all those above. Takes in data for a particular phase, splits it into different months, then applies the functions above.


In [24]:
# This just moves all the data number * 100 years into the future

def forward_in_time(additional, number):
    """Return a copy of ``additional`` with its dates moved ``number * 100`` years ahead.

    Args:
        additional: xarray object with a 'time' coordinate plus ``lat`` and
            ``lon``; its ``.values`` are indexed by time along the first axis.
        number: how many centuries to move the data forward.

    Returns:
        An ``xr.Dataset`` with a single 'precip' variable on (time, lat, lon),
        or the string 'Failed' when no shifted date could be built.
        NOTE(review): a Dataset is returned even though callers appear to pass
        a DataArray - confirm this is what ``combine_first`` should receive.
    """
    # Shifted dates encoded as YYYYMMDD integers
    times = ((additional.time.dt.year.values + number * 100) * 10000
             + additional.time.dt.month.values * 100
             + additional.time.dt.day.values)
    values = additional.values

    future_time = []
    future_values = []

    for stamp, field in zip(times, values):
        # A shifted date may not exist (e.g. 29 Feb landing on a non-leap year)
        # or may not be representable; such days are skipped on purpose. Only
        # date-conversion errors are caught, so anything else (including
        # KeyboardInterrupt) still propagates.
        try:
            shifted = pd.to_datetime(stamp, format = '%Y%m%d')
        except (ValueError, OverflowError):
            continue
        future_time.append(shifted)
        future_values.append(field)

    # Nothing could be converted (presumably the shifted years are out of
    # range for pandas timestamps): signal this with the 'Failed' sentinel.
    if len(future_time) == 0:
        return 'Failed'

    return xr.Dataset({'precip':(('time','lat','lon'), future_values)},
                      {'time':future_time,'lat':additional.lat, 'lon':additional.lon})
    


# * Contains 1 additional function 
def select_additional(data, years):
    """Duplicate the data of years that were sampled more than once.

    For every extra occurrence of a year in ``years``, the data of that year is
    copied ``number * 100`` years into the future (via ``forward_in_time``) and
    merged into the result with ``combine_first``.

    Args:
        data: xarray object with a 'time' coordinate.
        years: the sampled years, possibly containing repeats.

    Returns:
        ``data`` itself when no year is repeated (or the first shift fails),
        otherwise ``data`` combined with the shifted copies.
    """
    new_data = data
    number = 1

    unique_years, counts = np.unique(years, return_counts=True)

    # Number of EXTRA draws per year (the first draw is already in `data`).
    # The mask is computed once and applied to both arrays so that years and
    # counts stay aligned.
    extra = counts - 1
    keep = extra > 0
    unique_years = unique_years[keep]
    extra = extra[keep]

    while extra.size > 0:
        # Every year still listed needs at least one more copy
        additional = data.where(data.time.dt.year.isin(unique_years), drop = True)

        # This moves all the additional data forwards in time
        additional = forward_in_time(additional, number)

        # forward_in_time returns the string 'Failed' when nothing could be
        # shifted. isinstance is used because `==` on an xarray object is an
        # elementwise comparison, not a sentinel test.
        if isinstance(additional, str):
            break

        new_data = new_data.combine_first(additional)

        # One extra draw has been served for every remaining year
        extra = extra - 1
        keep = extra > 0
        unique_years = unique_years[keep]
        extra = extra[keep]
        number += 1

    return new_data
        

In [25]:

def calculate_intfrac(data_sub,data_all,q ):
    """Ratio of the q-th percentile of a subset to that of all the data.

    NOTE: this redefines the ``calculate_intfrac`` from the earlier Functions
    section; this version returns the ratio directly, without ``into_xr``.

    Args:
        data_sub: data for one phase; reduced over its 'time' dimension.
        data_all: all the data; reduced over its 'time' dimension.
        q: percentile passed to ``np.nanpercentile``.

    Returns:
        Subset percentile divided by overall percentile.
    """
    # q-th percentile along time (NaNs ignored) of the phase data, then of all data
    percentiles = [
        subset.reduce(np.nanpercentile, q = q, dim = 'time')
        for subset in (data_sub, data_all)
    ]

    # How the phase percentile compares with the percentile of all rainfall
    return percentiles[0] / percentiles[1]

In [26]:
def bootstrap_phase_int(phase_precip,precip, q, loops):
    """Bootstrap the percentile ratio of phase rainfall to all rainfall.

    For each of the months 10, 11, 12, 1, 2, 3 the ratio from
    ``calculate_intfrac`` is recomputed ``loops`` times on randomly chosen
    years, and the 95th and 5th percentiles across those runs are kept.

    Args:
        phase_precip: rainfall for one phase, with a 'time' coordinate.
        precip: all rainfall, with a 'time' coordinate.
        q: percentile handed on to ``calculate_intfrac``.
        loops: number of bootstrap repetitions per month.

    Returns:
        Result with a 'percentile' dimension (95, 5) and a 'month' dimension.
        The draws use numpy's global random state, so results differ between
        runs unless that state is seeded by the caller.
    """
    month_storage = []

    # Looping through all of the months
    months = [10,11,12,1,2,3]
    for month in months:
        # Subsetting just one month. Each array is masked with its OWN time
        # axis, so the `precip` subset does not depend on which time steps
        # happen to be present in `phase_precip`.
        phase_precip_month = phase_precip.where(phase_precip.time.dt.month == month, drop = True)
        precip_month = precip.where(precip.time.dt.month == month, drop = True)

        loop_storage = []

        for i in range(loops):
            # 22 years drawn with replacement from 1974..2017
            # (np.random.randint excludes the upper bound)
            rand_years = np.random.randint(1974,2018,22)

            # Subsetting both data sets to the random years
            phase_precip_rand = phase_precip_month.where(phase_precip_month.time.dt.year.isin(rand_years), drop = True)
            precip_rand = precip_month.where(precip_month.time.dt.year.isin(rand_years), drop = True)

            # Years drawn more than once get duplicated copies of their data
            phase_precip_rand = select_additional(phase_precip_rand,rand_years)
            precip_rand = select_additional(precip_rand,rand_years)

            # The actual calculation: (data_sub, data_all, q)
            boot_run = calculate_intfrac(phase_precip_rand, precip_rand,q)

            loop_storage.append(boot_run)

        # Spread of the bootstrap runs for this month
        month_data = xr.concat(loop_storage, 'run_num')
        month_95 = month_data.reduce(np.nanpercentile, q = 95, dim = 'run_num')
        month_5 = month_data.reduce(np.nanpercentile, q = 5, dim = 'run_num')

        month_boot = xr.concat([month_95, month_5], pd.Index([95,5], name = 'percentile'))
        month_storage.append(month_boot)

    total_boot = xr.concat(month_storage, pd.Index(months, name = 'month'))

    return total_boot
            


## Calculation

In:

* <b> phase_precip_enso </b>

In [None]:
# Bootstrap of the 50th-percentile intensity fraction
q = 50
loops = 1000

enso_phases = ['el nino','la nina']
mjo_phases  = ['enhanced' ,'suppressed','inactive']

enso_stor = []
for nino in enso_phases:
    # One bootstrap result per MJO phase for this ENSO state
    mjo_stor = [
        bootstrap_phase_int(phase_precip_enso.sel(nino = nino, mjo = phase), precip, q, loops)
        for phase in mjo_phases
    ]
    enso_stor.append(xr.concat(mjo_stor, pd.Index(mjo_phases, name = 'mjo')))

int_boot_50_nino = xr.concat(enso_stor, pd.Index(enso_phases, name = 'nino'))

int_boot_50_nino.to_netcdf(save_dir + 'int_boot_50_nino.nc')

In [None]:
# Bootstrap of the 90th-percentile intensity fraction
q = 90
loops = 1000

enso_phases = ['el nino','la nina']
mjo_phases  = ['enhanced' ,'suppressed','inactive']

enso_stor = []
for nino in enso_phases:
    # One bootstrap result per MJO phase for this ENSO state
    mjo_stor = [
        bootstrap_phase_int(phase_precip_enso.sel(nino = nino, mjo = phase), precip, q, loops)
        for phase in mjo_phases
    ]
    enso_stor.append(xr.concat(mjo_stor, pd.Index(mjo_phases, name = 'mjo')))

int_boot_90_nino = xr.concat(enso_stor, pd.Index(enso_phases, name = 'nino'))

int_boot_90_nino.to_netcdf(save_dir + 'int_boot_90_nino.nc')


In [27]:
# Bootstrap of the 95th-percentile intensity fraction
q = 95
loops = 1000

enso_phases = ['el nino','la nina']
mjo_phases  = ['enhanced' ,'suppressed','inactive']

enso_stor = []
for nino in enso_phases:
    # One bootstrap result per MJO phase for this ENSO state
    mjo_stor = [
        bootstrap_phase_int(phase_precip_enso.sel(nino = nino, mjo = phase), precip, q, loops)
        for phase in mjo_phases
    ]
    enso_stor.append(xr.concat(mjo_stor, pd.Index(mjo_phases, name = 'mjo')))

int_boot_95_nino = xr.concat(enso_stor, pd.Index(enso_phases, name = 'nino'))

int_boot_95_nino.to_netcdf(save_dir + 'int_boot_95_nino.nc')