<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Preamble" data-toc-modified-id="Preamble-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preamble</a></span></li><li><span><a href="#Xarrays-for-Different-Phases" data-toc-modified-id="Xarrays-for-Different-Phases-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Xarrays for Different Phases</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Indivual-Phases" data-toc-modified-id="Indivual-Phases-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Indivual Phases</a></span></li><li><span><a href="#Bootstrap" data-toc-modified-id="Bootstrap-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Bootstrap</a></span></li></ul></div>

 # Preamble

In [1]:
# In[1]:


##############################

import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import dask.array
import cartopy.crs as ccrs
import pickle
import matplotlib.colors as colors
import datetime as dt
# Shorthand handles for matplotlib's RdBu and Blues colormaps.
rb = plt.cm.RdBu
bm = plt.cm.Blues

import warnings
# Suppress all warnings raised from this point on.
warnings.filterwarnings('ignore')

# In[2]:

In [2]:
# Load the RMM object from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code, so only load a file
# that comes from a trusted source.
path = 'RMM.pickle'
# The context manager closes the file handle even if unpickling fails.
with open(path, 'rb') as pickle_in:
    RMM = pickle.load(pickle_in)

In [3]:
# In[3]:


# Open the AWAP netCDF file lazily; each chunk holds the full time axis for a
# 50 x 50 lat/lon tile. Only the `precip` variable is kept.
path ='/home/student.unimelb.edu.au/aborowiak/Desktop/Code/Scripts/big_files/AWAP_W.nc'
precip = xr.open_dataset(path, chunks=dict(time=-1, lat=50, lon=50))['precip']


# In[4]:

In [4]:
# Shift every RMM date forward by 9 hours and restore 'Date' as the index.
# NOTE(review): re-running this cell shifts the dates by a further 9 hours.
RMM = (
    RMM.reset_index()
       .assign(Date=lambda frame: frame['Date'] + pd.to_timedelta('9h'))
       .set_index('Date')
)

# Xarrays for Different Phases

In [6]:

# PURPOSE    Creating xarrays that contain the rainfall data only for a specific phase of the MJO

# Longitude bands, one slice per region.
regions = np.array([slice(110, 120),slice(120.25, 140),slice(140.25, 156.25)])

# Phases treated as "enhanced" for each region, in the same order as `regions`.
# The rows have different lengths, so the array must be built with dtype=object:
# recent NumPy releases raise a ValueError for ragged input without it.
mjo_enhanced = np.array([[4,5],[4,5,6],[4,5,6,7]], dtype=object)


# In[10]:


# Rainfall on enhanced MJO days: for each longitude band keep only the dates
# whose amplitude is >= 1 and whose phase is in that band's enhanced list;
# every other time step is masked.
enhanced_parts = []
for lon_band, phases in zip(regions, mjo_enhanced):
    in_enhanced_phase = np.logical_and(RMM['Amplitude'] >= 1, RMM['Phase'].isin(phases))
    enhanced_dates = np.array(RMM[in_enhanced_phase].index)
    band_precip = precip.sel(lon=lon_band)
    enhanced_parts.append(band_precip.where(band_precip.time.isin(enhanced_dates)))

# Stitch the three longitude bands back into a single array.
west, centre, east = enhanced_parts
enhanced_precip = west.combine_first(centre).combine_first(east)


# In[11]:


# Rainfall on suppressed MJO days: amplitude is >= 1 but the phase is NOT in
# the band's enhanced list; every other time step is masked.
suppressed_parts = []
for lon_band, phases in zip(regions, mjo_enhanced):
    in_suppressed_phase = np.logical_and(RMM['Amplitude'] >= 1, ~RMM['Phase'].isin(phases))
    suppressed_dates = np.array(RMM[in_suppressed_phase].index)
    band_precip = precip.sel(lon=lon_band)
    suppressed_parts.append(band_precip.where(band_precip.time.isin(suppressed_dates)))

# Stitch the three longitude bands back into a single array.
west, centre, east = suppressed_parts
suppressed_precip = west.combine_first(centre).combine_first(east)


# In[12]:


# Rainfall on inactive MJO days (amplitude < 1).
# The date selection does not depend on the region, so it is computed once
# instead of once per loop iteration.
inactive_dates = np.array(RMM[RMM['Amplitude'] < 1].index)

d = {}
for i in range(3):
    r1 = precip.sel(lon = regions[i])
    # Keep the full time axis; time steps outside the inactive dates are masked.
    d[str(i)] = r1.where(r1.time.isin(inactive_dates))

# Stitch the three longitude bands back into a single array.
inactive_precip = (d['0'].combine_first(d['1'])).combine_first(d['2'])

# Functions

In [9]:
def into_xr(data, orgininal, name = 'precip'):
    """Wrap ``data`` in a named ``xr.DataArray`` on the lat/lon grid of ``orgininal``.

    Parameters
    ----------
    data : values with dimensions ordered as (lat, lon).
    orgininal : object whose ``lat`` and ``lon`` attributes supply the coordinates.
    name : name given to the returned DataArray.
    """
    grid = {'lat': orgininal.lat, 'lon': orgininal.lon}
    return xr.DataArray(data, coords=grid, dims=['lat', 'lon'], name=name)

In [10]:

def calculate_intfrac(data_sub,data_all,q ):
    """Ratio of the q-th percentile of ``data_sub`` to that of ``data_all``.

    Both percentiles are taken along the ``time`` dimension with
    ``np.nanpercentile``, so NaN time steps are ignored.

    Parameters
    ----------
    data_sub : subset of the data (e.g. one MJO phase) with a ``time`` dimension.
    data_all : reference data with ``time``, ``lat`` and ``lon``.
    q : percentile to compute, in the range accepted by ``np.nanpercentile``.

    Returns
    -------
    The ratio wrapped by ``into_xr`` on the lat/lon grid of ``data_all``.
    There is no guard against a zero reference percentile.
    """
    # The q-th percentile of phase data
    perc_sub = data_sub.reduce(np.nanpercentile, q = q, dim = 'time')

    # The q-th percentile of all data
    perc_all = data_all.reduce(np.nanpercentile, q = q, dim = 'time')

    # How the q-th percentile in the subset compares to the q-th percentile of all rainfall
    frac = perc_sub/perc_all

    # Named `frac_xr` rather than `xr` so the xarray module alias is not shadowed.
    frac_xr = into_xr(frac, data_all, 'precip')

    return frac_xr

In [13]:
# In[8]:


def percentile_all_months_and_regins(phase_precip, all_precip, q, months = (10,11,12,1,2,3)):
    """Month-by-month ratio of the q-th percentile of phase rainfall to all rainfall.

    Parameters
    ----------
    phase_precip : rainfall restricted to one MJO category, with a ``time`` dimension.
    all_precip : the reference rainfall, with a ``time`` dimension.
    q : percentile passed on to ``calculate_intfrac``.
    months : calendar months to process, in output order. Defaults to
        October through March, the months that were previously hard-coded.

    Returns
    -------
    The per-month results of ``calculate_intfrac`` concatenated along a new
    ``month`` dimension labelled with the month numbers.
    """
    months = list(months)
    month_storage = []

    # Looping through all of the different months
    for month in months:
        # Finding the values for a single month
        phase_prec_month = phase_precip.where(phase_precip.time.dt.month == month, drop = True)
        all_prec_month = all_precip.where(all_precip.time.dt.month == month, drop = True)

        # Calculating the q-th percentile for both the phase and all, then dividing phase by all
        # (data_sub, data_all)
        frac_month = calculate_intfrac(phase_prec_month ,all_prec_month, q)

        month_storage.append(frac_month)

    frac_int = xr.concat(month_storage, pd.Index(months, name = 'month'))

    return frac_int

# Indivual Phases

In [15]:

# 90th-percentile intensity fraction for each MJO category, evaluated in the
# order enhanced, suppressed, inactive.
enhanced_90, suppressed_90, inactive_90 = (
    percentile_all_months_and_regins(category_precip, precip, q=90)
    for category_precip in (enhanced_precip, suppressed_precip, inactive_precip)
)

In [16]:

# Stack the three categories along a new, labelled 'mjo' dimension.
int_90 = xr.concat(
    [enhanced_90, suppressed_90, inactive_90],
    dim=pd.Index(['enhanced', 'suppressed', 'inactive'], name='mjo'),
)

In [17]:
# 95th-percentile intensity fraction for each MJO category, evaluated in the
# order enhanced, suppressed, inactive.
enhanced_95, suppressed_95, inactive_95 = (
    percentile_all_months_and_regins(category_precip, precip, q=95)
    for category_precip in (enhanced_precip, suppressed_precip, inactive_precip)
)



In [18]:
# Stack the three categories along a new, labelled 'mjo' dimension.
int_95 = xr.concat(
    [enhanced_95, suppressed_95, inactive_95],
    dim=pd.Index(['enhanced', 'suppressed', 'inactive'], name='mjo'),
)


In [19]:
# Set `save` to 0 to skip writing the results to disk.
save = 1
if save:
    # Written in the same order as before: 95th percentile first, then 90th.
    for frac, target in ((int_95, 'int_frac_95.nc'), (int_90, 'int_frac_90.nc')):
        frac.to_netcdf(target)

# Bootstrap

In [42]:
# This just moves all the data number * 100 years into the future

def forward_in_time(additional, number):
    """Return a copy of ``additional`` with every time stamp moved ``number * 100`` years ahead.

    Parameters
    ----------
    additional : data with ``time``, ``lat`` and ``lon`` coordinates whose
        ``.values`` is indexable along the time axis.
    number : multiplier for the 100-year shift.

    Returns
    -------
    An ``xr.Dataset`` with a single ``precip`` variable on (time, lat, lon).
    Dates that do not exist in the shifted year (e.g. 29 February when the
    target year is not a leap year) are dropped together with their values.

    NOTE(review): the result is a Dataset even though the input is read like a
    DataArray; confirm that callers combining the two expect this.
    """
    # Encode each shifted date as the integer YYYYMMDD.
    times = ((additional.time.dt.year.values + number *100 )* 10000 +
                       additional.time.dt.month.values *100 + additional.time.dt.day.values)
    values = additional.values

    future_time = []
    future_values = []

    for stamp, field in zip(times, values):
        try:
            shifted = pd.to_datetime(stamp, format = '%Y%m%d')
        except ValueError:
            # Only invalid or unrepresentable dates are skipped; any other
            # error is a genuine problem and is allowed to propagate.
            continue
        future_time.append(shifted)
        future_values.append(field)

    # Explicit stacking keeps the (time, lat, lon) shape even when every date
    # was skipped and the lists are empty.
    stacked = np.asarray(future_values).reshape((len(future_time),) + values.shape[1:])

    xrd = xr.Dataset({'precip':(('time','lat','lon'), stacked)},
                {'time':pd.DatetimeIndex(future_time),'lat':additional.lat, 'lon':additional.lon})

    return xrd
    


# * Contains 1 additional function 
def select_additional(data, years):
    """Add an extra, time-shifted copy of the data for every repeated entry in ``years``.

    A year that occurs k times in ``years`` ends up with k - 1 extra copies in
    the result, each moved a further 100 years ahead by ``forward_in_time``.

    Parameters
    ----------
    data : data with a ``time`` coordinate.
    years : sampled years, possibly containing repeats.

    Returns
    -------
    ``data`` combined (via ``combine_first``) with all the shifted copies.
    """
    sampled_years, remaining = np.unique(years, return_counts=True)
    shift = 1

    # Each pass handles one extra occurrence of every year that is still repeated.
    while (remaining >= 2).any():
        repeated = sampled_years[remaining >= 2]
        duplicate = data.where(data.time.dt.year.isin(repeated), drop = True)

        # Move the duplicate forwards in time so it does not collide with the original
        data = data.combine_first(forward_in_time(duplicate, shift))

        remaining = remaining - 1
        shift += 1

    return data
        

In [None]:

def calculate_intfrac(data_sub,data_all,q ):
    """Ratio of the q-th percentile of ``data_sub`` to that of ``data_all``.

    NOTE(review): this repeats the ``calculate_intfrac`` already defined in the
    Functions section; this later definition is the one in effect afterwards.

    Both percentiles are taken along the ``time`` dimension with
    ``np.nanpercentile``, so NaN time steps are ignored. The ratio is wrapped
    by ``into_xr`` on the lat/lon grid of ``data_all``. There is no guard
    against a zero reference percentile.
    """
    # The q-th percentile of phase data
    perc_sub = data_sub.reduce(np.nanpercentile, q = q, dim = 'time')

    # The q-th percentile of all data
    perc_all = data_all.reduce(np.nanpercentile, q = q, dim = 'time')

    # How the q-th percentile in the subset compares to the q-th percentile of all rainfall
    frac = perc_sub/perc_all

    # Named `frac_xr` rather than `xr` so the xarray module alias is not shadowed.
    frac_xr = into_xr(frac, data_all, 'precip')

    return frac_xr

In [63]:
def bootstrap_phase_int(phase_precip,precip, q, loops):
    """Bootstrap the intensity fraction per month and return its 95th/5th percentile bounds.

    For each of the months October through March, ``loops`` resamples are
    drawn. Each resample picks 22 years at random (with replacement) from
    1974-2016, duplicates the data of repeated years through
    ``select_additional`` and evaluates ``calculate_intfrac``.

    Parameters
    ----------
    phase_precip : rainfall restricted to one MJO category, with a ``time`` dimension.
    precip : the reference rainfall, with a ``time`` dimension.
    q : percentile passed on to ``calculate_intfrac``.
    loops : number of bootstrap resamples per month.

    Returns
    -------
    The 95th and 5th percentiles across resamples, concatenated along a
    ``percentile`` dimension and then along a ``month`` dimension.

    NOTE(review): ``forward_in_time`` (used by ``select_additional``) builds an
    ``xr.Dataset``; confirm the resampled objects are still accepted by
    ``calculate_intfrac``.
    """
    month_storage = []

    #looping through all of the months
    months = [10,11,12,1,2,3]
    for month in months:
        # Subsetting just one month; each array is masked with its own time axis
        phase_precip_month = phase_precip.where(phase_precip.time.dt.month == month, drop = True)
        precip_month = precip.where(precip.time.dt.month == month, drop = True)

        loop_storage = []

        for _ in range(loops):
            #### Getting random years
            # 22 draws with replacement from 1974..2016 (the upper bound is exclusive)
            rand_years = np.random.randint(1974,2017,22)

            # Subsetting both the data to the random years
            phase_precip_rand = phase_precip_month.where(phase_precip_month.time.dt.year.isin(rand_years))
            precip_rand = precip_month.where(precip_month.time.dt.year.isin(rand_years))

            # Years drawn more than once need their data added again.
            # select_additional takes (data, years) and counts the repeats itself.
            phase_precip_rand = select_additional(phase_precip_rand, rand_years)
            precip_rand = select_additional(precip_rand, rand_years)

            #### The actual Calculation
            # (data_sub, data)
            boot_run = calculate_intfrac(phase_precip_rand, precip_rand,q)

            loop_storage.append(boot_run)

            ## END OF LOOP

        month_data = xr.concat(loop_storage, 'run_num')
        month_95 = month_data.reduce(np.nanpercentile, q = 95, dim = 'run_num')
        month_5 = month_data.reduce(np.nanpercentile, q = 5, dim = 'run_num')

        month_boot = xr.concat([month_95, month_5], pd.Index([95,5], name = 'percentile'))
        month_storage.append(month_boot)

    total_boot = xr.concat(month_storage, pd.Index(months, name = 'month'))

    return total_boot
            


In [None]:
# Bootstrap bounds for the 90th-percentile intensity fraction, one run per MJO category.
q, loops = 90, 1000

name = 'enhanced'
enhanced_boot = bootstrap_phase_int(phase_precip=enhanced_precip, precip=precip, q=q, loops=loops)

name = 'suppressed'
suppressed_boot = bootstrap_phase_int(phase_precip=suppressed_precip, precip=precip, q=q, loops=loops)

name = 'inactive'
inactive_boot = bootstrap_phase_int(phase_precip=inactive_precip, precip=precip, q=q, loops=loops)

In [None]:
# Stack the three bootstrap results along a labelled 'mjo' dimension and write them to disk.
mjo_index = pd.Index(['enhanced', 'suppressed', 'inactive'], name='mjo')
int_boot_90 = xr.concat([enhanced_boot, suppressed_boot, inactive_boot], mjo_index)
int_boot_90.to_netcdf('int_boot_90.nc')


In [None]:
# Bootstrap bounds for the 95th-percentile intensity fraction, one run per MJO category.
q, loops = 95, 1000

name = 'enhanced'
enhanced_boot = bootstrap_phase_int(phase_precip=enhanced_precip, precip=precip, q=q, loops=loops)

name = 'suppressed'
suppressed_boot = bootstrap_phase_int(phase_precip=suppressed_precip, precip=precip, q=q, loops=loops)

name = 'inactive'
inactive_boot = bootstrap_phase_int(phase_precip=inactive_precip, precip=precip, q=q, loops=loops)

In [None]:
# Stack the three bootstrap results along a labelled 'mjo' dimension and write them to disk.
mjo_index = pd.Index(['enhanced', 'suppressed', 'inactive'], name='mjo')
int_boot_95 = xr.concat([enhanced_boot, suppressed_boot, inactive_boot], mjo_index)
int_boot_95.to_netcdf('int_boot_95.nc')