<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Preamble" data-toc-modified-id="Preamble-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preamble</a></span></li><li><span><a href="#Different-Regions" data-toc-modified-id="Different-Regions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Different Regions</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Indivual-Phases" data-toc-modified-id="Indivual-Phases-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Indivual Phases</a></span></li><li><span><a href="#Bootstrap" data-toc-modified-id="Bootstrap-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Bootstrap</a></span><ul class="toc-item"><li><span><a href="#Create-Count-Array" data-toc-modified-id="Create-Count-Array-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Create Count Array</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Running-and-Saving" data-toc-modified-id="Running-and-Saving-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Running and Saving</a></span></li></ul></li></ul></div>

# Preamble

In [2]:
##############################

import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import dask.array
import cartopy.crs as ccrs
import pickle
import matplotlib.colors as colors
import datetime as dt
# Short aliases for two matplotlib colormaps: RdBu (diverging red-blue) and Blues (sequential).
rb = plt.cm.RdBu
bm = plt.cm.Blues

In [3]:
# Load the pickled RMM table; later cells read its 'Date', 'Phase' and 'Amplitude' fields.
# NOTE(review): pickle.load can execute arbitrary code - only load a trusted RMM.pickle.
path = 'RMM.pickle'
# The context manager closes the file handle as soon as the object has been loaded
# (the original left the handle open for the lifetime of the kernel).
with open(path, 'rb') as pickle_in:
    RMM = pickle.load(pickle_in)

In [4]:
# Open the precipitation file lazily with dask chunks: the whole time axis (-1) in one
# chunk, tiled 50 x 50 in lat/lon. Only the `precip` variable is kept.
path ='AWAP_W.nc'
precip = xr.open_dataset(path, chunks={'time':-1, 'lat': 50, 'lon': 50}).precip

In [5]:
# Shift every RMM timestamp forward by 9 hours, then restore 'Date' as the index.
# Presumably this lines the RMM dates up with the timestamps of `precip` - TODO confirm.
# NOTE(review): this cell is not idempotent; re-running it adds another 9 hours.
RMM = RMM.reset_index()
RMM['Date'] = RMM['Date'] + pd.to_timedelta('9h')
RMM = RMM.set_index('Date')

# Different Regions

In [6]:
# Longitude bands that split the domain into three regions; each slice is passed to
# precip.sel(lon=...).
regions = np.array([slice(110, 120),slice(120.25, 140),slice(140.25, 156.25)])
# RMM phases counted as "enhanced" for each region, in the same order as `regions`.
# The rows have different lengths, so dtype=object is required: recent NumPy releases
# raise ValueError for ragged input instead of silently building an object array.
mjo_enhanced_list = np.array([[4,5],[4,5,6],[4,5,6,7]], dtype=object)

In [7]:
def _combine_regions(dates_for_region):
    """Mask `precip` region by region and merge the regions back into one array.

    For every longitude band in `regions`, time steps that are not in
    ``dates_for_region(reg_num)`` are masked out with ``where`` (which fills
    non-matching entries with NaN); the masked regions are then merged with
    ``combine_first`` in region order.

    Parameters
    ----------
    dates_for_region : callable
        Takes the region number (index into `regions`) and returns the dates to keep.
    """
    region_data = []
    for reg_num, region in enumerate(regions):
        precip_region = precip.sel(lon = region)
        keep_dates = dates_for_region(reg_num)
        region_data.append(precip_region.where(precip_region.time.isin(keep_dates)))

    combined = region_data[0]
    for other_region in region_data[1:]:
        combined = combined.combine_first(other_region)
    return combined


def _enhanced_dates(reg_num):
    """Dates with amplitude >= 1 whose phase IS one of the region's enhanced phases."""
    in_enhanced_phase = RMM['Phase'].isin(mjo_enhanced_list[reg_num])
    return np.array(RMM[np.logical_and(in_enhanced_phase, RMM['Amplitude'] >= 1)].index)


def _suppressed_dates(reg_num):
    """Dates with amplitude >= 1 whose phase is NOT one of the region's enhanced phases."""
    in_enhanced_phase = RMM['Phase'].isin(mjo_enhanced_list[reg_num])
    return np.array(RMM[np.logical_and(~in_enhanced_phase, RMM['Amplitude'] >= 1)].index)


########## Enhanced
enhanced_precip = _combine_regions(_enhanced_dates)

########## Suppressed
suppressed_precip = _combine_regions(_suppressed_dates)

########## Inactive
# Amplitude below 1, independent of region, so the dates are computed once.
inactive_dates = np.array(RMM[RMM['Amplitude'] <1].index)
inactive_precip = _combine_regions(lambda reg_num: inactive_dates)

In [8]:
# 90th percentile of `precip` along time for each calendar month (np.nanpercentile ignores NaNs).
# The result carries a `month` coordinate and is used as the extreme-rainfall threshold.
climatology_90 = precip.groupby('time.month').reduce(np.nanpercentile, q = 90, dim = 'time')

  overwrite_input, interpolation)


In [9]:
# 95th percentile of `precip` along time for each calendar month (np.nanpercentile ignores NaNs).
# The result carries a `month` coordinate and is used as the extreme-rainfall threshold.
climatology_95 = precip.groupby('time.month').reduce(np.nanpercentile, q = 95, dim = 'time')

  overwrite_input, interpolation)


In [10]:
def return_extremes(precip, threshold, months = (10,11,12,1,2,3)):
    """Keep only the values of `precip` that reach the threshold of their month.

    Parameters
    ----------
    precip : array with a `time` coordinate (an xarray DataArray in this notebook).
    threshold : array with a `month` coordinate; one threshold field per month,
        selected with ``threshold.sel(month=...)``.
    months : iterable of int, optional
        Calendar months to process. Defaults to October-March, the months the
        original implementation hard-coded.

    Returns
    -------
    The time steps of `precip` that fall in `months`, with every value below the
    month's threshold masked out by ``where``; the months are merged with
    ``combine_first``.

    Raises
    ------
    ValueError
        If `months` is empty.
    """
    extreme_xr = None
    for month in months:
        # Only the time steps of this calendar month.
        precip_month = precip.where(precip.time.dt.month == month, drop = True)
        threshold_month = threshold.sel(month = month)

        # Mask everything below this month's threshold.
        precip_month_ex = precip_month.where(precip_month >= threshold_month)

        if extreme_xr is None:
            extreme_xr = precip_month_ex
        else:
            extreme_xr = extreme_xr.combine_first(precip_month_ex)

    if extreme_xr is None:
        raise ValueError('months must contain at least one month')

    return extreme_xr
    

In [11]:
# Values at or above the monthly 90th-percentile threshold, for each MJO state
# and for the unconditioned rainfall.
enhanced_90 = return_extremes(enhanced_precip , climatology_90)

suppressed_90 = return_extremes(suppressed_precip , climatology_90)

inactive_90 = return_extremes(inactive_precip , climatology_90)

precip_90 = return_extremes(precip , climatology_90)

In [12]:
# Values at or above the monthly 95th-percentile threshold, for each MJO state
# and for the unconditioned rainfall.
enhanced_95 = return_extremes(enhanced_precip , climatology_95)

suppressed_95 = return_extremes(suppressed_precip , climatology_95)

inactive_95 = return_extremes(inactive_precip , climatology_95)

precip_95 = return_extremes(precip , climatology_95)

# Functions

In [13]:
def into_xr(data, orgininal, name = 'frac'):
    """Wrap a (lat, lon) field in a named DataArray.

    The `lat` and `lon` coordinates are copied from `orgininal`; `name`
    becomes the name of the returned DataArray.
    """
    grid_coords = {'lat':orgininal.lat, 'lon': orgininal.lon}
    return xr.DataArray(data, dims=['lat','lon'], coords=grid_coords, name=name)

In [14]:
def calculate_countfrac(data_sub,data_all,per):
    """Fraction of events that are extreme, relative to the expected fraction `per`.

    Parameters
    ----------
    data_sub : array of extreme events with a `time` dimension (non-extremes missing).
    data_all : array of all rain events with a `time` dimension; also supplies the
        lat/lon coordinates of the result.
    per : float
        Expected fraction of extremes (e.g. 0.1 for the 90th percentile).

    Returns
    -------
    DataArray named 'frac_count': (count of extremes / count of all events) / per.
    """
    # Counting all of the extreme events (non-missing values along time)
    count_sub = data_sub.count(dim = 'time')

    # Counting all of the rain events
    count_all = data_all.count(dim = 'time')

    # Finding the ratio of the events
    frac = count_sub/count_all

    # How much more or less than expected this is. E.g. for the 90th percentile we
    # expect frac to be 0.1; a value of 0.2 is twice as much as expected, so dividing
    # by per (0.2 / 0.1) gives 2.
    frac = frac/per

    # Named `frac_xr` rather than `xr` so the local does not shadow the xarray module alias.
    frac_xr = into_xr(frac, data_all, 'frac_count')

    return frac_xr

In [15]:
def count_all_months_and_regions(phase_precip, all_precip,per):
    """Apply calculate_countfrac month by month and stack the results.

    Both inputs are restricted to one calendar month at a time (October to
    March); the per-month fractions are concatenated along a new `month`
    dimension in that same order.
    """
    wet_season = [10,11,12,1,2,3]

    def _only_month(field, month_num):
        # Drop every time step outside the requested calendar month.
        return field.where(field.time.dt.month == month_num, drop = True)

    monthly_fracs = [
        calculate_countfrac(_only_month(phase_precip, month_num),
                            _only_month(all_precip, month_num),
                            per)
        for month_num in wet_season
    ]

    return xr.concat(monthly_fracs, pd.Index(wet_season, name = 'month'))
    
    
    

# Indivual Phases

In [16]:
# Expected fraction of events above the 90th percentile.
per = 0.1
# Normalised count of 90th-percentile extremes per month, for each MJO state.
enhanced_count90 = count_all_months_and_regions(enhanced_90 , enhanced_precip, per)

suppressed_count90 = count_all_months_and_regions(suppressed_90, suppressed_precip,per)

inactive_count90 = count_all_months_and_regions(inactive_90, inactive_precip,per)

  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)


In [17]:
# Stack the three MJO states along a new 'mjo' dimension.
count_90 = xr.concat([enhanced_count90, suppressed_count90, inactive_count90],
                    pd.Index(['enhanced','suppressed','inactive'], name = 'mjo'))

In [18]:
# Expected fraction of events above the 95th percentile.
per = 0.05

# Normalised count of 95th-percentile extremes per month, for each MJO state.
enhanced_count95 = count_all_months_and_regions(enhanced_95 , enhanced_precip, per)

suppressed_count95 = count_all_months_and_regions(suppressed_95, suppressed_precip,per)

inactive_count95 = count_all_months_and_regions(inactive_95, inactive_precip,per)

  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)


In [19]:
# Stack the three MJO states along a new 'mjo' dimension.
count_95 = xr.concat([enhanced_count95, suppressed_count95, inactive_count95],
                    pd.Index(['enhanced','suppressed','inactive'], name = 'mjo'))

In [20]:
# Toggle for writing the count fractions to disk (any truthy value saves).
save = 1
if save:
    # One NetCDF file per percentile threshold, written to the working directory.
    count_95.to_netcdf('count_95.nc')

    count_90.to_netcdf('count_90.nc')

# Bootstrap

## Create Count Array

In [21]:
# The count array is created to speed up the bootstrapping.
# The end result of the count is built from the count of every month,
# so if the count for each month is computed up front, the final
# result is just the sum of these pre-computed counts.

In [22]:
def each_year_count(data):
    """Count the non-missing values of `data` per year, separately for each month.

    For every month from October to March the time steps of that month are
    grouped by year and counted along `time`; the per-month results are
    concatenated along a new `month` dimension in that order.
    """
    wet_season = [10,11,12,1,2,3]

    yearly_counts = [
        data.where(data.time.dt.month == month_num, drop = True)
            .groupby('time.year')
            .count(dim = 'time')
        for month_num in wet_season
    ]

    return xr.concat(yearly_counts, pd.Index(wet_season, name = 'month'))
        
        
        
        
    

In [23]:
# Per-year, per-month counts of 90th-percentile extremes for each MJO state,
# stacked along a new 'mjo' dimension.
en_90_year_count = each_year_count(enhanced_90)
sup_90_year_count = each_year_count(suppressed_90)
inact_90_year_count = each_year_count(inactive_90)

year_count_90 = xr.concat([en_90_year_count, sup_90_year_count,inact_90_year_count],
                       pd.Index(['enhanced', 'suppressed','inactive'], name = 'mjo'))

In [24]:
# Per-year, per-month counts of 95th-percentile extremes for each MJO state,
# stacked along a new 'mjo' dimension.
en_95_year_count = each_year_count(enhanced_95)
sup_95_year_count = each_year_count(suppressed_95)
inact_95_year_count = each_year_count(inactive_95)

year_count_95 = xr.concat([en_95_year_count, sup_95_year_count,inact_95_year_count],
                       pd.Index(['enhanced', 'suppressed','inactive'], name = 'mjo'))

In [25]:
# Per-year, per-month counts of ALL rain events for each MJO state (the denominator
# of the count fraction), stacked along a new 'mjo' dimension.
en_year_count = each_year_count(enhanced_precip)
sup_year_count = each_year_count(suppressed_precip)
inact_year_count = each_year_count(inactive_precip)

year_count = xr.concat([en_year_count, sup_year_count,inact_year_count],
                       pd.Index(['enhanced', 'suppressed','inactive'], name = 'mjo'))

In [26]:
# Display only: inspect the dimensions and coordinates of the yearly count array.
en_90_year_count

<xarray.DataArray (month: 6, year: 44, lat: 53, lon: 178)>
dask.array<shape=(6, 44, 53, 178), dtype=float64, chunksize=(1, 1, 50, 50)>
Coordinates:
  * year     (year) int64 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 ...
  * lon      (lon) float64 112.0 112.2 112.5 112.8 113.0 113.2 113.5 113.8 ...
  * lat      (lat) float64 -23.0 -22.75 -22.5 -22.25 -22.0 -21.75 -21.5 ...
  * month    (month) int64 10 11 12 1 2 3

## Functions

In [27]:
def calculate_sum_of_count_frac(data_sub,data_all,per):
    """Count fraction from pre-computed yearly counts.

    Sums the yearly extreme counts (`data_sub`) and the yearly rain-event
    counts (`data_all`) over `year`, takes their ratio and divides it by the
    expected fraction `per` (e.g. 0.1 for the 90th percentile, so a ratio of
    0.2 becomes 2: twice as many extremes as expected).

    Returns a DataArray named 'frac_count' on the lat/lon grid of `data_all`.
    """
    total_sub, total_all = (counts.sum(dim = 'year') for counts in (data_sub, data_all))

    # Observed fraction of extremes, expressed as a multiple of the expected fraction.
    relative_frac = (total_sub/total_all)/per

    return into_xr(relative_frac, data_all, 'frac_count')

In [28]:
def select_additional(data_sub,data,uniqueValues, occurCount, number):
    """Append extra copies of the years that were sampled more than once.

    A bootstrap draw with replacement can pick the same year several times,
    but selecting by year keeps each year only once. For every repeat, the
    year's data is taken from `data`, its `year` label is shifted by
    ``number * 100`` so it cannot collide with an existing label, and it is
    merged into `data_sub` with ``combine_first``.

    The original recursive version passed ``(data, data_sub)`` in swapped order
    on the recursive call, so a year drawn twice made the function return the
    full unsampled `data`, and years drawn three or more times were lost. The
    loop below keeps both roles fixed.

    Parameters
    ----------
    data_sub : the already-subset array (one copy of every sampled year).
    data : the full array the repeated years are copied from.
    uniqueValues : array of the distinct sampled years.
    occurCount : array with how many times each year in `uniqueValues` was drawn.
    number : int
        Multiplier for the first label offset (offset is ``number * 100``);
        incremented by one for each further round of copies.

    Returns
    -------
    `data_sub` with one extra, relabelled copy per repeated draw.
    """
    uniqueValues = np.asarray(uniqueValues)
    remaining = np.asarray(occurCount)

    # Each pass adds ONE extra copy of every year that still has repeats left.
    while np.any(remaining >= 2):
        multi_sampled_years = uniqueValues[remaining >= 2]

        # Finding the data for the additional years
        additional = data.where(data.year.isin(multi_sampled_years), drop = True)
        # Adding n * 100 years to the labels so that they don't overlap
        additional['year'] = additional.year + number * 100

        # Combining all the data together
        data_sub = data_sub.combine_first(additional)

        # One repeat of each year has now been accounted for.
        remaining = remaining - 1
        number += 1

    return data_sub

In [29]:
def bootstrap_phase_count(precip_ex ,precip, per, loops):
    """Bootstrap the count fraction by resampling years with replacement.

    Parameters
    ----------
    precip_ex : yearly counts of extreme events (has a `year` dimension).
    precip : yearly counts of all rain events (has a `year` dimension).
    per : float
        Expected fraction of extremes, passed to calculate_sum_of_count_frac.
    loops : int
        Number of bootstrap resamples.

    Returns
    -------
    The count fraction of every resample, concatenated along a new 'run_num'
    dimension.
    """
    total_boot = []

    for _ in range(loops):

        # Draw 22 years with replacement.
        # NOTE(review): randint's upper bound is exclusive, so 2017 is never drawn, while
        # the yearly count array shown in this notebook has 44 years starting at 1974 -
        # confirm whether excluding the last year is intended.
        # NOTE(review): no random seed is set, so results differ between runs.
        rand_years = np.random.randint(1974,2017,22)

        # How often each distinct year was drawn.
        uniqueValues, occurCount = np.unique(rand_years, return_counts=True)

        # Subsetting both arrays to the drawn years (each year appears once here).
        precip_ex_rand = precip_ex.where(precip_ex.year.isin(rand_years))
        precip_rand = precip.where(precip.year.isin(rand_years))

        # Years drawn more than once need their extra copies added. The copies must come
        # from the SAME array that was subset: extremes from `precip_ex`, all events from
        # `precip`. (The original took the extra extreme years from `precip`, mixing
        # all-event counts into the extreme counts.)
        precip_ex_rand = select_additional(precip_ex_rand, precip_ex,
                                           uniqueValues, occurCount, 1)

        precip_rand = select_additional(precip_rand, precip,
                                        uniqueValues, occurCount, 1)

        # Count fraction for this resample.
        run_boot = calculate_sum_of_count_frac(precip_ex_rand, precip_rand, per)

        total_boot.append(run_boot)

    total_boot = xr.concat(total_boot, 'run_num')

    return total_boot

In [39]:
def bootstrap_each_month_count(precip_ex, precip, per, loops):
    """Run bootstrap_phase_count separately for each month, October to March.

    Both inputs are selected on their `month` coordinate; the per-month
    bootstrap results are concatenated along a new `month` dimension in the
    same October-to-March order.
    """
    wet_season = [10,11,12,1,2,3]

    monthly_boots = [
        bootstrap_phase_count(precip_ex.sel(month = month_num),
                              precip.sel(month = month_num),
                              per, loops)
        for month_num in wet_season
    ]

    return xr.concat(monthly_boots, pd.Index(wet_season, name = 'month'))

## Running and Saving

In [48]:
# Runs the monthly bootstrap and computes the 90th and 10th percentiles over the runs.
# Despite the name, nothing is saved here: the caller writes the results to disk.
def boot_run_and_save(data,precip, per, loops):
    """Return the (90th, 10th) percentile of the bootstrap distribution for each month.

    `data` and `precip` are the yearly extreme and all-event counts passed on to
    bootstrap_each_month_count; the percentiles are taken along 'run_num' with
    np.nanpercentile, so NaN runs are ignored.
    """
    boot = bootstrap_each_month_count(data,precip, per = per, loops = loops)
    
    boot_90 = boot.groupby('month').reduce(np.nanpercentile, dim = 'run_num', q= 90)
    boot_10 = boot.groupby('month').reduce(np.nanpercentile, dim = 'run_num', q= 10)

    return boot_90, boot_10

# NOTE(review): an earlier, disabled variant wrote boot_90 / boot_10 to
# 'boot_count<q><name>_90.nc' / '..._10.nc' with q = (1 - per) * 100; it relied on a
# `name` variable that is not a parameter of this function.
    
    
    
    

In [50]:
# Bootstrap bounds for the 90th-percentile count fraction.
per = 0.1       # expected fraction of events above the 90th percentile
loops = 1000    # bootstrap resamples per month and MJO state
mjo_states = ['enhanced', 'suppressed', 'inactive']

# One pass per MJO state (replaces three copy-pasted stanzas).
state_boots = []
for name in mjo_states:
    boot_90, boot_10 = boot_run_and_save(year_count_90.sel(mjo = name), year_count.sel(mjo = name),
                                         per, loops)

    # Upper (90) and lower (10) bootstrap bounds along a new 'percentile' dimension.
    state_boots.append(xr.concat([boot_90, boot_10], pd.Index(['90','10'], name = 'percentile')))

count_boot_90 = xr.concat(state_boots, pd.Index(mjo_states, name = 'mjo'))

count_boot_90.to_netcdf('count_boot_90.nc')

  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  overwrite_input, interpolation)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  overwrite_input, interpolation)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  overwrite_input, interpolation)


In [63]:
# Bootstrap bounds for the 95th-percentile count fraction.
per = 0.05      # expected fraction of events above the 95th percentile
loops = 1000    # bootstrap resamples per month and MJO state
mjo_states = ['enhanced', 'suppressed', 'inactive']

# One pass per MJO state (replaces three copy-pasted stanzas that reused the
# variable names of the 90th-percentile cell).
state_boots = []
for name in mjo_states:
    boot_90, boot_10 = boot_run_and_save(year_count_95.sel(mjo = name), year_count.sel(mjo = name),
                                         per, loops)

    # Upper (90) and lower (10) bootstrap bounds along a new 'percentile' dimension.
    state_boots.append(xr.concat([boot_90, boot_10], pd.Index(['90','10'], name = 'percentile')))

count_boot_95 = xr.concat(state_boots, pd.Index(mjo_states, name = 'mjo'))

count_boot_95.to_netcdf('count_boot_95.nc')

  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  overwrite_input, interpolation)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  overwrite_input, interpolation)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  return func(*args2)
  overwrite_input, interpolation)
