In [2]:
import ast
import collections
import math
import operator
import re
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd
from ordpy import ordinal_sequence,ordinal_network

In [7]:
def import_raw_data(data_dir='C:/Users/Owner/Documents/Thesis/Code/Data_Extraction/Mean Prec Data'):
    '''
    Load the four seasonal mean-precipitation CSV files into pandas dataframes.

    Input:
        data_dir == type(str or path-like):
            folder containing DJF_Mean_Prec_Regions.csv, JJA_Mean_Prec_Regions.csv,
            NDJFM_Mean_Prec_Regions.csv and MJJAS_Mean_Prec_Regions.csv.
            Defaults to the folder that used to be hard-coded, so calling the
            function with no argument reads the same files as before.

    Returns:
        tuple of four pandas dataframes, in the order:
            1) DJF
            2) JJA
            3) NDJFM
            4) MJJAS
    '''
    folder = Path(data_dir)
    seasons = ('DJF', 'JJA', 'NDJFM', 'MJJAS')
    # One file per season; the tuple order is what callers unpack positionally.
    return tuple(pd.read_csv(folder / f'{season}_Mean_Prec_Regions.csv') for season in seasons)

In [3]:
def three_month_extraction():
    '''
    Build the gap-free summer-season (DJF/JJA) regional precipitation tables.

    The raw DJF and JJA tables come from import_raw_data() and are cut down with
    hard-coded row offsets. Author's note carried over from the original
    docstring (not verifiable from this code alone):
        Start of DJF: December 1763
        Start of JJA: June 1763

    Side effects:
        prints the length of the loosely truncated DJF series, the number of NaN
        entries in its NW_Aus_Prec_Mean column, and the length of the strictly
        truncated DJF series.

    Returns:
        two pandas dataframes of:
            1) DJF (columns: Date, NW_Aus, NE_Aus, MC, IO, EEP)
            2) JJA (columns: Date, ISM, EASM)
    '''

    raw_djf, raw_jja, _, _ = import_raw_data()

    # Loose truncation: only used for the diagnostic printout below.
    loose_djf = raw_djf[426:]
    nan_total = sum(math.isnan(sample) for sample in loose_djf['NW_Aus_Prec_Mean'].tolist())
    print(f'Length of time series with minor temporal irregularities: {len(loose_djf)}')
    print(f'Number of missing months: {nan_total}')

    # Strict truncation: the rows that are actually returned.
    # The offsets are tied to the layout of the source CSVs.
    strict_djf = raw_djf[458:-1]
    strict_jja = raw_jja[456:-3]

    print(f'Length of time series with no temporal irregularities: {len(strict_djf)}')

    # source column -> output column, in output order
    djf_columns = {
        'Date': 'Date',
        'NW_Aus_Prec_Mean': 'NW_Aus',
        'NE_Aus_Prec_Mean': 'NE_Aus',
        'MC_Prec_Mean': 'MC',
        'IO_Prec_Mean': 'IO',
        'EEP_Prec_Mean': 'EEP',
    }
    jja_columns = {
        'Date': 'Date',
        'ISM_Prec_Mean': 'ISM',
        'EASM_Prec_Mean': 'EASM',
    }

    def rebuild(frame, renames):
        # Re-assemble from plain lists so the result gets a fresh 0..n-1 index.
        series = [frame[source].tolist() for source in renames]
        return pd.DataFrame(zip(*series), columns=list(renames.values()))

    return rebuild(strict_djf, djf_columns), rebuild(strict_jja, jja_columns)

In [10]:
def delete_zeros(input_list):
    '''
    Locate and strip the entries of a list that compare equal to zero.

    Input:
        input_list == type(list):
            values to scan; the list itself is left unmodified.

    Returns:
        tuple of:
            1) list of positions (ascending) whose value == 0
            2) new list with those positions removed, original order kept
    '''
    zero_positions = [position for position, entry in enumerate(input_list) if entry == 0]
    dropped = set(zero_positions)
    survivors = [entry for position, entry in enumerate(input_list) if position not in dropped]
    return zero_positions, survivors

def five_month_extraction():
    '''
    Returns pandas dataframes of extracted data that is fully temporally sampled (no gaps)
    for the extended summer season (NDJFM/MJJAS)

    The raw NDJFM and MJJAS tables come from import_raw_data(). Each table is
    truncated to start a fixed number of rows after the last NaN of a reference
    column (NW_Aus_Prec_Mean for NDJFM, ISM_Prec_Mean for MJJAS) and to stop a
    fixed number of rows before the end.

    For NDJFM, every row whose NW_Aus_Prec_Mean equals 0 is then dropped as a
    whole, so Date and all regional columns stay aligned row-for-row.
    (Previously each column had its own zeros removed independently, which
    shifted columns against the dates whenever their zeros did not coincide
    with those of NW_Aus_Prec_Mean.)

    Returns:
    two pandas dataframes of:
        1) NDJFM (columns: Date, NW_Aus, NE_Aus, MC, IO, EEP)
        2) MJJAS (columns: Date, ISM, EASM)

    Raises:
        ValueError: if a reference column contains no NaN to anchor the truncation.
    '''

    def last_nan_row(values):
        # Position of the final NaN entry, as a plain Python int.
        nan_rows = np.flatnonzero(np.isnan(values))
        if nan_rows.size == 0:
            raise ValueError('reference column contains no NaN entries to anchor the truncation')
        return int(nan_rows[-1])

    def rebuild(frame, renames):
        # Re-assemble from plain lists so the result gets a fresh 0..n-1 index.
        series = [frame[source].tolist() for source in renames]
        return pd.DataFrame(zip(*series), columns=list(renames.values()))

    _, _, ndjfm, mjjas = import_raw_data()

    # Generate absolutely regular sampling in the time series (no temporal gaps at all)
    # NDJFM
    ndjfm_val = last_nan_row(ndjfm['NW_Aus_Prec_Mean'].tolist())
    # Offsets are tied to the source CSV layout (original note: "Extract all multiples of 5").
    abs_ndjfm = ndjfm[ndjfm_val+11:-2]

    # NaN != 0 is True, so NaN rows are kept, exactly as the == 0 test kept them before.
    keep_row = abs_ndjfm['NW_Aus_Prec_Mean'].to_numpy() != 0
    kept_ndjfm = abs_ndjfm[keep_row]

    no_zero_ndjfm = rebuild(kept_ndjfm, {
        'Date': 'Date',
        'NW_Aus_Prec_Mean': 'NW_Aus',
        'NE_Aus_Prec_Mean': 'NE_Aus',
        'MC_Prec_Mean': 'MC',
        'IO_Prec_Mean': 'IO',
        'EEP_Prec_Mean': 'EEP',
    })

    # MJJAS
    mjjas_val = last_nan_row(mjjas['ISM_Prec_Mean'].tolist())
    abs_mjjas = mjjas[mjjas_val+1:-5] # So number of periods match

    df_mjjas = rebuild(abs_mjjas, {
        'Date': 'Date',
        'ISM_Prec_Mean': 'ISM',
        'EASM_Prec_Mean': 'EASM',
    })

    return no_zero_ndjfm, df_mjjas

In [5]:
def extract_region_lists(three_month=True):
    '''
    Collect the dates and regional mean precipitation series into one object array.

    Input:
        three_month== type(bool):
            if True: use the DJF/JJA tables from three_month_extraction()
            else: use the NDJFM/MJJAS tables from five_month_extraction()

    Side effects:
        prints a one-line legend describing the row order of the result.

    Returns:
        numpy array (dtype=object) whose rows are, in order:
            austral dates, boreal dates, NW_Aus, NE_Aus, MC, IO, EEP, ISM, EASM
    '''

    # Pick the season pair; the legend is printed before the extraction runs.
    if three_month==True:
        print('DJF/JJA Order: DJF_dates,JJA_dates,nw,ne,mc,ioo,eep,ism,easm')
        austral, boreal = three_month_extraction()
    else:
        print('NDJFM/MJJAS Order: NDJFM_dates,MJJAS_dates,nw,ne,mc,ioo,eep,ism,easm')
        austral, boreal = five_month_extraction()

    # Both season pairs share the same column layout, so one assembly path serves both.
    rows = [austral['Date'].tolist(), boreal['Date'].tolist()]
    rows.extend(austral[region].tolist() for region in ('NW_Aus', 'NE_Aus', 'MC', 'IO', 'EEP'))
    rows.extend(boreal[region].tolist() for region in ('ISM', 'EASM'))

    return np.asarray(tuple(rows),dtype=object)

In [1]:
# For validation

def mcrobie_djfjja_0(data_dir='C:/Users/Owner/Documents/Thesis/Code/Data_Extraction/Mean Prec Data'):
    '''
    Load the "0_nan" DJF and JJA regional precipitation tables and return their
    truncated columns as plain lists (used for validation, per the note above
    this function).

    Each table is cut to start 6 rows after the last NaN of its reference
    column (NW_Aus_Prec_Mean for DJF, ISM_Prec_Mean for JJA) and to stop 10 rows
    before the end. Author's note on both slices: "Start from June 1763" (tied
    to the layout of the source CSVs; not verifiable from this code alone).

    Input:
        data_dir == type(str or path-like):
            folder containing DJF_0_nan_Mean_Prec_Regions.csv and
            JJA_0_nan_Mean_Prec_Regions.csv. Defaults to the folder that used to
            be hard-coded, so calling with no argument reads the same files.

    Side effects:
        prints a one-line legend describing the order of the returned lists.

    Returns:
        list of lists, in the order:
            date (taken from the DJF table), nw, ne, mc, io, eep, ism, easm

    Raises:
        ValueError: if a reference column contains no NaN to anchor the truncation.
    '''

    def last_nan_row(values):
        # Position of the final NaN entry, as a plain Python int. Avoids int()
        # on a 1-element array, which NumPy has deprecated.
        nan_rows = np.flatnonzero(np.isnan(values))
        if nan_rows.size == 0:
            raise ValueError('reference column contains no NaN entries to anchor the truncation')
        return int(nan_rows[-1])

    folder = Path(data_dir)
    djf_0_nan = pd.read_csv(folder / 'DJF_0_nan_Mean_Prec_Regions.csv')
    jja_0_nan = pd.read_csv(folder / 'JJA_0_nan_Mean_Prec_Regions.csv')

    djf_val = last_nan_row(djf_0_nan['NW_Aus_Prec_Mean'].tolist())
    jja_val = last_nan_row(jja_0_nan['ISM_Prec_Mean'].tolist())

    djf_0_nan_abs = djf_0_nan[djf_val+1+5:-10] # Start from June 1763
    jja_0_nan_abs = jja_0_nan[jja_val+1+5:-10] # Start from June 1763

    nw_0 = djf_0_nan_abs['NW_Aus_Prec_Mean'].tolist()
    ne_0 = djf_0_nan_abs['NE_Aus_Prec_Mean'].tolist()
    mc_0 = djf_0_nan_abs['MC_Prec_Mean'].tolist()
    io_0 = djf_0_nan_abs['IO_Prec_Mean'].tolist()
    eep_0 = djf_0_nan_abs['EEP_Prec_Mean'].tolist()

    ism_0 = jja_0_nan_abs['ISM_Prec_Mean'].tolist()
    easm_0 = jja_0_nan_abs['EASM_Prec_Mean'].tolist()

    # Only the DJF dates are returned; the JJA dates are not part of the result.
    dates = djf_0_nan_abs['Date'].tolist()

    print('Order is: date,nw,ne,mc,io,eep,ism,easm')

    return [dates,nw_0,ne_0,mc_0,io_0,eep_0,ism_0,easm_0]

def mcrobie_five_0(data_dir='C:/Users/Owner/Documents/Thesis/Code/Data_Extraction/Mean Prec Data'):
    '''
    Load the "0_nan" NDJFM and MJJAS regional precipitation tables and return
    their truncated columns as plain lists.

    Each table is cut to start 5 rows after the last NaN of its reference
    column (NW_Aus_Prec_Mean for NDJFM, ISM_Prec_Mean for MJJAS) and to stop 9
    rows before the end (author's note: "To ensure same length").

    Input:
        data_dir == type(str or path-like):
            folder containing NDJFM_0_nan_Mean_Prec_Regions.csv and
            MJJAS_0_nan_Mean_Prec_Regions.csv. Defaults to the folder that used
            to be hard-coded, so calling with no argument reads the same files.

    Side effects:
        prints a one-line legend describing the order of the returned lists.

    Returns:
        list of lists, in the order:
            date (taken from the NDJFM table), nw, ne, mc, io, eep, ism, easm

    Raises:
        ValueError: if a reference column contains no NaN to anchor the truncation.
    '''

    def last_nan_row(values):
        # Position of the final NaN entry, as a plain Python int. Avoids int()
        # on a 1-element array, which NumPy has deprecated.
        nan_rows = np.flatnonzero(np.isnan(values))
        if nan_rows.size == 0:
            raise ValueError('reference column contains no NaN entries to anchor the truncation')
        return int(nan_rows[-1])

    folder = Path(data_dir)
    ndjfm = pd.read_csv(folder / 'NDJFM_0_nan_Mean_Prec_Regions.csv')
    mjjas = pd.read_csv(folder / 'MJJAS_0_nan_Mean_Prec_Regions.csv')

    # Generate absolutely regular sampling in the time series (no temporal gaps at all)
    ndjfm_val = last_nan_row(ndjfm['NW_Aus_Prec_Mean'].tolist())
    mjjas_val = last_nan_row(mjjas['ISM_Prec_Mean'].tolist())

    abs_ndjfm=ndjfm[ndjfm_val+5:-9] # To ensure same length
    abs_mjjas=mjjas[mjjas_val+5:-9] # To ensure same length

    extra_nw = abs_ndjfm['NW_Aus_Prec_Mean'].tolist()
    extra_ne = abs_ndjfm['NE_Aus_Prec_Mean'].tolist()
    extra_mc = abs_ndjfm['MC_Prec_Mean'].tolist()
    extra_io = abs_ndjfm['IO_Prec_Mean'].tolist()
    extra_eep = abs_ndjfm['EEP_Prec_Mean'].tolist()

    extra_ism = abs_mjjas['ISM_Prec_Mean'].tolist()
    extra_easm =abs_mjjas['EASM_Prec_Mean'].tolist()

    # Only the NDJFM dates are returned; the MJJAS dates are not part of the result.
    date = abs_ndjfm['Date'].tolist()

    print('Order: date, nw, nee, mc, io, eep, ism, easm')

    return [date,extra_nw,extra_ne,extra_mc,extra_io,extra_eep,extra_ism,extra_easm]