# Probabilities

In [1]:
# Import Python modules
import os, sys
from pathlib import Path
import numpy as np
import pandas as  pd
import xarray as xr
# matplotlib
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1 import AxesGrid
from matplotlib import rcParams
# plot styles/formatting
import seaborn as sns
import cmocean
import cmocean.cm as cmo


In [4]:
# Set up paths
home = Path.home()                     # user's home directory
root = home / 'repos' / 'montini-phd'  # project root directory
modules = root / 'modules'             # project modules
path_to_data = root / 'data'           # project data -- read only
path_to_out = root / 'out'             # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = root / 'figs'           # figures

# Set path to module directory.
# sys.path entries must be strings: a pathlib.Path object is ignored by the
# import system, so convert before appending. The membership check keeps the
# cell idempotent when it is re-run in the same kernel.
if str(modules) not in sys.path:
    sys.path.append(str(modules))

# Import User Modules


In [5]:
# Default font for all matplotlib text: the 'sans-serif' family, resolved to 'Arial'
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
})

## Read data

In [None]:
# Load the CSV into a DataFrame, parse the DATE column as datetimes,
# and use it as the index (single chained expression, no in-place edits)
infile = path_to_out / 'sallj-types-ndjfm.csv'
df = (
    pd.read_csv(infile)
      .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
      .set_index('DATE')
)

# preview the first 30 rows
df.head(30)

## Markov Transition Probabilities

In [None]:
def transition_matrix(x, states):
    """Count first-order transitions in a discrete series and convert to probabilities.

    Parameters
    ----------
    x : array-like, 1-D
        Sequence of observed states, in time order. May be a NumPy array,
        a pandas column's values, or a plain Python list.
    states : array-like, 1-D
        The distinct states to tabulate. Row/column ``k`` of the returned
        matrices corresponds to ``states[k]``.

    Returns
    -------
    transc : ndarray of int, shape (n, n)
        ``transc[i, j]`` is the number of times ``states[i]`` was immediately
        followed by ``states[j]`` in ``x``.
    probs : ndarray of float, shape (n, n)
        ``transc`` with each row divided by its row total. A row whose total
        is zero (state never observed as a starting state) is all NaN.

    Notes
    -----
    A transition is skipped if either of its two values is not in ``states``.
    A series with fewer than two elements yields all-zero counts.
    """
    # Coerce both inputs to arrays so the element-wise comparison below works
    # for plain Python sequences too (list == int is a scalar False, not a mask).
    x = np.asarray(x)
    states = np.asarray(states)

    n = len(states)                          # number of states
    transc = np.zeros((n, n), dtype=int)     # matrix of transition counts

    if x.size >= 2:
        # Map every observation to its index in `states` (-1 when absent).
        matches = x[:, np.newaxis] == states[np.newaxis, :]
        idx = np.where(matches.any(axis=1), matches.argmax(axis=1), -1)

        current, following = idx[:-1], idx[1:]       # state at t and at t+1
        known = (current >= 0) & (following >= 0)    # drop pairs with unknown states

        # np.add.at accumulates correctly when the same (i, j) pair repeats;
        # plain fancy-index `+=` would count each distinct pair only once.
        np.add.at(transc, (current[known], following[known]), 1)

    # Marginal totals (sum across each row of the count matrix)
    margin = transc.sum(axis=1)

    # Divide each row by its marginal total. Rows with a zero total are 0/0
    # and intentionally come out as NaN; silence the floating-point warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        probs = transc / margin[:, np.newaxis]

    return transc, probs


In [None]:
# Transition matrix over the whole record, treated as one continuous series

states = list(range(5))          # states to tabulate: 0, 1, 2, 3, 4
x = df['LLJ_CAT'].values         # 1-D series of observed states

transc, probs = transition_matrix(x, states)

print('Transition counts: \n', transc, '\n')
print('Transition probabilities: \n', probs.round(3), '\n')

### Multi-year
Count transitions within each season separately, then pool the counts from all seasons to compute the final matrix of transition probabilities.

In [None]:
# Configuration: years covered and the states to tabulate
years = np.arange(1979, 2017)
nyrs = len(years)

states = [0, 1, 2, 3, 4]
nstates = len(states)

# One (nstates x nstates) count matrix per year index. Only nyrs-1 seasons
# are filled below, so the last slice stays zero and does not affect the sum.
transc_myr = np.zeros((nstates, nstates, nyrs), dtype=int)

# Each season runs from 1 Nov of one year to 31 Mar of the following year
for k, (yr_begin, yr_finish) in enumerate(zip(years[:-1], years[1:])):

    start_date = pd.to_datetime(f"{yr_begin}-11-01 9:00")
    end_date = pd.to_datetime(f"{yr_finish}-03-31 9:00")

    # label-based slice of the datetime index (both endpoints included)
    x = df.loc[start_date:end_date, 'LLJ_CAT'].values

    # per-season counts go into slot k; per-season probs are not kept
    transc, probs = transition_matrix(x, states)
    transc_myr[:, :, k] = transc


# Pool counts over the year axis
transc_tot = transc_myr.sum(axis=2)
print(transc_tot.shape, transc_tot)

# Row totals of the pooled counts
margin_tot = transc_tot.sum(axis=1)
print(margin_tot.shape, margin_tot)

# Normalise each row by its total to get transition probabilities
probs_tot = transc_tot / margin_tot[:, np.newaxis]
print(np.round(probs_tot, 3))

In [None]:
# Sum each row of the pooled probability matrix and print the result
test = probs_tot.sum(axis=1)
print(test)

## Figure: Heat map of Transition Probabilities

In [None]:
# seaborn defaults first, then the "ticks" style with forced patch edges off
sns.set()
sns.set_style("ticks", {'patch.force_edgecolor':False})

# trim colormap
newcmap = cmocean.tools.crop_by_percent(cmo.haline, 3, which='min', N=None)

# create the figure and draw the annotated heat map on its axes explicitly
fig, ax = plt.subplots(figsize=(7,6))
heatmap_opts = dict(
    vmin=0, vmax=1,
    annot=True, fmt='5.3f',
    linewidths=0.5, square=True,
    cmap=newcmap,
)
ax = sns.heatmap(probs_tot, ax=ax, **heatmap_opts)

# axis limits: x runs 0 -> upper, y runs upper -> 0 (inverted)
upper = max(states) + 1
ax.set_xlim(0, upper)
ax.set_ylim(upper, 0)

# title and axis labels
ax.set_title('Transition Probabilities')
ax.set_xlabel('SALLJ (t+1)')
ax.set_ylabel('SALLJ (t)')

# Save to disk, then display
filepath = path_to_figs / 'fig10_v2.png'
fig.savefig(filepath, dpi=150, bbox_inches='tight')
plt.show()
