In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stat
from scipy.stats import pearsonr
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import statsmodels.api as sm
    
import warnings
warnings.simplefilter('ignore') #filter some warning messages

# calculate linear regression (order 1) between
# winter, spring, summer (and previous-year fall) individual environmental
# variables and summer kelp
# up to 2013 - pre-collapse

In [None]:
# data
# Study window, both ends inclusive.
iy, fy = 1991, 2013

# Summer bull kelp table; rows are indexed by the first CSV column and
# compared directly against the year bounds.
kelp = pd.read_csv('../data/BullKelp_summer_July2021.csv', index_col=0)
kelp = kelp.loc[(kelp.index >= iy) & (kelp.index <= fy)]
kelp_nm = kelp.columns.tolist()
print(kelp_nm)

# Seasonal environmental table; the first CSV column is parsed as dates so
# that rows can be restricted to the same year window.
env = pd.read_csv('../data/environmentaldata_seasonal_July2021.csv', index_col=0)
env_years = pd.DatetimeIndex(env.index).year
env = env.loc[
    (env_years >= iy) & (env_years <= fy),
    ['UI39N', 'BEUTI 41N', 'BEUTI 39N', 'BEUTI 37N', 'SSTN14', 'SSTN13', 'MEI', 'PDO', 'NPGO'],
]
env_nm = env.columns.tolist()
print(env_nm)

# Season labels, in the order used for the columns of the R2 tables.
seas = ['Winter', 'Spring', 'Summer', 'Fall-1']

In [None]:
# R2 tables for the north and south kelp regressions: one row per
# environmental variable, one column per season. Everything starts as NaN;
# only r2 values of significant regressions are meant to be stored.
r2_nkelp, r2_skelp = (np.full((len(env_nm), 4), np.nan) for _ in range(2))

In [None]:
def linreg_deg1(x, y, yr, plots=False, labx='', laby='', sign=1):
    """Regress log(y) on x (first-degree OLS) and return R2 if significant.

    Positions where either ``x`` or ``y`` is NaN are dropped before fitting.
    When the slope is significant the model summary, R2, AIC and the Pearson
    correlation are printed.

    Parameters
    ----------
    x : array-like, 1-D
        Predictor values; NaN marks a missing value.
    y : array-like, 1-D, same length as ``x``
        Response values; the fit uses ``np.log(y)``. Non-positive values give
        non-finite logs and are not filtered here.
    yr : array-like, 1-D, same length as ``x``
        Year of each observation; used to colour the scatter points and as
        the time axis of the second figure.
    plots : bool, optional
        If true, draw the figures when the fit is significant and R2 > 0.3.
    labx, laby : str, optional
        Axis labels for the predictor and the response.
    sign : optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    float
        R2 of the fit when the slope p-value is <= 0.05, otherwise NaN.
        NaN is also returned when fewer than three complete (x, y) pairs
        remain, since no p-value can be computed from them.
    """
    # Accept lists as well as arrays; boolean masking below needs ndarrays.
    x = np.asarray(x)
    y = np.asarray(y)
    yr = np.asarray(yr)

    # Keep only the positions where both series are present.
    valid = ~np.isnan(x) & ~np.isnan(y)
    nx = x[valid]
    ny = np.log(y[valid])
    nyrs = yr[valid]

    # With fewer than three pairs the slope has no usable p-value (and an
    # empty sample cannot be fitted at all), so report "not significant".
    if nx.size < 3:
        return np.nan

    sorx = np.argsort(nx)  # sort order of nx, used to draw the interval lines
    nx = nx.reshape(-1, 1)
    ny = ny.reshape(-1, 1)

    # First degree: design matrix columns are [1, x].
    polf = PolynomialFeatures(1)
    xp = polf.fit_transform(nx)
    mods2 = sm.OLS(ny, xp).fit()

    # Index 1 is the slope term. Written as "not (p <= 0.05)" so that a NaN
    # p-value is also treated as not significant.
    if not (mods2.pvalues[1] <= 0.05):
        return np.nan

    r2 = mods2.rsquared

    # print summary
    print(mods2.summary())
    lab = 'R2=' + str(np.round(mods2.rsquared, 2))
    print(lab)
    print('AIC = ', np.round(mods2.aic, 1))

    pc = pearsonr(nx[:, 0], ny[:, 0])
    print('Pearson Correlation = ', pc)

    # plot if requested
    if r2 > 0.3 and plots:
        # Sign of the relationship; decides whether the response axis of the
        # time-series figure is flipped.
        sc = np.sign(pc[0])

        # Evenly spaced x values spanning the data, for drawing the fitted line.
        nx2 = np.arange(nx.min(), nx.max() + 0.01, 0.01).reshape(-1, 1)
        xp2 = polf.transform(nx2)

        # wls_prediction_std returns (std, lower bound, upper bound).
        _, lower, upper = wls_prediction_std(mods2)

        # Figure 1: scatter of the fitted data with the regression line and
        # the interval bounds at the observed x values.
        plt.figure()
        plt.scatter(nx, ny, 60, c=nyrs, edgecolor='grey', cmap='plasma')  # original data
        plt.colorbar(ticks=range(nyrs[0], nyrs[-1] + 1, 2))
        plt.plot(nx2, mods2.predict(xp2), 'b-', label=lab)  # prediction - line
        plt.plot(nx[sorx], upper[sorx], ':', c='r')  # interval bounds
        plt.plot(nx[sorx], lower[sorx], ':', c='r')
        plt.ylabel(laby, fontsize=14)
        plt.xlabel(labx, fontsize=14)
        plt.grid(True, alpha=0.3)
        plt.legend(loc=0, fontsize=14)
        plt.show()

        # Figure 2: both full time series (including missing years) on twin axes.
        fig, ax1 = plt.subplots()
        ax1.plot(yr, x, 'o-', c='tab:blue')
        ax1.set_ylabel(labx, color='tab:blue', fontsize=14)
        ax1.tick_params(axis='y', labelcolor='tab:blue')
        ax2 = ax1.twinx()
        # The axis is labelled log(laby), so plot the log-transformed series
        # (the same transform used in the fit).
        ax2.plot(yr, np.log(y), 'd-', c='tab:red')
        if sc == -1:
            # Negative relationship: flip the axis so the two curves move together.
            ax2.invert_yaxis()
        ax2.set_ylabel('log(' + laby + ')', color='tab:red', fontsize=14)
        ax2.tick_params(axis='y', labelcolor='tab:red')
        ax2.tick_params(axis='x', labelcolor='grey')
        plt.grid(True, alpha=0.3)
        plt.show()

    return r2

In [None]:
# North kelp against every environmental variable, one season at a time.
# Seasons are selected by the month of the env timestamp (1, 4, 7, 10 ->
# Winter, Spring, Summer, Fall-1). For the month-10 season the env series is
# shifted by one position relative to kelp (x[:-1] vs y[1:]), i.e. each kelp
# value is paired with the preceding row's fall value - presumably the
# previous year, assuming one row per year in chronological order.
print('North Kelp')
y = kelp['North Kelp'].values
yr = kelp.index.values
for i, (season, ii) in enumerate(zip(seas, [1, 4, 7, 10])):
    a = pd.DatetimeIndex(env.index).month == ii  # rows of env for this season
    for j, jj in enumerate(env_nm):
        print('\n\n', season, ' - ', jj)
        x = env[jj][a].values
        if ii == 10:
            # lagged pairing for Fall-1
            r2_nkelp[j, i] = linreg_deg1(x[:-1], y[1:], yr[1:], True, jj, 'North Kelp')
            continue
        r2_nkelp[j, i] = linreg_deg1(x, y, yr, True, jj, 'North Kelp')


In [None]:
# south kelp regression for same year seasons (winter/spring/summer) and the
# lagged fall season (fall-1) env variables
print('South Kelp')
for i, ii in enumerate([1,4,7,10]): # seasons
    for j, jj in enumerate(env_nm):
        print('\n\n', seas[i], ' - ', jj)
        a = pd.DatetimeIndex(env.index).month==ii # env rows for this season
        x = env[jj][a].values
        y = kelp['South Kelp'].values
        yr = kelp.index.values
        if ii<10:
            r2_skelp[j,i] = linreg_deg1(x, y, yr, True, jj, 'South Kelp')
        else:
            # Fall-1: pair each kelp value with the preceding fall value.
            # Results belong in the south table and the plots are labelled
            # as South Kelp (y holds South Kelp data).
            r2_skelp[j,i] = linreg_deg1(x[:-1], y[1:], yr[1:], True, jj, 'South Kelp')

In [None]:
# multivariate - 1991-2013
# tested all combination (by hand stepwise)
# two significant
# Other candidates considered but not part of the two models below:
# winter SSTN13, spring NPGO, summer NPGO, and standardising the predictors.

def _fit_south_kelp_ols(predictors):
    """Fit an OLS model of log(South Kelp) on seasonal environmental predictors.

    Parameters
    ----------
    predictors : list of (str, str, int)
        One ``(design_name, env_column, month)`` triple per predictor:
        the column name to use in the design matrix, the column of ``env``
        to read, and the timestamp month that selects the season.

    Returns
    -------
    results
        Fitted statsmodels OLS results.
    design : pandas.DataFrame
        Design matrix (constant column plus the predictors).
    response : pandas.DataFrame
        Single-column frame with the untransformed 'South Kelp' values used
        in the fit.
    """
    months = pd.DatetimeIndex(env.index).month
    table = kelp.copy().drop(columns=['North Kelp', 'All Kelp'])
    for design_name, env_column, month in predictors:
        # Positional assignment: relies on env having one row per kelp row
        # for the selected month, in the same order.
        table[design_name] = env[env_column][months == month].values
    # remove rows where South Kelp is missing
    table = table[~np.isnan(table['South Kelp'])]
    table = sm.add_constant(table)
    response = pd.DataFrame(data=table['South Kelp'].values,
                            index=table.index.values,
                            columns=['South Kelp'])
    design = table.drop(columns=['South Kelp'])
    results = sm.OLS(np.log(response), design).fit()
    return results, design, response


# south kelp: winter SSTN14 and spring UI39N
mods, alldt, sklp = _fit_south_kelp_ols([
    ('win_SSTN14', 'SSTN14', 1),  # month 1 = winter
    ('spr_UI39N', 'UI39N', 4),    # month 4 = spring
])
print(mods.summary())


# south kelp: winter SSTN14 and spring BEUTI 37N
mods, alldt, sklp = _fit_south_kelp_ols([
    ('win_SSTN14', 'SSTN14', 1),  # month 1 = winter
    ('BEUTI 37N', 'BEUTI 37N', 4),  # month 4 = spring
])
print(mods.summary())
