# Investigate Regression Output
After running EBUS-Extraction to pull out the given upwelling system, I ran the climate-correlation.py script to correlate a near-offshore region of the EBUS with specific climate indices. That script saves CSV files of regression results, which are loaded and interpreted in this notebook.

In [None]:
# Numerics
import numpy as np
import pandas as pd
import xarray as xr

# Visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Ensemble member labels as zero-padded three-character strings:
# members 1-2, 9-35 and 101-105 (34 in total).
_member_numbers = [1, 2] + list(range(9, 36)) + list(range(101, 106))
ens = ['{:03d}'.format(number) for number in _member_numbers]

In [None]:
# Directory holding the regression-result CSVs for the HumCS region.
fileDir = '/glade/u/home/rbrady/projects/EBUS_BGC_Variability/data/processed/humcs/'

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
df_enso = pd.read_csv(fileDir + 'smoothed_fgco2_vs_enso_humcs',
                      index_col=0, parse_dates=True)
df_pdo = pd.read_csv(fileDir + 'smoothed_fgco2_vs_pdo_humcs',
                     index_col=0, parse_dates=True)
#df_npo = pd.read_csv(fileDir + 'smoothed_fgco2_vs_npo_humcs', index_col=0, parse_dates=True)
#df_sam = pd.read_csv(fileDir + 'smoothed_fgco2_vs_sam_humcs', index_col=0, parse_dates=True)

In [None]:
# Previously a stray NaN row (position 34) had to be dropped first; kept
# here, disabled, in case the CSVs are regenerated with that row again.
#df_enso = df_enso.drop(df_enso.index[34])
#df_pdo = df_pdo.drop(df_pdo.index[34])
#df_npo = df_npo.drop(df_npo.index[34])

# Label every row of each loaded table with its ensemble member number.
# (df_npo and df_sam would be added to this tuple once they are loaded.)
for _frame in (df_enso, df_pdo):
    _frame.index = ens

In [None]:
# Correlation coefficients for each loaded climate index, as NumPy arrays.
pdo = np.asarray(df_pdo['R Value'])
enso = np.asarray(df_enso['R Value'])
# The NPO and SAM tables have their loads commented out earlier in the
# notebook, so referencing them here would raise NameError.
#npo = np.asarray(df_npo['R Value'])
#sam = np.asarray(df_sam['R Value'])

In [None]:
# Scatter the per-member correlation coefficients of the two climate indices
# that are loaded in this notebook against each other.
# NOTE(review): this cell originally plotted `ipo` vs `sam`, neither of which
# is defined here (NameError). It now uses the available `pdo` and `enso`
# arrays -- confirm this is the intended pairing.
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
ax.scatter(pdo, enso, color='k', s=49)
ax.set_xlabel('PDO Correlation', fontsize='x-large')
ax.set_ylabel('Nino3.4 Correlation', fontsize='x-large')
ax.set_title('HumCS Sea-Air CO2 Flux Correlations \n (Annual Filter; 1920-2015)', fontsize='x-large')
#ax.set_yticks(np.arange(0.10, 0.5, 0.05))
ax.tick_params(axis='both', which='major', labelsize=15)
#plt.savefig('HumCS-PDO-ENSO-Correlation-Scatter.png', pad_inches=0.1)

In [None]:
def hist_plot(ax, data, climate_index):
    """Draw a 7-bin count histogram of correlation coefficients on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on. All drawing and formatting is applied to this axes
        explicitly rather than to whichever axes happens to be current.
    data : array-like
        R values to histogram (e.g. a DataFrame column).
    climate_index : str
        Name of the climate index, used as the title prefix.
    """
    # Report the number of non-missing values actually present instead of a
    # hard-coded ensemble size.
    n_valid = int(pd.notnull(data).sum())

    sns.distplot(data, kde=False, color='b', norm_hist=False, bins=7, ax=ax)
    ax.set_title('{} Correlation Distribution (N={})'.format(climate_index, n_valid),
                 size='x-large')
    ax.tick_params(axis='both', which='major', labelsize=15)
    ax.set_xlabel('R Value', size=15)
    # Fixed limits so that panels for different indices are directly comparable.
    ax.set_ylim([0, 14])
    ax.set_xlim([-1, 1])
    # Vertical reference line at zero correlation.
    ax.plot([0, 0], [0, 14], linewidth=1, color='k')

In [None]:
# Grid of per-index histograms of the ensemble correlation coefficients.
fig = plt.figure(figsize=(18,12))
ax1 = fig.add_subplot(221)
hist_plot(ax1, df_enso['R Value'], 'Nino3.4')
ax2 = fig.add_subplot(222)
hist_plot(ax2, df_pdo['R Value'], 'PDO')
# The NPO and SAM panels are disabled: the loads of df_npo / df_sam are
# commented out in this notebook, so using them here raises NameError.
#ax3 = fig.add_subplot(223)
#hist_plot(ax3, df_npo['R Value'], 'NPO')
#ax4 = fig.add_subplot(224)
#hist_plot(ax4, df_sam['R Value'], 'SAM')
# NOTE(review): the file name says "calcs" but the data loaded in this
# notebook comes from the humcs directory -- confirm the intended name.
plt.savefig("calcs-correlation-histograms.png")

# Outputting to LaTeX

In [27]:
# Disabled LaTeX table export, kept for reference.
#df_pdo = df_pdo.drop('P-Value', 1)
#print df_pdo.round(2).to_latex()

# Standard deviation (NumPy default, ddof=0) of the ENSO R-squared values,
# rounded to two decimals.
np.round(np.std(df_enso['R Squared'].values), 2)

0.050000000000000003