# Global Regression Analysis
This is a methods notebook for prototyping and optimizing the workflow that will go into the raw .py script used for this analysis.

In [1]:
import glob
import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ocn-clim')

import cartopy.crs as ccrs

import esmtools as et

# Process

1. Load in time series from region of interest.

In [46]:
# Run configuration. These are placeholders for values that will be defined
# by sys input in the standalone script.
EBU = 'CalCS'        # region identifier; used to build the regional file path
GLOBAL_VAR = 'SST'   # variable name used in the global file name and dataset
ENS = 0              # positional index into ens_str and the ensemble axis
# Derive the directory from GLOBAL_VAR instead of hard-coding 'SST', so the
# directory and the variable cannot drift apart when another variable is chosen.
# NOTE(review): assumes one sub-directory per variable under global_residuals/ — confirm.
GLOBAL_DIR = ('/glade/scratch/rbrady/EBUS_BGC_Variability/global_residuals/' +
              GLOBAL_VAR + '/')

In [45]:
# Zero-padded, three-digit ensemble member labels: 1-2, 9-35 and 101-105
# (34 labels in total). ENS indexes into this list positionally.
ens_str = ['{:03d}'.format(member)
           for member in [1, 2] + list(range(9, 36)) + list(range(101, 106))]

In [44]:
# Regional input: residuals for natural CO2, area-weighted over the region
# named by EBU. The path uses EBU as-is for the directory and lower-cased
# for the file name.
fileDir = ''.join([
    '/glade/p/work/rbrady/EBUS_BGC_Variability/FG_ALT_CO2/',
    EBU,
    '/filtered_output/',
    EBU.lower(),
    '-FG_ALT_CO2-residuals-AW-chavez-800km.nc',
])
# The variable is a 34x1152 set of unfiltered residual time series; keep
# only the one at position ENS.
ds_regional = xr.open_dataset(fileDir)['FG_ALT_CO2_AW'][ENS]

In [43]:
# Global input: the precomputed global residuals. The file name is built
# from the variable name and the label of the selected ensemble member.
filedir = ''.join([GLOBAL_DIR, GLOBAL_VAR, '.', ens_str[ENS], '.global_residuals.nc'])
# Keep only the variable of interest from the opened dataset.
ds_global = xr.open_dataset(filedir)[GLOBAL_VAR]

# Test Case 
Does chaining .apply() work?

**NOTE**: The test file is located at `/glade/scratch/rbrady/slice_of_global_residuals.nc`.

It is a small slice of the Pacific, used to test regressions across the ensemble and across space.

In [27]:
"""
This is the function to be applied to the stacked xarray object.

For now, it will autosmooth. It makes much more sense to have the
shell script select the ensemble number, so that we can parallelize
the ensemble.
"""
def gridcell_correlations(x, regional):
    # Smooth each for comparison.
    x = et.stats.smooth_series(x, 12)
    y = et.stats.smooth_series(regional, 12)

    m, b, r, p, e = et.stats.linear_regression(x, y)
    return xr.Dataset({'m': m, 'r': r, 'p': p})

In [11]:
# Repeats the EBU / GLOBAL_VAR / ENS settings from the first configuration
# cell; in the script these will be defined by sys input.
EBU, GLOBAL_VAR, ENS = 'CalCS', 'SST', 0

In [12]:
# Regional input: residuals for natural CO2, area-weighted over the region
# named by EBU (a 34x1152 set of unfiltered residual time series).
fileDir = ''.join([
    '/glade/p/work/rbrady/EBUS_BGC_Variability/FG_ALT_CO2/',
    EBU,
    '/filtered_output/',
    EBU.lower(),
    '-FG_ALT_CO2-residuals-AW-chavez-800km.nc',
])
ds_regional = xr.open_dataset(fileDir)
# Global input for this test case: the small test slice rather than a full
# per-member global residual file.
ds_global = xr.open_dataset('/glade/scratch/rbrady/slice_of_global_residuals.nc')

In [13]:
# Reduce each opened dataset to a single variable, then to the entry at
# position ENS along its first axis (the ensemble member).
# Both names are rebound here, so re-run the loading cell before re-running
# this one.
regional_var = 'FG_ALT_CO2_AW'
ds_regional = ds_regional[regional_var][ENS]
ds_global = ds_global[GLOBAL_VAR][ENS]

In [28]:
# Collapse (nlat, nlon) into one 'gridpoints' dimension, apply the regression
# function once per grid point, then unstack back to the original layout.
correlation = (
    ds_global
    .stack(gridpoints=['nlat', 'nlon'])
    .groupby('gridpoints')
    .apply(gridcell_correlations, regional=ds_regional)
    .unstack('gridpoints')
)

	DataFrame.rolling(window=12,center=False).mean()
  smoothed = pd.rolling_mean(x, length, center=center)


### Just check that using linregress does a decent job

In [None]:
# ds_regional and ds_global were already reduced to a single variable and a
# single ensemble member (ENS = 0, i.e. the first member) in the selection
# cell above, so indexing them again by variable name / ensemble position
# fails on a top-to-bottom run. Use them directly.
region_data = ds_regional
# Arbitrary point: pick the grid cell by dimension name so the result does
# not depend on the order of the remaining axes.
global_data = ds_global.isel(nlat=11, nlon=3)

In [None]:
# Regression on the unsmoothed series. The grid-cell series is passed first
# and the regional series second, matching the (x, y) order used in
# gridcell_correlations. r is reported in the next cell.
(m, b, r, p, e) = et.stats.linear_regression(global_data, region_data)

In [None]:
# Overlay both series on one explicit axes. Labels and a legend are added so
# the two lines can be told apart when the figure is viewed on its own.
fig, ax = plt.subplots(figsize=(10, 3))
region_data.plot(ax=ax, linewidth=1, label='regional series')
global_data.plot(ax=ax, linewidth=1, label='global grid cell')
ax.legend()
print("Correlation Coefficient: " + str(r.round(2)))

### May still need to smooth...

In [None]:
# smooth_series is only available as et.stats.smooth_series (the form used in
# gridcell_correlations); the bare name is not defined in the visible cells
# and raises NameError.
# Both names are rebound to their smoothed versions, so re-running this cell
# smooths the series again.
region_data = et.stats.smooth_series(region_data, 12)
global_data = et.stats.smooth_series(global_data, 12)

In [None]:
# Repeat the regression on the smoothed series (grid cell first, regional
# second); this overwrites m, b, r, p and e from the unsmoothed fit.
(m, b, r, p, e) = et.stats.linear_regression(global_data, region_data)

In [None]:
# Overlay the smoothed series on one explicit axes. Labels and a legend are
# added so the two lines can be told apart when the figure is viewed on its own.
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(region_data, linewidth=2, label='regional series (smoothed)')
ax.plot(global_data, linewidth=2, label='global grid cell (smoothed)')
ax.legend()
print("Correlation Coefficient: " + str(r.round(2)))