# Regression analysis



In [None]:
import xarray as xr
import pandas as pd
import numpy as np
from tqdm import tqdm
import itertools
from matplotlib import pyplot as plt
import glob
import scipy
from modules import *

%load_ext autoreload
%autoreload 2

# Preprocessing

We want to apply the following to the data:

    1) Find the anomalies
    2) Get the annual average
    3) Normalise the independent variables

In [None]:
import os

# Collect every entry in processed_data/ and keep only those whose file name
# contains no underscore.
files = glob.glob('processed_data/*')
# os.path.basename handles the platform's path separator; splitting on a
# literal backslash only works for Windows-style paths and raises IndexError
# for paths such as 'processed_data/sic.nc'.
files = [f for f in files if '_' not in os.path.basename(f)]
# Open the remaining files together as one dataset.
ds = xr.open_mfdataset(files)

In [None]:
# Preprocessing steps, applied in order: anomalies, yearly average, then
# normalisation with 'sic' passed as the dependant variable.
prepared = w5.find_anomalies(ds)
prepared = w5.yearly_average(prepared)
prepared = w5.normalise_indepenant(prepared, dependant='sic')

# Evaluate the pipeline, then drop the temporary name so that only `ds`
# refers to the result.
ds = prepared.compute()
del prepared

# Restrict the analysis to 1979-01-01 .. 2019-12-31.
ds = ds.sel(time=slice('1979-01-01', '2019-12-31'))

In [None]:
# Show the preprocessed dataset as this cell's output.
ds

# Correlations

In [None]:
# Names of the dataset's data variables; `v` is reused by the plotting cell.
v = list(ds.data_vars)
correlation_matrix = pd.DataFrame(index=v, columns=v, dtype=np.float64)

# Average each variable over the spatial dimensions once, rather than
# recomputing both means on every iteration of the pair loop.
spatial_means = {name: ds[name].mean(dim=('x', 'y')) for name in v}

# The correlation of (a, b) equals that of (b, a), so evaluate each unordered
# pair once (diagonal included) and mirror it into both cells.
pairs = list(itertools.combinations_with_replacement(v, 2))
for v1, v2 in tqdm(pairs):
    coefficient = float(xr.corr(spatial_means[v1], spatial_means[v2]))
    correlation_matrix.loc[v1, v2] = coefficient
    correlation_matrix.loc[v2, v1] = coefficient
    
def significant_bold(val, sig_level=0.9):
    """Return the CSS ``font-weight`` declaration for a single table cell.

    The weight is ``bold`` when ``val > sig_level`` or ``val < -sig_level``;
    in every other case (including NaN) the declaration has an empty value.
    """
    if val > sig_level or val < -sig_level:
        weight = 'bold'
    else:
        weight = ''
    return f'font-weight: {weight}'
# Display the correlation table, styling each cell with significant_bold
# (threshold 0.9) so that strong correlations appear in bold.
# NOTE(review): Styler.applymap is deprecated since pandas 2.1 in favour of
# Styler.map — switch once the minimum supported pandas version allows it.
correlation_matrix.style.applymap(significant_bold,sig_level=0.9)

In [None]:
# Heat map of the correlation matrix with the variable names on both axes.
mesh = plt.pcolormesh(v, v, correlation_matrix.T)
plt.colorbar(mesh)
plt.show()

# Regressions

In [None]:
# Variables used as predictors in the regression of 'sic'.
x_surface = ['si10','sp', 'ssr', 'sst','t2m','u10','v10']

# Regress 'sic' on the x_surface variables.
# NOTE(review): this is the only call into `w4` in the notebook; every other
# helper is taken from `w5` — confirm that `w4` is the intended module.
regression_results = w4.multiple_fast_regression(ds, 'sic', x_surface)

In [None]:
# Show the regression output as this cell's output.
regression_results

In [None]:
# Plot the regression results for 'sic' against the x_surface predictors.
w5.plotting(regression_results, 'sic', x_surface)

In [None]:
# Second set of plots for the same regression results and predictors.
w5.more_plotting(regression_results, 'sic', x_surface)

In [None]:
# Run w5.main with 'sic' as the dependant variable and the seven remaining
# variables (the same names as x_surface) as independant variables.
dependant = 'sic'
independant = ['si10','sp', 'ssr', 'sst','t2m','u10','v10']
w5.main(dependant, independant)

In [None]:
# Run w5.main with 't2m' as the dependant variable; the independant list is
# the x_surface names without 't2m' ('sic' is not included).
dependant = 't2m'
independant = ['si10','sp', 'ssr', 'sst','u10','v10']
w5.main(dependant, independant)

In [None]:
# Run w5.main with 'ssr' as the dependant variable; the independant list is
# the x_surface names without 'ssr' ('sic' is not included).
dependant = 'ssr'
independant = ['si10','sp', 'sst','t2m','u10','v10']
w5.main(dependant, independant)

In [None]:
# Run w5.main with 'sst' as the dependant variable; the independant list is
# the x_surface names without 'sst' ('sic' is not included).
dependant = 'sst'
independant = ['si10','sp', 'ssr','t2m','u10','v10']
w5.main(dependant, independant)