# Statistical analysis

## Import libraries

In [17]:
import sys
# Add the parent directory to the module search path so that the local
# `utilities` package imported on the next line can be found.
# NOTE(review): '../' is resolved against the current working directory -
# presumably the notebook's own folder; confirm before running elsewhere.
sys.path.append('../')
from utilities.era5_down import annual_sum, annual_mean, season_df, df_corr_pvalues, sel_period
import pandas as pd
import numpy as np
import warnings
# Silence every warning for the rest of the session. This also hides pandas
# diagnostics (e.g. SettingWithCopyWarning), so problems may go unnoticed.
warnings.filterwarnings('ignore')


## Read glaciers data

In [18]:
# All three glacier files are read the same way: tab-separated, with the
# `time` column parsed as dates and used as the index.
glacier_csv_options = dict(sep='\t', index_col=['time'], parse_dates=['time'])

df1 = pd.read_csv('../data/out/HURD_1960_2020.csv', **glacier_csv_options)
df2 = pd.read_csv('../data/out/JOHNSONS_1960_2020.csv', **glacier_csv_options)
df3 = pd.read_csv('../data/out/BELLINGSHAUSEN_1960_2020.csv', **glacier_csv_options)


## Join the glaciers' SMB series

In [19]:
# Collect the SMB column of each glacier into one table, one column per glacier.
# rename() and assign() each return a new DataFrame, so no column is ever
# written onto a `df1[[...]]` selection (assigning onto such a selection is
# what pandas reports as SettingWithCopyWarning).
# assign() aligns the df2/df3 series on df1's time index, so rows follow df1.
df = (
    df1[['SMB']]
    .rename(columns={'SMB': 'HURD'})
    .assign(JOHNSONS=df2['SMB'], BELLINGSHAUSEN=df3['SMB'])
)
df

Unnamed: 0_level_0,HURD,JOHNSONS,BELLINGSHAUSEN
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-01-01,-0.010134,-0.007264,-0.048413
1959-02-01,0.051922,0.048639,0.025492
1959-03-01,0.014676,0.030327,0.011961
1959-04-01,0.031863,0.030314,0.037331
1959-05-01,0.063714,0.062539,0.066179
...,...,...,...
2020-08-01,0.089102,0.088834,0.083711
2020-09-01,0.104443,0.103801,0.082839
2020-10-01,0.093834,0.090696,0.090144
2020-11-01,0.041850,0.040304,0.026472


## SMB correlations between glaciers

In [20]:
# Display the pairwise correlation table of the three SMB columns; the helper
# appends significance stars to each coefficient (see the rendered output).
# NOTE(review): the correlation method and p-value test live in
# utilities.era5_down and are not visible here - confirm there.
df_corr_pvalues(df)

Unnamed: 0,HURD,JOHNSONS,BELLINGSHAUSEN
HURD,1.0***,0.995***,0.975***
JOHNSONS,0.995***,1.0***,0.974***
BELLINGSHAUSEN,0.975***,0.974***,1.0***


Correlations between the monthly SMB series of the three glaciers, where significance is indicated by `***` (p < 0.001), `**` (p < 0.01) and `*` (p < 0.05).

## Read climate modes data: SAM, ENSO (NINO3.4), PDO and SOI

In [21]:
# Climate-mode indices: tab-separated file, `time` column parsed as dates and
# used as the index (same layout as the glacier files).
climate_modes_csv = '../data/climate_mode/climate_modes.csv'
df4 = pd.read_csv(climate_modes_csv, sep='\t', index_col=['time'], parse_dates=['time'])
df4


Unnamed: 0_level_0,SAM,NINO3.4,PDO,SOI
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1959-01-01,1.59,0.52,1.04,-1.5
1959-02-01,-0.19,0.44,0.35,-2.3
1959-03-01,-0.54,0.24,-0.14,2.1
1959-04-01,-1.25,0.17,-0.16,0.7
1959-05-01,-0.32,-0.11,-0.13,0.8
...,...,...,...,...
2020-08-01,-2.20,-0.57,-1.32,1.8
2020-09-01,-0.25,-0.84,-1.04,1.5
2020-10-01,1.79,-1.21,-0.62,0.8
2020-11-01,1.14,-1.33,-1.58,1.1


## Merge climate modes and BELLINGSHAUSEN glacier

In [22]:
# Left-join the Bellingshausen table onto the climate modes using the time
# index of both frames, so every climate-mode row is kept.
climate_with_glacier = df4.merge(df3, how='left', left_index=True, right_index=True)

# HGT is not used in the correlation analysis below, so it is dropped here.
df_all = climate_with_glacier.drop(columns=['HGT'])

# Save the merged table in both tab-separated and Excel form.
df_all.to_csv('../data/out/Belling_climate_modes_glacier.csv', sep='\t')
df_all.to_excel('../data/out/Belling_climate_modes_glacier.xlsx', index=True)

df_all

Unnamed: 0_level_0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1959-01-01,1.59,0.52,1.04,-1.5,-0.330943,5.565953,0.028780,-0.048413,0.077192,0.068192,0.014937
1959-02-01,-0.19,0.44,0.35,-2.3,-1.063417,0.915968,0.038195,0.025492,0.012703,0.005515,0.019742
1959-03-01,-0.54,0.24,-0.14,2.1,-1.360896,3.505931,0.060583,0.011961,0.048623,0.029746,0.031253
1959-04-01,-1.25,0.17,-0.16,0.7,-6.059883,0.000000,0.037331,0.037331,0.000000,0.000000,0.018730
1959-05-01,-0.32,-0.11,-0.13,0.8,-5.209905,0.010123,0.066319,0.066179,0.000140,0.000000,0.033447
...,...,...,...,...,...,...,...,...,...,...,...
2020-08-01,-2.20,-0.57,-1.32,1.8,-7.341003,0.408967,0.089383,0.083711,0.005672,0.000000,0.044506
2020-09-01,-0.25,-0.84,-1.04,1.5,-5.275930,0.006578,0.082930,0.082839,0.000091,0.000000,0.041806
2020-10-01,1.79,-1.21,-0.62,0.8,-3.201353,0.003562,0.090194,0.090144,0.000049,0.000000,0.046026
2020-11-01,1.14,-1.33,-1.58,1.1,-0.988233,2.253453,0.057724,0.026472,0.031252,0.017583,0.029839


## Monthly correlation

In [23]:
# Correlation table of the merged monthly data, with significance stars.
# Only the last expression of a cell is rendered, so a bare `df_all.corr()`
# ahead of this call would be computed and thrown away; the helper's table
# already contains the coefficients.
df_corr_pvalues(df_all)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,-0.04,-0.032,0.061,0.139***,0.118**,0.001,-0.104**,0.118**,0.116**,0.006
NINO3.4,-0.04,1.0***,0.419***,-0.718***,-0.028,-0.068,0.04,0.067,-0.068,-0.071,0.04
PDO,-0.032,0.419***,1.0***,-0.388***,0.036,-0.002,-0.047,-0.007,-0.002,-0.009,-0.046
SOI,0.061,-0.718***,-0.388***,1.0***,0.065,0.106**,-0.005,-0.095**,0.106**,0.107**,-0.003
T2,0.139***,-0.028,0.036,0.065,1.0***,0.643***,-0.442***,-0.651***,0.643***,0.617***,-0.413***
PDD,0.118**,-0.068,-0.002,0.106**,0.643***,1.0***,-0.55***,-0.987***,1.0***,0.998***,-0.54***
SF,0.001,0.04,-0.047,-0.005,-0.442***,-0.55***,1.0***,0.676***,-0.55***,-0.559***,0.999***
SMB,-0.104**,0.067,-0.007,-0.095**,-0.651***,-0.987***,0.676***,1.0***,-0.987***,-0.987***,0.667***
MELT,0.118**,-0.068,-0.002,0.106**,0.643***,1.0***,-0.55***,-0.987***,1.0***,0.998***,-0.54***
Q,0.116**,-0.071,-0.009,0.107**,0.617***,0.998***,-0.559***,-0.987***,0.998***,1.0***,-0.55***


## Mean annual correlation

In [24]:
# Columns passed to the annual aggregation: the four climate-mode indices
# followed by the Bellingshausen glacier variables.
annual_columns = [
    'SAM', 'NINO3.4', 'PDO', 'SOI',
    'T2', 'PDD', 'SF', 'SMB', 'MELT', 'Q', 'RZ',
]

# Aggregate the selected columns with the project's annual_mean helper, then
# show the correlation table of the aggregated values.
dfmean = annual_mean(df_all[annual_columns])
df_corr_pvalues(dfmean)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,-0.004,-0.045,0.073,0.463***,0.336**,0.379**,-0.23,0.336**,0.309*,0.391**
NINO3.4,-0.004,1.0***,0.511***,-0.92***,-0.083,-0.206,0.182,0.238,-0.206,-0.221,0.178
PDO,-0.045,0.511***,1.0***,-0.558***,-0.087,-0.032,-0.045,0.02,-0.032,-0.034,-0.049
SOI,0.073,-0.92***,-0.558***,1.0***,0.136,0.192,-0.051,-0.194,0.192,0.2,-0.045
T2,0.463***,-0.083,-0.087,0.136,1.0***,0.584***,0.227,-0.501***,0.584***,0.544***,0.261*
PDD,0.336**,-0.206,-0.032,0.192,0.584***,1.0***,-0.103,-0.973***,1.0***,0.997***,-0.088
SF,0.379**,0.182,-0.045,-0.051,0.227,-0.103,1.0***,0.331**,-0.103,-0.143,0.999***
SMB,-0.23,0.238,0.02,-0.194,-0.501***,-0.973***,0.331**,1.0***,-0.973***,-0.979***,0.316*
MELT,0.336**,-0.206,-0.032,0.192,0.584***,1.0***,-0.103,-0.973***,1.0***,0.997***,-0.088
Q,0.309*,-0.221,-0.034,0.2,0.544***,0.997***,-0.143,-0.979***,0.997***,1.0***,-0.129


## Mean summer correlation

In [25]:
# Build the 'DJF' season table from df_all for 1960-2020 and show its
# correlation table with significance stars.
# NOTE(review): presumably DJF = Dec-Jan-Feb averaged per year (the section
# heading says "mean summer"); the aggregation and whether the year bounds are
# inclusive are defined in utilities.era5_down - confirm there.
df_DJF = season_df(df_all, 'DJF', 1960, 2020)
df_corr_pvalues(df_DJF)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,-0.167,-0.213,0.232,0.359**,0.337**,-0.08,-0.326*,0.337**,0.337**,-0.079
NINO3.4,-0.167,1.0***,0.443***,-0.88***,-0.226,-0.268*,0.182,0.275*,-0.268*,-0.27*,0.181
PDO,-0.213,0.443***,1.0***,-0.506***,-0.123,-0.179,0.179,0.191,-0.179,-0.187,0.179
SOI,0.232,-0.88***,-0.506***,1.0***,0.138,0.207,-0.049,-0.2,0.207,0.207,-0.048
T2,0.359**,-0.226,-0.123,0.138,1.0***,0.945***,-0.385**,-0.937***,0.945***,0.94***,-0.38**
PDD,0.337**,-0.268*,-0.179,0.207,0.945***,1.0***,-0.426***,-0.993***,1.0***,0.999***,-0.423***
SF,-0.08,0.182,0.179,-0.049,-0.385**,-0.426***,1.0***,0.528***,-0.426***,-0.469***,1.0***
SMB,-0.326*,0.275*,0.191,-0.2,-0.937***,-0.993***,0.528***,1.0***,-0.993***,-0.997***,0.525***
MELT,0.337**,-0.268*,-0.179,0.207,0.945***,1.0***,-0.426***,-0.993***,1.0***,0.999***,-0.423***
Q,0.337**,-0.27*,-0.187,0.207,0.94***,0.999***,-0.469***,-0.997***,0.999***,1.0***,-0.465***


## Mean fall correlation

In [26]:
# Build the 'MAM' season table from df_all for 1960-2020 and show its
# correlation table with significance stars.
# NOTE(review): presumably MAM = Mar-Apr-May averaged per year (the section
# heading says "mean fall"); confirm the aggregation in utilities.era5_down.
df_MAM = season_df(df_all, 'MAM', 1960, 2020)
df_corr_pvalues(df_MAM)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,0.026,0.023,0.178,0.489***,0.512***,-0.017,-0.485***,0.512***,0.498***,-0.005
NINO3.4,0.026,1.0***,0.491***,-0.815***,-0.007,-0.111,0.129,0.131,-0.111,-0.144,0.131
PDO,0.023,0.491***,1.0***,-0.484***,-0.019,-0.073,-0.082,0.052,-0.073,-0.091,-0.081
SOI,0.178,-0.815***,-0.484***,1.0***,0.124,0.272*,-0.048,-0.266*,0.272*,0.297*,-0.046
T2,0.489***,-0.007,-0.019,0.124,1.0***,0.774***,-0.172,-0.764***,0.774***,0.748***,-0.146
PDD,0.512***,-0.111,-0.073,0.272*,0.774***,1.0***,-0.185,-0.979***,1.0***,0.995***,-0.166
SF,-0.017,0.129,-0.082,-0.048,-0.172,-0.185,1.0***,0.379**,-0.185,-0.243,1.0***
SMB,-0.485***,0.131,0.052,-0.266*,-0.764***,-0.979***,0.379**,1.0***,-0.979***,-0.987***,0.362**
MELT,0.512***,-0.111,-0.073,0.272*,0.774***,1.0***,-0.185,-0.979***,1.0***,0.995***,-0.166
Q,0.498***,-0.144,-0.091,0.297*,0.748***,0.995***,-0.243,-0.987***,0.995***,1.0***,-0.226


## Mean winter correlation

In [27]:
# Build the 'JJA' season table from df_all for 1960-2020 and show its
# correlation table with significance stars.
# NOTE(review): presumably JJA = Jun-Jul-Aug averaged per year (the section
# heading says "mean winter"); confirm the aggregation in utilities.era5_down.
df_JJA = season_df(df_all, 'JJA', 1960, 2020)
df_corr_pvalues(df_JJA)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,0.207,0.057,-0.144,0.435***,0.158,0.23,0.214,0.158,0.089,0.247
NINO3.4,0.207,1.0***,0.449***,-0.832***,-0.02,-0.063,0.091,0.1,-0.063,-0.121,0.086
PDO,0.057,0.449***,1.0***,-0.399**,0.02,0.204,-0.111,-0.136,0.204,0.061,-0.111
SOI,-0.144,-0.832***,-0.399**,1.0***,0.016,0.097,0.121,0.111,0.097,0.125,0.122
T2,0.435***,-0.02,0.02,0.016,1.0***,0.517***,0.355**,0.299*,0.517***,0.344**,0.396**
PDD,0.158,-0.063,0.204,0.097,0.517***,1.0***,0.156,0.04,1.0***,0.685***,0.177
SF,0.23,0.091,-0.111,0.121,0.355**,0.156,1.0***,0.993***,0.156,0.045,0.999***
SMB,0.214,0.1,-0.136,0.111,0.299*,0.04,0.993***,1.0***,0.04,-0.035,0.99***
MELT,0.158,-0.063,0.204,0.097,0.517***,1.0***,0.156,0.04,1.0***,0.685***,0.177
Q,0.089,-0.121,0.061,0.125,0.344**,0.685***,0.045,-0.035,0.685***,1.0***,0.061


## Mean spring correlation

In [28]:
# Build the 'SON' season table from df_all for 1960-2020 and show its
# correlation table with significance stars.
# NOTE(review): presumably SON = Sep-Oct-Nov averaged per year (the section
# heading says "mean spring"); confirm the aggregation in utilities.era5_down.
df_SON = season_df(df_all, 'SON', 1960, 2020)
df_corr_pvalues(df_SON)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,-0.05,-0.063,0.009,0.241,0.338**,0.109,-0.068,0.338**,0.296*,0.115
NINO3.4,-0.05,1.0***,0.52***,-0.87***,-0.343**,-0.182,-0.039,0.055,-0.182,-0.096,-0.047
PDO,-0.063,0.52***,1.0***,-0.582***,-0.288*,-0.312*,-0.062,0.097,-0.312*,-0.244,-0.069
SOI,0.009,-0.87***,-0.582***,1.0***,0.382**,0.258*,0.057,-0.076,0.258*,0.189,0.066
T2,0.241,-0.343**,-0.288*,0.382**,1.0***,0.521***,0.11,-0.158,0.521***,0.449***,0.134
PDD,0.338**,-0.182,-0.312*,0.258*,0.521***,1.0***,0.064,-0.435***,1.0***,0.943***,0.077
SF,0.109,-0.039,-0.062,0.057,0.11,0.064,1.0***,0.871***,0.064,0.013,1.0***
SMB,-0.068,0.055,0.097,-0.076,-0.158,-0.435***,0.871***,1.0***,-0.435***,-0.452***,0.864***
MELT,0.338**,-0.182,-0.312*,0.258*,0.521***,1.0***,0.064,-0.435***,1.0***,0.943***,0.077
Q,0.296*,-0.096,-0.244,0.189,0.449***,0.943***,0.013,-0.452***,0.943***,1.0***,0.024


## Summer period correlation (Dec, Jan, Feb, Mar, Apr, May)

In [29]:
# Select the 'summer' period from df_all for 1960-2020 and show its
# correlation table with significance stars.
# NOTE(review): which months 'summer' covers and how they are aggregated is
# decided inside sel_period (utilities.era5_down) - verify it matches the
# month list given in the section heading.
df_summer = sel_period(df_all, 'summer', 1960, 2020)
df_corr_pvalues(df_summer)


Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,-0.097,-0.115,0.223,0.349**,0.362**,0.007,-0.343**,0.362**,0.355**,0.01
NINO3.4,-0.097,1.0***,0.476***,-0.902***,-0.149,-0.265*,0.193,0.275*,-0.265*,-0.275*,0.193
PDO,-0.115,0.476***,1.0***,-0.585***,-0.087,-0.174,0.056,0.172,-0.174,-0.182,0.057
SOI,0.223,-0.902***,-0.585***,1.0***,0.107,0.261*,-0.029,-0.251,0.261*,0.268*,-0.029
T2,0.349**,-0.149,-0.087,0.107,1.0***,0.829***,-0.393**,-0.835***,0.829***,0.822***,-0.382**
PDD,0.362**,-0.265*,-0.174,0.261*,0.829***,1.0***,-0.365**,-0.993***,1.0***,0.999***,-0.358**
SF,0.007,0.193,0.056,-0.029,-0.393**,-0.365**,1.0***,0.468***,-0.365**,-0.406**,1.0***
SMB,-0.343**,0.275*,0.172,-0.251,-0.835***,-0.993***,0.468***,1.0***,-0.993***,-0.997***,0.462***
MELT,0.362**,-0.265*,-0.174,0.261*,0.829***,1.0***,-0.365**,-0.993***,1.0***,0.999***,-0.358**
Q,0.355**,-0.275*,-0.182,0.268*,0.822***,0.999***,-0.406**,-0.997***,0.999***,1.0***,-0.4**


## Winter period correlation (Jun, Jul, Aug, Sep, Oct, Nov)

In [30]:
# Select the 'winter' period from df_all for 1960-2020 and show its
# correlation table with significance stars.
# NOTE(review): which months 'winter' covers and how they are aggregated is
# decided inside sel_period (utilities.era5_down) - verify it matches the
# month list given in the section heading.
df_winter = sel_period(df_all, 'winter', 1960, 2020)
df_corr_pvalues(df_winter)

Unnamed: 0,SAM,NINO3.4,PDO,SOI,T2,PDD,SF,SMB,MELT,Q,RZ
SAM,1.0***,0.12,0.054,-0.061,0.424***,0.395**,0.256*,0.091,0.395**,0.314*,0.27*
NINO3.4,0.12,1.0***,0.492***,-0.908***,-0.097,-0.062,0.114,0.136,-0.062,-0.056,0.107
PDO,0.054,0.492***,1.0***,-0.478***,-0.09,-0.151,-0.047,0.015,-0.151,-0.195,-0.051
SOI,-0.061,-0.908***,-0.478***,1.0***,0.168,0.14,0.025,-0.032,0.14,0.103,0.032
T2,0.424***,-0.097,-0.09,0.168,1.0***,0.687***,0.301*,0.017,0.687***,0.564***,0.337**
PDD,0.395**,-0.062,-0.151,0.14,0.687***,1.0***,0.147,-0.26*,1.0***,0.933***,0.172
SF,0.256*,0.114,-0.047,0.025,0.301*,0.147,1.0***,0.917***,0.147,-0.022,0.999***
SMB,0.091,0.136,0.015,-0.032,0.017,-0.26*,0.917***,1.0***,-0.26*,-0.398**,0.906***
MELT,0.395**,-0.062,-0.151,0.14,0.687***,1.0***,0.147,-0.26*,1.0***,0.933***,0.172
Q,0.314*,-0.056,-0.195,0.103,0.564***,0.933***,-0.022,-0.398**,0.933***,1.0***,0.0
