# Statistical Tests

In [19]:
import pandas as pd
from scipy.stats import ttest_ind_from_stats
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
import seaborn as sns

In [None]:
# Settings of the first semivariogram run. The three values are interpolated
# (via str()) into the names of the data files read further down, so they have
# to match the settings the files were written with.
START_TIME1 = "2019-04-22 19:28:01"
BOUNDING_BOX1 = [
    (48.97, 8.5),  # NW corner
    (48.6, 9.4),   # SE corner
]
DISTANCE_BINS1 = 200  # m

# Settings of the second semivariogram run that run 1 is compared against.
START_TIME2 = "2019-04-22 19:28:01"
BOUNDING_BOX2 = [
    (48.97, 8.5),  # NW corner
    (48.6, 9.4),   # SE corner
]
DISTANCE_BINS2 = 200  # m

## Semivariogram difference

In [None]:
# Load the fitted semivariogram tables of both runs. The file name of each run
# is built from its START_TIME / BOUNDING_BOX / DISTANCE_BINS settings.
_SEMIVARIOGRAM_CSV = './data/semivariogram_{0}_{1}_{2}.csv'
df1 = pd.read_csv(_SEMIVARIOGRAM_CSV.format(START_TIME1, str(BOUNDING_BOX1), str(DISTANCE_BINS1)))
df2 = pd.read_csv(_SEMIVARIOGRAM_CSV.format(START_TIME2, str(BOUNDING_BOX2), str(DISTANCE_BINS2)))

# (column name, series prefix, model name) of every fitted curve that is compared.
_FIT_COLUMNS = [
    ('fit_P1_sph', 'P1', 'spherical'),
    ('fit_P2_sph', 'P2', 'spherical'),
    ('fit_P1_exp', 'P1', 'exponential'),
    ('fit_P2_exp', 'P2', 'exponential'),
]

# Mean row-wise difference (run 1 minus run 2) of each fitted curve.
for _column, _series, _model in _FIT_COLUMNS:
    print('Mean difference {0} {1}: '.format(_series, _model)
          + str(np.mean(df1[_column] - df2[_column])))

# Draw all curves of both runs on one shared axes. Each legend entry names the
# run it belongs to and run 2 is dashed; previously both runs used identical
# labels, so the curves could not be attributed to a run.
fig, ax = plt.subplots(figsize=(15, 10))
for _run, (_frame, _linestyle) in enumerate([(df1, '-'), (df2, '--')], start=1):
    for _column, _series, _model in _FIT_COLUMNS:
        _frame[_column].plot(marker='.', markersize=10, linestyle=_linestyle, grid=True,
                             label='{0} semivariogram {1} (run {2})'.format(_series, _model, _run),
                             legend=True, ax=ax)
ax.set_title('Fitted semivariograms: run 1 vs. run 2');

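## Independent two-sample Student’s t-test
Tests whether the means of two independent samples are significantly different. The code below uses `scipy.stats.ttest_ind_from_stats`, which works from summary statistics (mean, standard deviation, number of observations) of each sample rather than from paired observations.

Assumptions

* Observations in each sample are independent and identically distributed (iid).
* Observations in each sample are normally distributed.
* Observations in each sample have the same variance.
* The two samples are independent of each other (the test used here is unpaired, so observations are not matched across samples).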

Interpretation

* H0: the means of the samples are equal.
* H1: the means of the samples are unequal.

In [None]:
# Number of observations assumed for each sample in every t-test below.
# NOTE(review): perr_* is the square root of the covariance diagonal, i.e.
# presumably the standard error of each fitted parameter, whereas
# ttest_ind_from_stats expects the sample standard deviation together with the
# sample size -- confirm that passing perr_* with nobs=20 is what is intended.
N_OBS = 20

# Settings identifying the stored results of each run.
_RUN1 = (START_TIME1, BOUNDING_BOX1, DISTANCE_BINS1)
_RUN2 = (START_TIME2, BOUNDING_BOX2, DISTANCE_BINS2)


def _load_fit(model, start_time, bounding_box, distance_bins):
    """Load the stored fit results of one semivariogram model for one run.

    Reads ``./data/semivariogramparams_popt_<model>_<run settings>.npy`` and the
    matching ``..._pcov_...`` file.

    Returns:
        Tuple ``(popt, pcov, perr)`` where ``perr`` is ``sqrt(diag(pcov))``.
    """
    suffix = '{0}_{1}_{2}_{3}.npy'.format(model, start_time, str(bounding_box), str(distance_bins))
    popt = np.load('./data/semivariogramparams_popt_' + suffix)
    pcov = np.load('./data/semivariogramparams_pcov_' + suffix)
    return popt, pcov, np.sqrt(np.diag(pcov))


def _print_p_values(label, popt_a, perr_a, popt_b, perr_b, nobs=N_OBS):
    """Print the t-test p-value of each fitted parameter for one model.

    The parameter names are attached to indices 0, 1 and 2 of ``popt``; this
    assumes the stored parameters are ordered (sill, nugget, range).
    """
    for idx, param in enumerate(('sill', 'nugget', 'range')):
        result = ttest_ind_from_stats(popt_a[idx], perr_a[idx], nobs,
                                      popt_b[idx], perr_b[idx], nobs)
        print("p " + param + " " + label + ": " + str(result.pvalue))


# Fit results of run 1.
popt_p1_sph1, pcov_p1_sph1, perr_p1_sph1 = _load_fit('p1_sph', *_RUN1)
popt_p2_sph1, pcov_p2_sph1, perr_p2_sph1 = _load_fit('p2_sph', *_RUN1)
popt_p1_exp1, pcov_p1_exp1, perr_p1_exp1 = _load_fit('p1_exp', *_RUN1)
popt_p2_exp1, pcov_p2_exp1, perr_p2_exp1 = _load_fit('p2_exp', *_RUN1)

# Fit results of run 2.
popt_p1_sph2, pcov_p1_sph2, perr_p1_sph2 = _load_fit('p1_sph', *_RUN2)
popt_p2_sph2, pcov_p2_sph2, perr_p2_sph2 = _load_fit('p2_sph', *_RUN2)
popt_p1_exp2, pcov_p1_exp2, perr_p1_exp2 = _load_fit('p1_exp', *_RUN2)
popt_p2_exp2, pcov_p2_exp2, perr_p2_exp2 = _load_fit('p2_exp', *_RUN2)

# Compare run 1 against run 2, model by model and parameter by parameter.
_print_p_values('P1_sph', popt_p1_sph1, perr_p1_sph1, popt_p1_sph2, perr_p1_sph2)
_print_p_values('P2_sph', popt_p2_sph1, perr_p2_sph1, popt_p2_sph2, perr_p2_sph2)
_print_p_values('P1_exp', popt_p1_exp1, perr_p1_exp1, popt_p1_exp2, perr_p1_exp2)
_print_p_values('P2_exp', popt_p2_exp1, perr_p2_exp1, popt_p2_exp2, perr_p2_exp2)


In [None]:
# Fitted parameters and their errors for the P1 spherical model, one
# (label, popt, perr) entry per run.
_p1_sph_runs = [
    ('semivariogram_p1_sph_{0}_{1}_{2}'.format(START_TIME1, str(BOUNDING_BOX1), str(DISTANCE_BINS1)),
     popt_p1_sph1, perr_p1_sph1),
    ('semivariogram_p1_sph_{0}_{1}_{2}'.format(START_TIME2, str(BOUNDING_BOX2), str(DISTANCE_BINS2)),
     popt_p1_sph2, perr_p1_sph2),
]

# Long-format table: one row per (parameter, run), rows grouped by parameter.
P1_sph = pd.DataFrame([
    {'Variogram': name, 'Param': param, 'Value': popt[idx], 'Error': perr[idx]}
    for idx, param in enumerate(('Sill', 'Nugget', 'Range'))
    for name, popt, perr in _p1_sph_runs
])

# Grouped bar chart: one group per parameter, one bar per run.
g = sns.catplot(data=P1_sph, x="Param", y="Value", hue="Variogram",
                kind="bar", height=6, palette="muted")
g.despine(left=True)
g.set_ylabels("Value")

In [70]:
# Fitted parameters and their errors for the P2 spherical model, one
# (label, popt, perr) entry per run.
_p2_sph_runs = [
    ('semivariogram_p2_sph_{0}_{1}_{2}'.format(START_TIME1, str(BOUNDING_BOX1), str(DISTANCE_BINS1)),
     popt_p2_sph1, perr_p2_sph1),
    ('semivariogram_p2_sph_{0}_{1}_{2}'.format(START_TIME2, str(BOUNDING_BOX2), str(DISTANCE_BINS2)),
     popt_p2_sph2, perr_p2_sph2),
]

# Long-format table: one row per (parameter, run), rows grouped by parameter.
P2_sph = pd.DataFrame([
    {'Variogram': name, 'Param': param, 'Value': popt[idx], 'Error': perr[idx]}
    for idx, param in enumerate(('Sill', 'Nugget', 'Range'))
    for name, popt, perr in _p2_sph_runs
])

# Grouped bar chart: one group per parameter, one bar per run.
g = sns.catplot(data=P2_sph, x="Param", y="Value", hue="Variogram",
                kind="bar", height=6, palette="muted")
g.despine(left=True)
g.set_ylabels("Value")

In [None]:
# Fitted parameters and their errors for the P1 exponential model, one
# (label, popt, perr) entry per run.
_p1_exp_runs = [
    ('semivariogram_p1_exp_{0}_{1}_{2}'.format(START_TIME1, str(BOUNDING_BOX1), str(DISTANCE_BINS1)),
     popt_p1_exp1, perr_p1_exp1),
    ('semivariogram_p1_exp_{0}_{1}_{2}'.format(START_TIME2, str(BOUNDING_BOX2), str(DISTANCE_BINS2)),
     popt_p1_exp2, perr_p1_exp2),
]

# Long-format table: one row per (parameter, run), rows grouped by parameter.
P1_exp = pd.DataFrame([
    {'Variogram': name, 'Param': param, 'Value': popt[idx], 'Error': perr[idx]}
    for idx, param in enumerate(('Sill', 'Nugget', 'Range'))
    for name, popt, perr in _p1_exp_runs
])

# Grouped bar chart: one group per parameter, one bar per run.
g = sns.catplot(data=P1_exp, x="Param", y="Value", hue="Variogram",
                kind="bar", height=6, palette="muted")
g.despine(left=True)
g.set_ylabels("Value")

In [None]:
# Fitted parameters and their errors for the P2 exponential model, one
# (label, popt, perr) entry per run.
_p2_exp_runs = [
    ('semivariogram_p2_exp_{0}_{1}_{2}'.format(START_TIME1, str(BOUNDING_BOX1), str(DISTANCE_BINS1)),
     popt_p2_exp1, perr_p2_exp1),
    ('semivariogram_p2_exp_{0}_{1}_{2}'.format(START_TIME2, str(BOUNDING_BOX2), str(DISTANCE_BINS2)),
     popt_p2_exp2, perr_p2_exp2),
]

# Long-format table: one row per (parameter, run), rows grouped by parameter.
P2_exp = pd.DataFrame([
    {'Variogram': name, 'Param': param, 'Value': popt[idx], 'Error': perr[idx]}
    for idx, param in enumerate(('Sill', 'Nugget', 'Range'))
    for name, popt, perr in _p2_exp_runs
])

# Grouped bar chart: one group per parameter, one bar per run.
g = sns.catplot(data=P2_exp, x="Param", y="Value", hue="Variogram",
                kind="bar", height=6, palette="muted")
g.despine(left=True)
g.set_ylabels("Value")