# **Time Series Statistical Analysis**

## **Import libraries**

In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from patsy import dmatrices

from scipy import stats

## **Selected data reception from PreProc.ipynb**

In [None]:
# Restore variables saved with IPython's %store magic (per the section
# heading, they are produced by PreProc.ipynb).
# data-filtering parameters
%store -r max_IWV
%store -r max_dist_from_THAAO
%store -r deltatime
# MODIS data
%store -r new_mod_vespa
%store -r new_myd_vespa
%store -r new_mxd_vespa
# plot_defaults
%store -r plot_defaults
# complete data set, not filtered with deltatime
%store -r vespa_data

### **Controllo dati ricevuti**

In [None]:
# Add a 'timestamp' column holding the 'Time' values as seconds:
# the numeric value is divided by 10**9 (nanoseconds -> seconds).
# NOTE(review): assumes 'Time' is datetime64[ns] - confirm in PreProc.ipynb.
vespa_data["timestamp"] = pd.to_numeric(vespa_data["Time"].values) / 10**9
# Show the Python type of one converted entry as a sanity check.
type(vespa_data["timestamp"][2])

In [None]:
# OK: quick look at the raw data, for testing only
# Print the type of the first 'Time' entry ...
print(type(vespa_data['Time'][0]))
# ... and display the first row of the full data set.
vespa_data.head(1)    

In [None]:
# Number of selected data points in each restored frame, taken as the
# length of its IWV_MODIS column.
num_mod_data_p_Dt = len(new_mod_vespa["IWV_MODIS"])
num_myd_data_p_Dt = len(new_myd_vespa["IWV_MODIS"])
num_mxd_data_p_Dt = len(new_mxd_vespa["IWV_MODIS"])
# Sum of the MOD and MYD counts, printed next to the MXD count below.
num_modmyd_data_p_Dt = sum((num_mod_data_p_Dt, num_myd_data_p_Dt))

In [None]:
# Print a summary of the selection parameters restored above and of the
# resulting number of data points per product.
# Fixes: the station label was misspelled "THAOO" (the variable and columns
# all use "THAAO"); the f-prefix is dropped from strings without placeholders.
print('-- Selection parameters --')
print(f'max IWV = {max_IWV} [mm]')
print(f'max distance from THAAO = {max_dist_from_THAAO} [km]')
print(f'Deltatime chosen for data points selection = {deltatime}\n')
print('-- Data points selected --')
print(f'Selected number of MOD data points =          {num_mod_data_p_Dt}')
print(f'Selected number of MYD data points =          {num_myd_data_p_Dt}\n')
print(f'Total selected number of MXD data points     =  {num_mxd_data_p_Dt}')
print(f'Total selected number of MOD+MYD data points =  {num_modmyd_data_p_Dt}\n')

## **Statistica**

### **Scipy stats**

#### **Pearson and Spearman correlation coefficients**

In [None]:
# Pearson correlation between the IWV_MODIS and IWV_THAAO columns of the
# selected MXD data set.
res_pearson = stats.pearsonr(
    new_mxd_vespa["IWV_MODIS"],
    new_mxd_vespa["IWV_THAAO"],
)

In [None]:
# Show the Pearson result object (statistic and p-value) ...
print(res_pearson)
# ... and its confidence interval, computed with SciPy's default arguments.
print(res_pearson.confidence_interval())

**test**: the Pearson correlation coefficient's p-value is reported as 0.0 because the true value is smaller than the smallest representable floating-point number (underflow); see ref.: 
[pvalue](https://stackoverflow.com/questions/45914221/minimal-p-value-for-scipy-stats-pearsonr) <br>

In [None]:
from scipy.stats import beta
from scipy.special import btdtr
# Re-derive the two-sided p-value of the Pearson coefficient by hand, to
# check the pvalue=0.0 reported by stats.pearsonr.
# Under the null hypothesis r follows a beta distribution on [-1, 1] with
# both shape parameters equal to n/2 - 1 (see the SciPy pearsonr docs), so
# on the standard [0, 1] interval:
#     p = 2 * Beta(ab, ab).cdf((1 - |r|) / 2)
# Fixes: the shape parameter was n/2 (missing the "- 1") and the two-sided
# factor 2 was missing. The scipy.special.btdtr call was dropped because its
# result was immediately overwritten by the beta.cdf one (btdtr is also
# deprecated since SciPy 1.12 in favour of betainc).
ab = 0.5 * num_mxd_data_p_Dt - 1
prob = 2 * beta(ab, ab).cdf(0.5 * (1 - abs(res_pearson.statistic)))
prob

In [None]:
# Spearman rank correlation between the IWV_MODIS and IWV_THAAO columns of
# the selected MXD data set.
res_spearman = stats.spearmanr(
    new_mxd_vespa["IWV_MODIS"],
    new_mxd_vespa["IWV_THAAO"],
)

In [None]:
# Report the Spearman statistic and its p-value on a single line.
print(
    f'Spearman: statistic={res_spearman.statistic}, '
    f'pvalue={res_spearman.pvalue}'
)

### **Statsmodels**

#### **Ordinary Least Square regression (OLS)**

In [None]:
# Ordinary least squares fit of IWV_MODIS against IWV_THAAO plus the
# diff_int, diff_distance and vza columns of the selected MXD data set.
mod = smf.ols(
    formula='IWV_MODIS ~ IWV_THAAO + diff_int + diff_distance + vza',
    data=new_mxd_vespa,
)
res = mod.fit()
# Print the full regression report.
print(res.summary())

In [None]:
# Ordinary least squares fit with IWV_THAAO as the only regressor.
# This rebinds `mod` and `res`, which the cells below read.
mod = smf.ols(
    formula='IWV_MODIS ~ IWV_THAAO',
    data=new_mxd_vespa,
)
res = mod.fit()
# Print the full regression report.
print(res.summary())

In [None]:
# Display the fitted coefficients of the most recent OLS fit held in `res`.
res.params

In [None]:
# Display the R-squared of the most recent OLS fit held in `res`.
res.rsquared

In [None]:
# Partial regression plot of IWV_MODIS against IWV_THAAO, with
# diff_distance as the other regressor; per-point labels are switched off.
sm.graphics.plot_partregress(
    endog='IWV_MODIS',
    exog_i='IWV_THAAO',
    exog_others=['diff_distance'],
    data=new_mxd_vespa,
    obs_labels=False,
)

# **Prove**