In [None]:
# Colab-only: mount the user's Google Drive at /gdrive inside the runtime.
from google.colab import drive
drive.mount ('/gdrive')

In [None]:
# Placeholder: replace the text after %cd with the Drive folder that holds the
# input CSV files (the read_csv calls further down use bare relative file names).
%cd #gdrive path here

In [None]:
import pandas as pd
import numpy as np
import stan
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow_probability.substrates import numpy as tfp
tfd = tfp.distributions

In [None]:
from cmdstanpy import CmdStanModel, set_cmdstan_path, cmdstan_path
import arviz as az
import os

In [None]:
import cmdstanpy
# Install CmdStan, the backend that CmdStanPy uses to compile and run Stan programs.
cmdstanpy.install_cmdstan()

In [None]:
# Display the CmdStan installation path that CmdStanPy will use (sanity check).
cmdstan_path()

In [None]:
# Month index (1-12) of every day in a 365-day (non-leap) year:
# each month number is repeated once per day of that month.
month = np.repeat(
    np.arange(1, 13),
    [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
)

In [None]:
# All inputs are semicolon-separated CSV files read from the working directory,
# in the same order as before. 'dat_complete_log_to_python_355.csv' holds the
# log PM10 values.
nugget, data_vec, ind_miss, ind_pres, dati_covariates = (
    pd.read_csv(csv_name, sep=";")
    for csv_name in (
        'nugget_to_python.csv',
        'dat_complete_log_to_python_355.csv',
        'ind_miss_to_python_355.csv',
        'ind_pres_to_python_355.csv',
        'covariates.csv',
    )
)

In [None]:
# --- Matrix passed to Stan as `dist_w` ------------------------------------
nugget_mat = np.matrix(nugget, dtype=float)

# --- Observations and index tables as plain numpy arrays -------------------
# np.array (not .to_numpy()) keeps the cell re-runnable once the names already
# hold arrays.
data_vettore = np.array(data_vec, dtype=float)
ind_pres = np.array(ind_pres)
ind_miss = np.array(ind_miss)

# --- Station covariates -----------------------------------------------------
# Row 0 of the covariate table is cast to int and standardised.
matrix_dati_covariates = np.matrix(dati_covariates)
quota = np.array(matrix_dati_covariates[0, :], dtype=int)
quota_norm = (quota - quota.mean()) / quota.std()

# Rows 1 and 2 are one-hot encoded; only the `Traffico` indicator is used below.
area_dummies = pd.get_dummies(dati_covariates.iloc[1, :])
zona_dummies = pd.get_dummies(dati_covariates.iloc[2, :])

# Two columns per station: standardised quota, then the Traffico indicator.
all_cov = np.matrix(zona_dummies[["Traffico"]], dtype=bool)
all_cov = np.concatenate((quota_norm.T, all_cov), axis=1)

### Stan model

In [None]:
# Stan program: the mean of each (day, station) cell is
#   intercept + covariate effect + Fourier (sin/cos) seasonal term + station effect w,
# where w has covariance a*exp(-phi*dist_w). Missing observations are declared
# as parameters (dat_miss) and sampled together with everything else.
#
# Changes with respect to the previous version of this string:
#   * ind_pres / ind_miss use the `array[...] int` syntax (the legacy
#     `int x[N, M]` form was removed in Stan 2.33 and no longer compiles);
#   * phi is bounded to (0, 1) so that its support matches the beta(7,70) prior;
#   * declarations come before statements in each block, and counts, indices
#     and the month index carry explicit bounds.
fourier_model = """

data {
  int<lower=0> num_giorni;  //number of obs
  int<lower=0> num_test;  //num_giorni-10
  int<lower=0> num_stazioni;  //number of stations
  int<lower=0> d;  //length of vector of basis
  vector[d] vec_k;  //vector of basis
  vector[num_giorni] t;  //time instant
  matrix[num_stazioni,num_stazioni] dist_w;  //distance matrix
  int<lower=0> Ncomp;  // number of non-missing values (last 10 days excluded)
  int<lower=0> Nmiss;  // number of missing values (last 10 days excluded)
  array[Ncomp, 2] int<lower=1> ind_pres;  // (day, station) index of each observed value
  array[Nmiss, 2] int<lower=1> ind_miss;  // (day, station) index of each missing value
  vector[Ncomp] dat_complete;  // observed values, in the order of ind_pres
  matrix[num_stazioni, 2] covariates;
  array[num_giorni] int<lower=1, upper=12> mese;  // month (1-12) of each day
}


parameters {
  vector<lower=0>[12] sigma;  // one value per month; used as a variance (sqrt taken below)
  vector[d] alpha;
  vector[d] beta;
  real<lower=0> a;
  real<lower=0, upper=1> phi;  // bounded to the support of its beta prior
  vector[num_stazioni] w;
  vector [Nmiss] dat_miss;
  real beta_0;
  vector[2] beta_cov;
}


transformed parameters {
  real omega = 2*pi()/365;
  vector[num_giorni] fourier;
  matrix[num_giorni,num_stazioni] mu;
  matrix[num_stazioni,num_stazioni] cov_w;
  matrix [num_test,num_stazioni] y;
  fourier = rep_vector(0,num_giorni);


  // Rebuild the full response matrix from observed values and missing-value parameters.
  for (i in 1:Ncomp) {
    y[ind_pres[i,1], ind_pres[i,2]] =  dat_complete[i];
  }
  for(i in 1:Nmiss) {
      y[ind_miss[i,1],ind_miss[i,2]] = dat_miss[i];
    }


  for (i in 1:d){
    fourier += alpha[i]*sin(vec_k[i]*omega*t) + beta[i]*cos(vec_k[i]*omega*t);
  }


  cov_w = a*exp(-phi*dist_w);
  
  for(j in 1:num_stazioni){
    mu[:,j] = beta_0 + covariates[j,:]*beta_cov + fourier + w[j];
  }
}


model {
  sigma ~ inv_gamma(3,2);

  alpha ~ normal(rep_vector(0,d),1);
  beta ~ normal(rep_vector(0,d),1);

  a ~ inv_gamma(3,2);
  phi ~ beta(7,70);

  beta_0 ~ normal(0, 2);
  beta_cov ~ normal(rep_vector(0,2), 2);

  w ~ multi_normal(rep_vector(0,num_stazioni),cov_w);
  
  // Only the first num_test days enter the likelihood.
  for (i in 1:num_test)
      y[i,:] ~ normal(mu[i,:], sqrt(sigma[mese[i]]));
}


generated quantities {
  vector[num_test*num_stazioni] log_lik;
  matrix [num_giorni,num_stazioni] y_pred;
  {
    matrix [num_test, num_stazioni] temp; 
    for (i in 1:num_test) {
      for (j in 1:num_stazioni) {
      temp[i,j]= normal_lpdf(y[i,j] | mu[i,j], sqrt(sigma[mese[i]]));
      }
    }
    log_lik = to_vector(temp);
  }
  
  // Posterior-predictive draws for all num_giorni days, including those after num_test.
  for (i in 1:num_giorni){
    for (j in 1:num_stazioni){
      y_pred[i,j]= normal_rng(mu[i,j], sqrt(sigma[mese[i]]));
    }
  }
}

"""

stan_file = "./fourier.stan"

# Write the program to disk followed by a single newline (what print() emitted),
# then let CmdStanPy compile it.
with open(stan_file, "w") as stan_src:
    stan_src.write(fourier_model + "\n")

fourier = CmdStanModel(stan_file=stan_file)

In [None]:
# Inputs for the Stan `data` block; every key matches a name declared there.
reg_data = {
    "num_giorni": 365,                     # days in the year
    "num_test": 355,                       # days used in the likelihood (num_giorni - 10)
    "num_stazioni": 62,                    # monitoring stations
    "d": 3,                                # number of harmonics
    "vec_k": [1,2,4],                      # harmonic multipliers
    "t": np.arange(1,366),                 # day index 1..365
    "dist_w": nugget_mat,
    "Ncomp": np.shape(ind_pres)[0],
    "Nmiss": np.shape(ind_miss)[0],
    "ind_pres": ind_pres,
    "ind_miss": ind_miss,
    "dat_complete": data_vettore[:,0],
    "covariates": all_cov,
    "mese": month
}

# A fixed seed makes the posterior draws (and the normal_rng predictions that
# depend on them) reproducible from one run of the notebook to the next.
fit = fourier.sample(data=reg_data, chains=4, parallel_chains=4,
                     iter_warmup=1000, iter_sampling=1000, seed=42)
fourier_az = az.from_cmdstanpy(fit)

### Variables plot

In [None]:
# Trace plots (chains drawn separately) for every model parameter of interest.
az.plot_trace(fourier_az, var_names=['beta_cov','beta_0','sigma','a','phi','w','alpha', 'beta'])

In [None]:
# Trace plots for the covariate coefficients and the intercept.
az.plot_trace(fourier_az, var_names=['beta_cov','beta_0'])

In [None]:
# Trace plots for the 12 monthly sigma parameters.
az.plot_trace(fourier_az, var_names=['sigma'])

In [None]:
# Trace plots for the covariance parameters (a, phi) and the station effects w.
az.plot_trace(fourier_az, var_names=['a','phi','w'])

In [None]:
# Trace plots for the sine (alpha) and cosine (beta) Fourier coefficients.
az.plot_trace(fourier_az, var_names=['alpha', 'beta'])

In [None]:
# Same variables as the first trace plot, with all chains combined (combined=True).
az.plot_trace(fourier_az, var_names=['beta_cov','beta_0','sigma','a','phi','w','alpha', 'beta'],combined=True)

In [None]:
# Covariate coefficients and intercept, chains combined.
az.plot_trace(fourier_az, var_names=['beta_cov','beta_0'],combined=True)

In [None]:
# Monthly sigma parameters, chains combined.
az.plot_trace(fourier_az, var_names=['sigma'],combined=True)

In [None]:
# Covariance parameters and station effects, chains combined.
az.plot_trace(fourier_az, var_names=['a','phi','w'],combined=True)

In [None]:
# Fourier coefficients, chains combined.
az.plot_trace(fourier_az, var_names=['alpha', 'beta'],combined=True)

In [None]:
# Total of the sampler's `diverging` flags, i.e. the number of divergent transitions.
np.sum(fourier_az.sample_stats.diverging)

### Error measures and prediction

In [None]:
# Information criteria computed from the pointwise log-likelihood that the
# Stan program stores in `log_lik`.
waic_result = az.waic(fourier_az,var_name="log_lik")
print(waic_result)
loo_result = az.loo(fourier_az,var_name="log_lik")
# Bare last expression: rendered as the cell output.
loo_result

In [None]:
# Posterior-predictive draws; indexed below as y_pred[draw, day, station].
y_pred = fit.stan_variable(var="y_pred")
# Reference values the predictions are compared against.
# NOTE(review): presumably the true observations on the same (log) scale as the
# model output, since np.exp is applied to both when plotting - confirm.
y_veri= pd.read_csv('y_veri.csv', sep = ";")

In [None]:
# Convert to a float matrix so that y_veri[i, j] indexing works in the cells below.
y_veri = np.matrix(y_veri, dtype=float)

In [None]:
# 2.5 %, 50 % and 97.5 % posterior-predictive quantiles for each of the 365 days
# at the station in column 1. A single call reduces over the draws axis
# (axis 0) instead of looping day by day; the result has one row per requested
# quantile, unpacked into three length-365 arrays.
Q1, Q2, Q3 = np.quantile(y_pred[:, :365, 1], [0.025, 0.5, 0.975], axis=0)

In [None]:
# Predictive band for station column 1 on the model scale: lower/upper
# quantiles (light blue), median (royal blue), reference series (green),
# with the area between the outer quantiles shaded.
plt.plot(Q1,color='lightblue')
plt.plot(Q2,color='royalblue')
plt.plot(Q3,color='lightblue')
plt.plot(y_veri[:,1],color='forestgreen')
plt.fill_between(np.arange(0,365),Q1,Q3,color='lightcyan')

In [None]:
# Same figure after exponentiating every series (back-transform from the log scale).
plt.plot(np.exp(Q1),color='lightblue')
plt.plot(np.exp(Q2),color='royalblue')
plt.plot(np.exp(Q3),color='lightblue')
plt.plot(np.exp(y_veri[:,1]),color='forestgreen')
plt.fill_between(np.arange(0,365),np.exp(Q1),np.exp(Q3),color='lightcyan')

In [None]:
# Mean absolute error of the posterior-predictive median over the 10 days that
# were left out of the likelihood (rows 355-364) and the first 62 station columns.
# Vectorized, and no longer stored in a variable called `sum`, which shadowed
# the builtin for the rest of the session.
holdout = slice(355, 365)
obs_holdout = np.asarray(y_veri)[holdout, :62]
pred_median = np.quantile(y_pred[:, holdout, :62], 0.5, axis=0)
mae = np.mean(np.abs(obs_holdout - pred_median))
mae

In [None]:
# Sum of the relative absolute errors of the posterior-predictive mean over
# the 10 left-out days (rows 355-364) and the first 62 station columns.
# Vectorized, and no longer stored in a variable called `sum` (builtin shadowing).
# NOTE(review): unlike the MAE cell, this total is not divided by the number of
# cells (62*10); divide by obs_holdout.size if a mean percentage error is wanted.
# NOTE(review): the denominator is the signed reference value, as before -
# confirm the reference values are strictly positive.
holdout = slice(355, 365)
obs_holdout = np.asarray(y_veri)[holdout, :62]
pred_mean = np.mean(y_pred[:, holdout, :62], axis=0)
rel_err_total = np.sum(np.abs(obs_holdout - pred_mean) / obs_holdout)
rel_err_total

In [None]:
# Number of left-out cells (rows 355-364, first 62 station columns) whose
# reference value lies strictly inside the 2.5 %-97.5 % posterior-predictive
# interval. Vectorized, and no longer stored in a variable called `sum`
# (builtin shadowing). The result is a count out of 62*10 cells, not a rate.
holdout = slice(355, 365)
obs_holdout = np.asarray(y_veri)[holdout, :62]
lower, upper = np.quantile(y_pred[:, holdout, :62], [0.025, 0.975], axis=0)
n_covered = np.count_nonzero((obs_holdout > lower) & (obs_holdout < upper))
n_covered