In [None]:
# Make Google Drive visible inside the Colab runtime; the notebook later reads
# its CSV inputs through relative paths under this mount.
from google.colab import drive

gdrive_mount_point = '/gdrive'
drive.mount(gdrive_mount_point)

Mounted at /gdrive


In [None]:
# Change the working directory to the Drive folder that holds the CSV inputs:
# the read_csv calls in this notebook use bare file names, so they resolve
# against the current directory. Replace the placeholder below with that path.
%cd #add path to gdrive

/gdrive/My Drive/Colab Notebooks/Bayesiana


In [None]:
import pandas as pd
import numpy as np
import stan
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow_probability.substrates import numpy as tfp
tfd = tfp.distributions

In [None]:
from cmdstanpy import CmdStanModel, set_cmdstan_path, cmdstan_path
import arviz as az
import os

In [None]:
# Install CmdStan through cmdstanpy's installer. A CmdStanModel is compiled
# further down in this notebook, which requires a CmdStan installation.
import cmdstanpy
cmdstanpy.install_cmdstan()

In [None]:
# Show the CmdStan installation path cmdstanpy is currently configured to use
# (sanity check before compiling the model).
cmdstan_path()

In [None]:
# Every input file is a semicolon-separated CSV located in the working directory.
csv_sep = ";"

nugget = pd.read_csv('nugget_to_python.csv', sep=csv_sep)
data_vec = pd.read_csv('dat_complete_log_to_python.csv', sep=csv_sep)  # log PM10 values
ind_miss = pd.read_csv('ind_miss_to_python.csv', sep=csv_sep)
ind_pres = pd.read_csv('ind_pres_to_python.csv', sep=csv_sep)
dati_covariates = pd.read_csv('covariates.csv', sep=csv_sep)

In [None]:
# Convert the loaded tables into NumPy containers for the Stan data dictionary.
# nugget_mat is the matrix later passed to the model under the key "dist_w".
# NOTE(review): np.matrix is discouraged by NumPy in favour of plain ndarrays;
# it is kept here because the slicing below relies on its always-2-D semantics.
nugget_mat = np.matrix(nugget, dtype=float)

data_vettore = np.array(data_vec, dtype=float)
# The index tables are rebound from DataFrames to plain arrays (same names).
ind_miss = np.array(ind_miss)
ind_pres = np.array(ind_pres)

matrix_dati_covariates=np.matrix(dati_covariates)
# Row 0 of the covariate table is read as integers ('quota' — presumably the
# station altitude, TODO confirm). Slicing an np.matrix keeps two dimensions,
# so quota is a single-row 2-D array, not a 1-D vector.
quota= np.array (matrix_dati_covariates[0,:], dtype=int)
# Standardise: subtract the mean and divide by the (population) standard deviation.
quota_norm = ((quota-quota.mean())/quota.std())

# Rows 1 and 2 of the covariate table are categorical and are one-hot encoded.
area_dummies = pd.get_dummies(dati_covariates.iloc[1, :])

zona_dummies = pd.get_dummies(dati_covariates.iloc[2, :])
# Only the Urbano/Suburbano and Industriale/Traffico indicator columns are kept;
# any other level is left out. NOTE(review): confirm which levels exist in the data.
all_cov = np.matrix(pd.concat([area_dummies.Urbano, area_dummies.Suburbano, 
                               zona_dummies.Industriale, zona_dummies.Traffico],axis=1),dtype=bool)
# Final design matrix: the standardised quota column (transposed from its
# single-row form) followed by the four indicators, i.e. the 5 columns the
# Stan program declares for `covariates`.
all_cov = np.concatenate((np.transpose(quota_norm),all_cov),axis=1)

### Stan model

In [None]:
# Stan program for the spatio-temporal model: for each station j the column
# y[:, j] is Normal with mean
#     beta_0 + covariates[j] * beta_cov + Fourier seasonal term + w[j]
# and variance sigma, where w is a station-level multivariate-normal effect with
# covariance a * exp(-phi * dist_w). Missing entries of y are parameters.
#
# Changes with respect to the previous version of this string:
#   * the log_lik loop runs over 1:num_giorni instead of a hard-coded 1:365, so
#     every cell of the declared matrix is filled whatever the number of days;
#   * integer index arrays use the `array[...] int` declaration syntax (the old
#     `int x[...]` form is rejected by current Stan compilers);
#   * phi is bounded to (0, 1), the support of its beta prior, so the sampler
#     never proposes values the prior cannot evaluate;
#   * local declarations are placed before the first statement of the block.
fourier_model = """

data {
  int<lower=0> num_giorni;  //number of days (rows of y)
  int<lower=0> num_stazioni;  //number of stations (columns of y)
  int<lower=0> d;  //length of vector of basis
  vector[d] vec_k;  //vector of basis (harmonic multipliers)
  vector[num_giorni] t;  //time instant
  matrix[num_stazioni,num_stazioni] dist_w;  //distance matrix
  int<lower=0> Ncomp; // Number of non-missing values
  int<lower=0> Nmiss; // Number of missing values
  array[Ncomp, 2] int ind_pres;  // (day, station) index of each observed value
  array[Nmiss, 2] int ind_miss;  // (day, station) index of each missing value
  vector[Ncomp] dat_complete;  // observed values, in the order of ind_pres
  matrix[num_stazioni, 5] covariates;
}


parameters {
  real<lower=0> sigma;  // observation variance (sqrt is taken in the likelihood)
  vector[d] alpha;  // sine coefficients
  vector[d] beta;  // cosine coefficients
  real<lower=0> a;  // scale of the spatial covariance
  real<lower=0, upper=1> phi;  // decay of the spatial covariance; beta prior => (0,1)
  vector[num_stazioni] w;  // station-level spatial effects
  vector [Nmiss] dat_miss;  // missing observations, sampled as parameters
  real beta_0;
  vector[5] beta_cov;
}


transformed parameters {
  real omega = 2*pi()/365;
  vector[num_giorni] fourier = rep_vector(0,num_giorni);
  matrix[num_giorni,num_stazioni] mu;
  matrix[num_stazioni,num_stazioni] cov_w;
  matrix [num_giorni,num_stazioni] y;

  // Rebuild the full data matrix from observed values and sampled missing ones.
  for (i in 1:Ncomp) {
    y[ind_pres[i,1], ind_pres[i,2]] =  dat_complete[i];
  }
  for(i in 1:Nmiss) {
      y[ind_miss[i,1],ind_miss[i,2]] = dat_miss[i];
    }


  for (i in 1:d){
    fourier += alpha[i]*sin(vec_k[i]*omega*t) + beta[i]*cos(vec_k[i]*omega*t);
  }
  

  cov_w = a*exp(-phi*dist_w);
  

  for (j in 1:num_stazioni){
      mu[:,j] = beta_0 + covariates[j,:]*beta_cov + fourier + w[j];
    }
}


model {
  sigma ~ inv_gamma(3,2);

  alpha ~ normal(rep_vector(0,d),1);
  beta ~ normal(rep_vector(0,d),1);

  a ~ inv_gamma(3,2);
  phi ~ beta(7,70);

  beta_0 ~ normal(0, 2);
  beta_cov ~ normal(rep_vector(0,5), 2);

  w ~ multi_normal(rep_vector(0,num_stazioni),cov_w);
  
  for (j in 1:num_stazioni)
    y[:,j] ~ normal(mu[:,j], sqrt(sigma));
}


generated quantities {
  // Pointwise log-likelihood, flattened column by column (station after station).
  vector[num_giorni*num_stazioni] log_lik;
  {
    matrix [num_giorni, num_stazioni] temp; 
    for (i in 1:num_giorni) {
      for (j in 1:num_stazioni) {
      temp[i,j]= normal_lpdf(y[i,j] | mu[i,j], sqrt(sigma));
      }
    }
    log_lik = to_vector(temp);
  }
  
}

"""

# Persist the Stan program next to the notebook and compile it with CmdStan.
stan_file = "./fourier.stan"

with open(stan_file, "w") as f:
    # Same bytes as print(): the program text followed by one newline.
    f.write(fourier_model + "\n")

fourier = CmdStanModel(stan_file=stan_file)

In [None]:
# Number of daily time points; defined once so that "num_giorni" and the time
# grid "t" can never disagree.
NUM_GIORNI = 365
# Fixed seed so that a Restart & Run All reproduces the same posterior draws.
SAMPLER_SEED = 20240101

# Data block for the Stan program; the keys must match its `data` declarations.
reg_data = {
    "num_giorni": NUM_GIORNI,
    "num_stazioni": 62,
    "d": 2,                                  # number of harmonics
    "vec_k": [1,2],                          # harmonic multipliers
    "t": np.arange(1, NUM_GIORNI + 1),       # days 1..NUM_GIORNI
    "dist_w": nugget_mat,
    "Ncomp": np.shape(ind_pres)[0],          # one row per observed value
    "Nmiss": np.shape(ind_miss)[0],          # one row per missing value
    "ind_pres": ind_pres,
    "ind_miss": ind_miss,
    "dat_complete": data_vettore[:,0],       # first column of the data table
    "covariates": all_cov
}

# Four chains run in parallel, 1000 warm-up and 1000 kept iterations each.
fit = fourier.sample(data=reg_data, chains=4, parallel_chains=4,
                     iter_warmup=1000, iter_sampling=1000,
                     seed=SAMPLER_SEED)
# Wrap the fit for ArviZ diagnostics and plots.
fourier_az = az.from_cmdstanpy(fit)

In [None]:
# Trace plots for the model parameters, with the chains pooled into one density.
trace_vars = ['beta_cov','beta_0','sigma','a','phi','w','alpha', 'beta']
az.plot_trace(fourier_az, var_names=trace_vars, combined=True)

In [None]:
# Total number of divergent transitions recorded across all chains and draws.
np.sum(fourier_az.sample_stats["diverging"])

### Confidence intervals (95% posterior credible intervals)

In [None]:
# Pull the posterior draws out of the fit. Scalars become plain Python lists;
# vector parameters are split column by column into one list per component.
beta_0 = fit.stan_variable(var="beta_0").tolist()

# Regression coefficients of the five covariates.
beta_cov = fit.stan_variable(var="beta_cov")
beta_1, beta_2, beta_3, beta_4, beta_5 = (
    beta_cov[:, col].tolist() for col in range(5)
)

# Fourier coefficients: `alpha` multiplies the sine terms and the Stan
# parameter `beta` (stored here as `b`) the cosine terms.
alpha = fit.stan_variable(var="alpha")
b = fit.stan_variable(var="beta")
alpha_1, alpha_2 = (alpha[:, col].tolist() for col in range(2))
b_1, b_2 = (b[:, col].tolist() for col in range(2))

sigma = fit.stan_variable(var="sigma").tolist()

# Parameters of the spatial covariance a * exp(-phi * dist_w).
a = fit.stan_variable(var="a").tolist()
phi = fit.stan_variable(var="phi").tolist()

In [None]:
import matplotlib.pyplot as plt
import statistics
from math import sqrt


def plot_confidence_interval(x, values, color='#2187bb', horizontal_line_width=0.25):
    """Draw the central 95% interval of ``values`` as a capped vertical bar.

    The bar spans the 2.5% and 97.5% empirical quantiles of ``values`` and is
    drawn at horizontal position ``x`` on the current matplotlib axes, with a
    red dot marking the median.

    Parameters
    ----------
    x : number
        Horizontal position of the bar.
    values : sequence of numbers
        Draws to summarise; must contain at least one value.
    color : str
        Colour of the bar and of its caps.
    horizontal_line_width : float
        Total width of the horizontal caps.

    Returns
    -------
    tuple
        ``(mean, median, [lower, upper])`` computed from ``values``.

    Raises
    ------
    statistics.StatisticsError
        If ``values`` is empty.
    """
    mean_data = statistics.mean(values)
    median_data = statistics.median(values)
    # The standard deviation is deliberately not computed: it was never used,
    # and statistics.stdev raises for a single draw, which made the function
    # fail on one-element input for no reason.

    left = x - horizontal_line_width / 2
    right = x + horizontal_line_width / 2
    bottom = np.quantile(values, 0.025)
    top = np.quantile(values, 0.975)

    confidence_interval = [bottom, top]
    plt.plot([x, x], [top, bottom], color=color)            # vertical bar
    plt.plot([left, right], [top, top], color=color)        # upper cap
    plt.plot([left, right], [bottom, bottom], color=color)  # lower cap
    plt.plot(x, median_data, 'o', color='#f44336')          # median marker

    return mean_data, median_data, confidence_interval

In [None]:
# All scalar summaries in one figure. Tick labels and draws are paired by
# position; note that the tick 'alpha' shows the draws of the variable `a`,
# while 'a1'/'a2' show alpha_1/alpha_2.
tick_labels = ['beta0', 'beta1', 'beta2', 'beta3', 'beta4', 'beta5',
               'a1', 'a2', 'b1', 'b2', 'sigma', 'alpha', 'phi']
posterior_draws = [beta_0, beta_1, beta_2, beta_3, beta_4, beta_5,
                   alpha_1, alpha_2, b_1, b_2, sigma, a, phi]

plt.figure(figsize=(12, 6))
plt.xticks(list(range(1, len(tick_labels) + 1)), tick_labels)
plt.title('Confidence Interval')
plt.axhline(y=0, color= 'black', linestyle = '--')

for position, draws in enumerate(posterior_draws, start=1):
    plot_confidence_interval(position, draws)

plt.show()

In [None]:
# Intervals for the five covariate coefficients only.
tick_labels = ['beta1', 'beta2', 'beta3', 'beta4', 'beta5']
posterior_draws = [beta_1, beta_2, beta_3, beta_4, beta_5]

plt.figure(figsize=(12, 6))
plt.xticks(list(range(1, len(tick_labels) + 1)), tick_labels)
plt.title('Confidence Interval')
plt.axhline(y=0, color= 'black', linestyle = '--')

for position, draws in enumerate(posterior_draws, start=1):
    plot_confidence_interval(position, draws)

plt.show()

In [None]:
# Intervals for the Fourier coefficients: a1/a2 are the sine terms (alpha),
# b1/b2 the cosine terms.
tick_labels = ['a1', 'a2', 'b1', 'b2']
posterior_draws = [alpha_1, alpha_2, b_1, b_2]

plt.figure(figsize=(12, 6))
plt.xticks(list(range(1, len(tick_labels) + 1)), tick_labels)
plt.title('Confidence Interval')
plt.axhline(y=0, color= 'black', linestyle = '--')

for position, draws in enumerate(posterior_draws, start=1):
    plot_confidence_interval(position, draws)

plt.show()

In [None]:
# Intervals for the spatial covariance parameters; the tick 'alpha' shows the
# draws of the variable `a`.
tick_labels = ['alpha', 'phi']
posterior_draws = [a, phi]

plt.figure(figsize=(8, 6))
plt.xticks(list(range(1, len(tick_labels) + 1)), tick_labels)
plt.title('Confidence Interval')
plt.axhline(y=0, color= 'black', linestyle = '--')

for position, draws in enumerate(posterior_draws, start=1):
    plot_confidence_interval(position, draws)

plt.show()