# Benchmarking Attention-Based Interpretability of Deep Learning in Multivariate Time Series Predictions
Domjan Barić, Petar Fumić, Davor Horvatić, and Tomislav Lipić

[Special Issue "Human-Centric AI: The Symbiosis of Human and Artificial Intelligence"](https://www.mdpi.com/journal/entropy/special_issues/Human-Centric_AI)

# Synthetic time series datasets - Quantitative and qualitative analysis 

In [None]:
import os
import pickle
import numpy as np
import torch
import random

import data_generator

In [None]:
# Seed NumPy's, PyTorch's and Python's global random number generators
# with the same fixed value.
seed = 2293
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

In [None]:
# Create the output directory tree used by the pickle dumps in this notebook.
# exist_ok=True makes each call a no-op when the directory already exists,
# which replaces the separate os.path.exists() checks (one of which was
# duplicated for 'stres_max_num_series').
for directory in (
    './mdpi_manuscript_data',
    './mdpi_manuscript_data/quant_qual_analysis',
    './mdpi_manuscript_data/stres_frequency',
    './mdpi_manuscript_data/stres_std',
    './mdpi_manuscript_data/stres_max_num_series',
    './mdpi_manuscript_data/stat_model',
):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Default settings for the quantitative/qualitative datasets.
# mean / std are passed to the generators as mu / sigma, frequency as
# noise_frequency and max_num_of_series as number_of_timeseries.
mean, std = 0, 0.1
frequency = 0.3
max_num_of_series, max_lag = 5, 10

## Dataset 1 - N constant time series

$$X_{n,t}=C_n + \epsilon_t,\quad C_n=N(0,1)$$

In [None]:
# Generate dataset 1 with the default noise settings.
# NOTE(review): 20000 is presumably the series length — confirm in data_generator.
dataset1 = data_generator.dataset1(
    20000,
    number_of_timeseries=max_num_of_series,
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset1) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset1.pickle', 'wb') as f:
    pickle.dump((None, dataset1), f)

## Dataset 2 - N autoregressive time series

$$X_{n,t}= c_{t_{lag}} X_{n,t-t_{lag}}+ \epsilon_t,\quad c_3=1/2,c_7=1/2$$

In [None]:
# Generate dataset 2 with the default noise settings.
# NOTE(review): the coef_lag_list pairs appear to be (weight, lag), with the
# weights normalised to the coefficients in the formula above — confirm in
# data_generator.
dataset2 = data_generator.dataset2(
    20000,
    number_of_timeseries=max_num_of_series,
    coef_lag_list=[(1, 3), (1, 7)],
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset2) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset2.pickle', 'wb') as f:
    pickle.dump((None, dataset2), f)

## Dataset 3 - N nonlinear autoregressive time series

$$X_{n,t}=\tanh(c_{t_{lag}} X_{n,t-t_{lag}}+ \epsilon_t),\quad c_3=5/7,c_7=1/7,c_9=1/7$$

In [None]:
# Generate dataset 3 with the default noise settings.
# NOTE(review): the coef_lag_list pairs appear to be (weight, lag), with the
# weights normalised to the coefficients in the formula above — confirm in
# data_generator.
dataset3 = data_generator.dataset3(
    20000,
    number_of_timeseries=max_num_of_series,
    coef_lag_list=[(5, 3), (1, 7), (1, 9)],
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset3) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset3.pickle', 'wb') as f:
    pickle.dump((None, dataset3), f)

## Dataset 4 - two interdependent time series without autoregression

$$X_{1-n,t}=c_{t_{lag}} X_{n,t-t_{lag}}+ \epsilon_t,\quad c_2=2/5,c_5=1/5,c_9=2/5$$

In [None]:
# Generate dataset 4 with the default noise settings (no number_of_timeseries
# argument is passed for this generator).
dataset4 = data_generator.dataset4(
    20000,
    coef_lag_list=[(2, 2), (2, 9), (1, 5)],
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset4) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset4.pickle', 'wb') as f:
    pickle.dump((None, dataset4), f)

## Dataset 5 - First time series is autoregressive, all other time series are calculated from first

\begin{align}
X_{n,t}&=c_{n,t_{lag}} X_{0,t-t_{lag}}+ \epsilon_t\\
&\\
c_{0,3}&=1/2,c_{0,4}=1/2 \\
c_{1,9}&=1\\
c_{2,2}&=1/2,c_{2,7}=1/2\\
c_{3,3}&=1/10,c_{3,4}=1/10,c_{3,8}=4/5\\
c_{4,2}&=1/3,c_{4,5}=2/9,c_{4,8}=4/9
\end{align}

In [None]:
# One coefficient list per time series (five rows for the five series).
# NOTE(review): the pairs appear to be (weight, lag), with the weights
# normalised per row — confirm in data_generator.
coef_lag_list_linear_auto_cross = [
    [(1, 3), (1, 4)],
    [(1, 9)],
    [(1, 2), (1, 7)],
    [(8, 8), (1, 3), (1, 4)],
    [(3, 2), (2, 5), (4, 8)],
]

dataset5 = data_generator.dataset5(
    20000,
    number_of_timeseries=max_num_of_series,
    coef_lag_list=coef_lag_list_linear_auto_cross,
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset5) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset5.pickle', 'wb') as f:
    pickle.dump((None, dataset5), f)

## Dataset 6 - First time series is nonlinear autoregressive, all other time series are calculated from first

\begin{align}
X_{n,t}&=\tanh(c_{n,t_{lag}} X_{0,t-t_{lag}}+ \epsilon_t )\\
&\\
c_{0,3}&=1/2,c_{0,4}=1/2\\
c_{1,9}&=1\\
c_{2,2}&=1/2,c_{2,7}=1/2\\
c_{3,3}&=1/10,c_{3,4}=1/10,c_{3,8}=4/5\\
c_{4,2}&=1/3,c_{4,5}=2/9,c_{4,8}=4/9
\end{align}

In [None]:
# One coefficient list per time series (five rows for the five series).
# NOTE(review): the first row here is [(1, 3)] only, whereas the formula in
# the markdown cell above lists c_{0,3}=1/2, c_{0,4}=1/2 — confirm which one
# is intended. The value is left unchanged.
coef_lag_list_linear_auto_cross = [
    [(1, 3)],
    [(1, 9)],
    [(1, 2), (1, 7)],
    [(8, 8), (1, 3), (1, 4)],
    [(3, 2), (2, 5), (4, 8)],
]

dataset6 = data_generator.dataset6(
    20000,
    number_of_timeseries=max_num_of_series,
    coef_lag_list=coef_lag_list_linear_auto_cross,
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset6) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset6.pickle', 'wb') as f:
    pickle.dump((None, dataset6), f)

## Dataset 7 - custom vector autoregression model

\begin{align}
X_{0,t} &= c_{0,1} X_{0,t-1} + c_{0,5} X_{0,t-5} + \epsilon_t,\quad c_{0,1}=1/4,c_{0,5}=3/4\\ 
X_{1,t} &= 1+c_{1,2} X_{0,t-2} + \epsilon_t,\quad c_{1,2}=-1\\ 
X_{2,t} &= c_{2,1} X_{1,t-1} + c_{2,4} X_{3,t-4} + \epsilon_t,\quad c_{2,1}=1,c_{2,4}=1\\ 
X_{3,t} &= 1+c_{3,4} X_{2,t-4} + c_{3,1}X_{0,t-1} + \epsilon_t,\quad c_{3,4}=-2/7,c_{3,1}=5/7\\ 
X_{4,t} &= c_{4,4} X_{4,t-4} + c_{4,1} X_{1,t-1} + \epsilon_t,\quad c_{4,4}=12/22,c_{4,1}=10/22 
\end{align}

In [None]:
# Generate dataset 7 with the default noise settings; only the noise
# parameters are passed to this generator.
dataset7 = data_generator.dataset7(
    20000,
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset7) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset7.pickle', 'wb') as f:
    pickle.dump((None, dataset7), f)

## Dataset 8 - switching time series

\begin{align}
\mathrm{if} X_{0,t-5} &> {}1/2:\\
X_{0,t} &= c_{0,1} X_{0,t-1} + c_{0,3} X_{0,t-3} + \epsilon_t\\ 
X_{1,t} &=  X_{0,t-5} + \epsilon_t\\ 
X_{2,t} &= X_{0,t-4} + \epsilon_t\\
X_{3,t} &= c_{3,1} X_{3,t-1} + c_{3,4} X_{3,t-4} + \epsilon_t\\
\mathrm{else:}\\
X_{0,t} &= c_{0,1} X_{0,t-1} + c_{0,3} X_{0,t-3} + \epsilon_t\\
X_{1,t} &= X_{3,t-2} + \epsilon_t\\ 
X_{2,t} &= X_{3,t-4} + \epsilon_t\\ 
X_{3,t} &= c_{3,1} X_{3,t-1} + c_{3,4} X_{3,t-4} + \epsilon_t\\
&\\
c_{0,1}=1/2&,c_{0,3}=1/2\\
c_{3,1}=1/2&,c_{3,4}=1/2
\end{align}



In [None]:
# Generate dataset 8 with the default noise settings.
# NOTE(review): the second positional argument (0.5) is presumably the
# switching threshold from the formula above — confirm in data_generator.
dataset8 = data_generator.dataset8(
    20000,
    0.5,
    noise_frequency=frequency,
    mu=mean,
    sigma=std,
)

# Store as a (None, dataset8) tuple; the context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./mdpi_manuscript_data/quant_qual_analysis/dataset8.pickle', 'wb') as f:
    pickle.dump((None, dataset8), f)

## Dataset 9 -  Ising model on first-order 2D square lattice

$$H(\sigma)=-\sum_{\langle i,j\rangle} J_{i,j}\sigma_i\sigma_j-\mu\sum_{j}h_j\sigma_j,\quad T=2, T_c, 2.75$$

In [None]:
# Generate one dataset 9 series per value of t and pickle each together
# with a small metadata dict.
for t in [2, 2.269185, 2.75]:
    ts = data_generator.dataset9(t)
    print(ts.shape)

    # Metadata stored alongside the data.
    note_dict = {
        'physics': 'Ising',
        'order': 1,
        'shape': (10, 10),
        'T': t,
    }

    # The context manager closes each file before the next iteration starts.
    with open(f'./mdpi_manuscript_data/stat_model/dataset9_T{t}.pickle', 'wb') as f:
        pickle.dump((note_dict, ts), f)

## Dataset 10 - Logistic map inspired model

\begin{align}
X_{0,t} &= r X_{0,t-3} (1-X_{0,t-3})\\ 
X_{1,t} &= r X_{1,t-5} (1-X_{1,t-5})\\
X_{2,t} &= 1/2 X_{0,t-3} + 1/2 X_{1,t-5}\\
&\\
r&=1.5, 2.5, 3.2, 3.55, 3.56996
\end{align}

In [None]:
# Values of r for which a dataset 10 series is generated.
# NOTE(review): the markdown cell above lists the last value as 3.56996,
# while this list uses 3.57996 — confirm which one is intended. The value is
# left unchanged because it also determines the output file name.
r_list = [1.5, 2.5, 3.2, 3.55, 3.57996]

for r in r_list:
    ts = data_generator.dataset10(20000, r)

    # Metadata stored alongside the data.
    note_dict = {'physics': 'Logistic', 'r': r}

    # The context manager closes each file before the next iteration starts.
    with open(f'./mdpi_manuscript_data/stat_model/dataset10_{r}.pickle', 'wb') as f:
        pickle.dump((note_dict, ts), f)

# Sensitivity analysis

### Frequency test

In [None]:
# Fixed settings for the frequency test; the noise frequency itself is
# varied by the loop in the next cell.
mean, std = 0, 0.1
max_num_of_series, max_lag = 5, 10

# One coefficient list per time series (five rows).
coef_lag_list = [
    [(1, 6), (1, 9)],
    [(1, 7)],
    [(1, 6), (2, 7), (1, 8)],
    [(8, 8), (1, 9)],
    [(3, 6), (2, 9), (1, 8)],
]

In [None]:
# Sweep the noise frequency from 0 to 1 in steps of 0.05 and generate
# dataset 2 and dataset 7 for each value.
for percent in range(0, 105, 5):
    # The integer percentage is what gets printed; the generators and the
    # file names use the fraction.
    print(percent, mean, std, max_num_of_series, max_lag)
    frequency = percent / 100

    dataset2_freq = data_generator.dataset2(
        20000,
        number_of_timeseries=max_num_of_series,
        coef_lag_list=coef_lag_list,
        noise_frequency=frequency,
        mu=mean,
        sigma=std,
    )
    # Each file is closed by its context manager before the next one opens.
    with open(f'./mdpi_manuscript_data/stres_frequency/dataset2_{frequency}.pickle', 'wb') as f:
        pickle.dump((frequency, dataset2_freq), f)

    dataset7_freq = data_generator.dataset7(
        20000,
        noise_frequency=frequency,
        mu=mean,
        sigma=std,
    )
    with open(f'./mdpi_manuscript_data/stres_frequency/dataset7_{frequency}.pickle', 'wb') as f:
        pickle.dump((frequency, dataset7_freq), f)

### Noise amount test

In [None]:
# Fixed settings for the noise amount test; the standard deviation itself
# is varied by the loop further below.
mean, frequency = 0, 0.3
max_num_of_series, max_lag = 5, 10

# One coefficient list per time series (five rows).
coef_lag_list = [
    [(1, 6), (1, 9)],
    [(1, 7)],
    [(1, 6), (2, 7), (1, 8)],
    [(8, 8), (1, 9)],
    [(3, 6), (2, 9), (1, 8)],
]

In [None]:
# Noise standard deviations to sweep over. The whole-number entries are kept
# as ints because each value is interpolated into the output file name.
std_list = [
    0.01, 0.05, 0.1, 0.2, 0.5,
    1, 2, 3, 5,
]

In [None]:
# Generate dataset 2 and dataset 7 once for every noise standard deviation
# in std_list.
for std in std_list:
    print(std, mean, frequency, max_num_of_series, max_lag)

    dataset2_amount = data_generator.dataset2(
        20000,
        number_of_timeseries=max_num_of_series,
        coef_lag_list=coef_lag_list,
        noise_frequency=frequency,
        mu=mean,
        sigma=std,
    )
    # Each file is closed by its context manager before the next one opens.
    with open(f'./mdpi_manuscript_data/stres_std/dataset2_{std}.pickle', 'wb') as f:
        pickle.dump((std, dataset2_amount), f)

    dataset7_amount = data_generator.dataset7(
        20000,
        noise_frequency=frequency,
        mu=mean,
        sigma=std,
    )
    with open(f'./mdpi_manuscript_data/stres_std/dataset7_{std}.pickle', 'wb') as f:
        pickle.dump((std, dataset7_amount), f)

### Number of time series test

In [None]:
# Fixed settings for the number-of-time-series test; the number of series
# is varied by the loop in the next cell.
mean, std = 0, 0.1
frequency = 0.3
max_lag = 10

In [None]:
# Candidate lags (5..9) from which each series draws; this list does not
# change between iterations, so it is built once.
lags = list(range(5, 10))

# For every odd series count from 3 to 19, draw a random coefficient list per
# series and generate dataset 2 and dataset 5 with it.
for max_num_of_series in range(3, 21, 2):
    coef_lag_list = []
    for i in range(max_num_of_series):
        # Draw 1-3 distinct lags, then one integer weight in [1, 10] per lag.
        # The order of the random calls matches the original loop, so the
        # seeded sequence of draws is unchanged.
        numb_of_lags = random.randint(1, 3)
        lags_sample = random.sample(lags, numb_of_lags)
        coef_lag = [(random.randint(1, 10), j) for j in lags_sample]
        coef_lag_list.append(coef_lag)
    print(max_num_of_series, len(coef_lag_list), mean, frequency, std, max_lag)

    dataset2_no_of_series = data_generator.dataset2(
        20000,
        number_of_timeseries=max_num_of_series,
        coef_lag_list=coef_lag_list,
        noise_frequency=frequency,
        mu=mean,
        sigma=std,
    )
    # The series count and the sampled coefficients are stored next to the
    # data; each file is closed by its context manager.
    with open(f'./mdpi_manuscript_data/stres_max_num_series/dataset2_{max_num_of_series}.pickle', 'wb') as f:
        pickle.dump(((max_num_of_series, coef_lag_list), dataset2_no_of_series), f)

    dataset5_no_of_series = data_generator.dataset5(
        20000,
        number_of_timeseries=max_num_of_series,
        coef_lag_list=coef_lag_list,
        noise_frequency=frequency,
        mu=mean,
        sigma=std,
    )
    with open(f'./mdpi_manuscript_data/stres_max_num_series/dataset5_{max_num_of_series}.pickle', 'wb') as f:
        pickle.dump(((max_num_of_series, coef_lag_list), dataset5_no_of_series), f)