In [1]:
import os
import sys

import numpy as np

# The helper modules live outside this notebook's directory, so their folder
# is pushed to the front of the import search path before they are imported.
utils_dir = '../component'
sys.path.insert(0, os.path.abspath(utils_dir))

# Wildcard imports: every public name of both modules lands in the notebook
# namespace (TimeSeriesGenerator, used below, presumably comes from
# class_data_generator -- confirm against that module).
from utils import *
from class_data_generator import *

# Seed NumPy's global random state so repeated runs start from the same state.
np.random.seed(6313)

### Generate deterministic data without exogenous input

In [3]:
# Example 1: random data following a normal distribution.
# Seasonality and trend are both switched off; mean=1 and std=sqrt(2) are
# handed to the generator as-is.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_d_1, df = generator.gen_determ_data(
    inputs={
        'type': 'random',
        'param': 1,
    },
    noise_mean=0,
    noise_var=1,
    seasonality=False,
    trend=False,
    mean=1,
    std=np.sqrt(2),
)

# Last expression of the cell: display the returned table.
df

Unnamed: 0,ts_data
2023-01-01,2.355268
2023-01-02,-1.013961
2023-01-03,1.738343
2023-01-04,0.007732
2023-01-05,-0.292297
...,...
2036-09-04,-1.520033
2036-09-05,0.519095
2036-09-06,-1.109830
2036-09-07,-0.229859


In [3]:
# Example 2: linear data.
# The 'linear' input type receives its slope (0.5) through the nested
# 'kwargs' dictionary; seasonality and trend stay disabled.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_d_2, df = generator.gen_determ_data(
    inputs={
        'type': 'linear',
        'param': 1,
        'kwargs': {'slope': 0.5},
    },
    noise_mean=0,
    noise_var=1,
    seasonality=False,
    trend=False,
    mean=1,
    std=np.sqrt(2),
)

# Last expression of the cell: display the returned table.
df

Unnamed: 0,ts_data
2023-01-01,1.287477
2023-01-02,0.130681
2023-01-03,0.271602
2023-01-04,0.001501
2023-01-05,0.754626
...,...
2036-09-04,2497.677713
2036-09-05,2496.646725
2036-09-06,2499.155027
2036-09-07,2499.720059


In [4]:
# Example 3: random data with a seasonal component.
# seasonality=True is combined with period_s=365 and amplitude=2;
# the trend component stays disabled.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_d_3, df = generator.gen_determ_data(
    inputs={
        'type': 'random',
        'param': 1,
    },
    noise_mean=0,
    noise_var=1,
    seasonality=True,
    trend=False,
    mean=1,
    std=np.sqrt(2),
    period_s=365,
    amplitude=2,
)

# Last expression of the cell: display the returned table.
df


Unnamed: 0,ts_data
2023-01-01,-1.502105
2023-01-02,2.198493
2023-01-03,0.249476
2023-01-04,0.509593
2023-01-05,-0.395864
...,...
2036-09-04,-1.982759
2036-09-05,-3.689757
2036-09-06,-1.944305
2036-09-07,-2.610840


In [5]:
# Example 4: random data with a trend component.
# trend=True is combined with trend_type='linear' and slope=0.01;
# the seasonal component stays disabled.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_d_4, df = generator.gen_determ_data(
    inputs={
        'type': 'random',
        'param': 1,
    },
    noise_mean=0,
    noise_var=1,
    seasonality=False,
    trend=True,
    mean=1,
    std=np.sqrt(2),
    trend_type='linear',
    slope=0.01,
)

# Last expression of the cell: display the returned table.
df


Unnamed: 0,ts_data
2023-01-01,-1.248896
2023-01-02,3.122299
2023-01-03,-1.666614
2023-01-04,0.836376
2023-01-05,-0.893222
...,...
2036-09-04,47.850357
2036-09-05,49.582315
2036-09-06,48.734149
2036-09-07,50.147161


### Generate stochastic data without exogenous input (from ARIMA-family models: AR, MA, ARMA, ARIMA, SARIMA)

In [6]:
# Example 1: AR process.
# Only one autoregressive coefficient (0.6) is supplied; the MA and seasonal
# coefficient lists are empty and both differencing orders are zero.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_s_1, df = generator.gen_stochs_data(
    ar_para=[0.6],
    ma_para=[],
    sar_para=[],
    sma_para=[],
    d=0,
    D=0,
    seasonal_period=0,
    var_WN=1,
)

# Last expression of the cell: display the returned date-indexed series.
df

2023-01-01   -1203.223340
2023-01-02    -723.178560
2023-01-03    -433.386754
2023-01-04    -261.457186
2023-01-05    -157.670276
                 ...     
2036-09-04       1.834544
2036-09-05       0.716980
2036-09-06       1.806099
2036-09-07       2.557107
2036-09-08       0.737245
Freq: D, Length: 5000, dtype: float64

In [7]:
# Example 2: MA process.
# Only one moving-average coefficient (0.5) is supplied; the AR and seasonal
# coefficient lists are empty and both differencing orders are zero.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_s_2, df = generator.gen_stochs_data(
    ar_para=[],
    ma_para=[0.5],
    sar_para=[],
    sma_para=[],
    d=0,
    D=0,
    seasonal_period=0,
    var_WN=1,
)

# This cell displays both return values together (raw array and dated series).
data_s_2, df

(array([ 3.01545793e+02,  2.54970306e+02,  4.80075392e-01, ...,
        -2.98541514e-01, -4.77488569e-01, -2.90537442e+00]),
 2023-01-01    301.545793
 2023-01-02    254.970306
 2023-01-03      0.480075
 2023-01-04      1.275017
 2023-01-05     -0.140852
                  ...    
 2036-09-04      0.072178
 2036-09-05     -1.056510
 2036-09-06     -0.298542
 2036-09-07     -0.477489
 2036-09-08     -2.905374
 Freq: D, Length: 5000, dtype: float64)

In [8]:
# Example 3: ARMA process.
# One AR coefficient (0.6) plus one MA coefficient (0.3); no seasonal terms
# and no differencing.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_s_3, df = generator.gen_stochs_data(
    ar_para=[0.6],
    ma_para=[0.3],
    sar_para=[],
    sma_para=[],
    d=0,
    D=0,
    seasonal_period=0,
    var_WN=1,
)

# Last expression of the cell: display the returned date-indexed series.
df

2023-01-01   -1060.593253
2023-01-02   -2280.651846
2023-01-03   -1369.609199
2023-01-04    -820.816092
2023-01-05    -491.862636
                 ...     
2036-09-04       1.310993
2036-09-05       0.340676
2036-09-06       0.216838
2036-09-07       2.104576
2036-09-08       1.503198
Freq: D, Length: 5000, dtype: float64

In [9]:
# Example 4: ARIMA process.
# Same AR (0.6) and MA (0.3) coefficients as the ARMA example, but with the
# non-seasonal differencing order d raised to 1.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_s_4, df = generator.gen_stochs_data(
    ar_para=[0.6],
    ma_para=[0.3],
    sar_para=[],
    sma_para=[],
    d=1,
    D=0,
    seasonal_period=0,
    var_WN=1,
)

# Last expression of the cell: display the returned date-indexed series.
df


2023-01-01    1870.412326
2023-01-02    2097.631994
2023-01-03    2232.939405
2023-01-04    2313.543230
2023-01-05    2361.431837
                 ...     
2036-09-04    2530.489221
2036-09-05    2532.177143
2036-09-06    2531.609901
2036-09-07    2529.600934
2036-09-08    2527.454178
Freq: D, Length: 5000, dtype: float64

In [14]:
# Example 5: SARIMA process.
# Purely seasonal specification: one seasonal AR coefficient (0.8), two
# seasonal MA coefficients (0.6, 0.5), seasonal differencing D=1 and a
# seasonal period of 12. The non-seasonal AR/MA lists are empty and d=0.
#
# The result is stored as `data_s_5` (this cell previously reused `data_s_4`),
# so the ARIMA array produced by Example 4 is no longer overwritten.
generator = TimeSeriesGenerator(n_samples=5000, freq='D')
data_s_5, df = generator.gen_stochs_data(ar_para=[],
                                         ma_para=[],
                                         sar_para=[0.8],
                                         sma_para=[0.6, 0.5],
                                         d=0,
                                         D=1,
                                         seasonal_period=12,
                                         var_WN=1)

# Last expression of the cell: display the returned date-indexed series.
df


2023-01-01     765.557823
2023-01-02   -2965.283166
2023-01-03    1320.571029
2023-01-04    1327.915602
2023-01-05     777.074985
                 ...     
2036-09-04   -2174.148714
2036-09-05   -2704.107129
2036-09-06   -4587.963309
2036-09-07    5523.534493
2036-09-08   -7833.411243
Freq: D, Length: 5000, dtype: float64

### Generate deterministic data with one exogenous input

In [2]:
# Example 1: random data combined with a random exogenous input.
# The call returns three objects; only the combined table (df_ts) is shown.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_series, exog_df, df_ts = generator.gen_determ_data_exo_1(
    data_inputs={
        'type': 'random',
        'param': 1,
    },
    exog_inputs={
        'type': 'random',
        'param': 0.8,
    },
    noise_mean=0,
    noise_var=1,
    seasonality=False,
    trend=False,
)

# Last expression of the cell: display the combined table.
df_ts

Unnamed: 0,ts_data,exog_1
2023-01-01,3.385250,1.287477
2023-01-02,-1.309417,-0.369319
2023-01-03,1.155625,-0.728398
2023-01-04,-1.191067,-1.498499
2023-01-05,-1.288596,-1.245374
...,...,...
2036-09-04,-1.377863,0.177713
2036-09-05,-0.563526,-1.353275
2036-09-06,-0.585809,0.655027
2036-09-07,0.346189,0.720059


In [3]:
# Example 2: cosine data combined with a linear exogenous input.
# Each input passes its shape parameters through a nested 'kwargs' dict:
# period/amplitude for the cosine series, slope for the linear exogenous one.
generator = TimeSeriesGenerator(
    n_samples=5000,
    freq='D',
)

data_series, exog_df, df_ts = generator.gen_determ_data_exo_1(
    data_inputs={
        'type': 'cosine',
        'param': 1,
        'kwargs': {'period': 365, 'amplitude': 1.5},
    },
    exog_inputs={
        'type': 'linear',
        'param': 0.8,
        'kwargs': {'slope': 0.6},
    },
    noise_mean=0,
    noise_var=1,
    seasonality=False,
    trend=False,
)

# Last expression of the cell: display the combined table.
df_ts

Unnamed: 0,ts_data,exog_1
2023-01-01,-0.853512,0.0
2023-01-02,3.423736,0.6
2023-01-03,3.731976,1.2
2023-01-04,2.659296,1.8
2023-01-05,3.367896,2.4
...,...,...
2036-09-04,2398.244361,2997.0
2036-09-05,2396.452985,2997.6
2036-09-06,2397.432466,2998.2
2036-09-07,2398.036097,2998.8
