<a href="https://colab.research.google.com/github/io8ex/2021-assignment-numpy/blob/main/Batch_Time_Series_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

from statsmodels.tsa.arima_model import ARIMA
from tqdm import tqdm

In [2]:
import numpy as np


# create dataset
def create_dataset(X : np.array, n_samples : int, mode : str='direct'):
    """
    Function takes time-series X as an input and provides time-series Xo and 
    the test data yo for output.
    ----------------------------------
    n_samples : int, number of samples
    mode : string,
    - 'direct': X0 are the generated batches, yo is x[t+1]
    - 'log': X0 - outputs the log-returns of the data, yo is the next log-return.
    ----------------------------------
    X0: np.array, shape: (len(X) - n_samples - 1, n_samples)
    y0: np.array, shape: len(X) - n_samples - 1
    """
    if mode == 'direct':
        X0 = np.zeros((len(X) - n_samples - 1, n_samples))
        y0 = np.zeros(len(X) - n_samples - 1)
        for i in range(len(X) - n_samples - 1):
            X0[i] = X[i : i + n_samples]
            y0[i] = X[i + n_samples + 1]
    elif mode == 'log':
        X0 = np.zeros((len(X) - n_samples - 2, n_samples))
        y0 = np.zeros(len(X) - n_samples - 2)
        X_return = np.log(X[1:] / X[:-1])
        for i in range(len(X) - n_samples - 2):
            X0[i] = X_return[i:i + n_samples]
            y0[i] = X_return[i + n_samples + 1]
    else:
        raise(NotImplementedError("Unknown method"))

    return X0, y0

In [3]:
# generate test data
n = 100
X = 1 + np.random.rand(n)
X_return = np.log(X[1:] / X[:-1])
X_return[0: 50]

# Xo, yo = create_dataset(X, 50, mode='lq')
# for x, y in zip(xo, yo):
#   print(x, y)
#   break

array([ 0.00612052,  0.32194411,  0.2685502 ,  0.01771698, -0.56836038,
        0.24054234, -0.16599807,  0.37073191, -0.13286607, -0.01147125,
        0.1947951 , -0.3402079 ,  0.33199339, -0.04838027, -0.21562229,
        0.35100725, -0.52982053,  0.54314776, -0.36868216,  0.32867029,
        0.03033115, -0.25374327, -0.09256205, -0.12641262,  0.21353369,
       -0.06772437,  0.21225586, -0.18664178, -0.15377631,  0.39636445,
       -0.18282698,  0.13716919, -0.27985371,  0.10052975, -0.07153252,
        0.33183758, -0.10027653, -0.17621197,  0.17248626, -0.41484036,
        0.22070072, -0.28637871,  0.47758372,  0.10388404, -0.19666094,
        0.1345593 , -0.07077073,  0.01089661, -0.30977792,  0.42466598])

In [4]:
!pip install statsmodels==0.13



In [119]:
import numpy as np
import pmdarima as pm

from pmdarima.arima import ndiffs
from statsmodels.tsa.arima.model import ARIMA
from tqdm import tqdm

class ARIMA_model():
    def __init__(self, p=None, q=None, r=None, auto=False):
        """
        p, q, r: params of ARIMA model,
        see https://www.statsmodels.org/devel/generated/statsmodels.tsa.arima.model.ARIMA.html
        """
        self.auto = auto
        self.p = p
        self.q = q
        self.r = r
    
    def fit_predict(self, X, y):
        y_pred = np.zeros_like(y)

        if self.auto:
            for i in tqdm(range(len(X))):
                ts = X[i]
                model = pm.arima.auto_arima(
                    ts, d=0, max_p=10, max_d=5, 
                    max_q=10, trace=True, error_action='ignore', 
                    suppress_warnings=True
                )
                model_fit = model.fit(ts)
                y_pred[i] = model_fit.predict(n_periods=1)[0]
            return y_pred
        
        for i in tqdm(range(len(X))):
            ts = X[i]
            model = ARIMA(ts, order=(self.p, self.q, self.r))
            model_fit = model.fit()
            y_pred[i] = model_fit.forecast()[0]
        return y_pred

from sklearn.metrics import mean_squared_error as MSE

n_samples = 70
p, q, r = 6, 0, 0

model = ARIMA_model(p, q, r)
X_train, y_train = create_dataset(X, n_samples)
y_pred = model.fit_predict(X_train, y_train)
print()
print("MSE: %.5f" %MSE(y_train, y_pred))

100%|██████████| 29/29 [00:04<00:00,  6.35it/s]


MSE: 0.38167





In [110]:
y_pred

array([ 0.79501227,  1.45922177,  0.55845862,  0.45736232,  0.87639513,
        0.5563469 ,  0.94339388,  1.10440104,  0.41989757,  0.61777606,
        0.42770182,  0.45051689,  0.96635598,  2.00212564,  1.23665502,
        0.37679349,  0.97205809,  0.23288002,  0.70335965,  1.59012535,
       -0.01889175,  1.25232171,  0.61057343,  0.80078336,  0.82414856,
        0.42646696,  0.58157215,  0.59803697])

In [118]:
y_train

array([ 9.47712104e-01, -1.94868472e-03, -5.49291144e-01,  2.95788181e-01,
       -3.00851061e+00,  3.01101367e+00, -1.61216653e-01,  2.08039543e-01,
        9.74777043e-02,  4.01105217e-02,  1.08501968e+00, -1.54992138e+00,
        5.28066250e-02, -1.78888630e-01,  5.28413806e-01,  1.70267569e-01,
       -4.56054759e-01, -1.31177658e+00,  6.51911826e-01,  9.20664627e-01,
       -9.34074070e-01, -6.36330228e-01,  1.82750716e+00, -3.90802476e-03,
       -3.11592703e-01,  3.92129161e-01, -4.96976780e-01,  3.01857597e-01])

In [111]:
y_train

array([ 0.9477121 , -0.00194868, -0.54929114,  0.29578818,         nan,
               nan, -0.16121665,  0.20803954,  0.0974777 ,  0.04011052,
               nan, -1.54992138,         nan, -0.17888863,  0.52841381,
        0.17026757, -0.45605476, -1.31177658,  0.65191183,  0.92066463,
       -0.93407407,         nan,         nan, -0.00390802, -0.3115927 ,
        0.39212916, -0.49697678,  0.3018576 ])

In [98]:
y_train

array([ 0.46635871,  1.20311247,  1.20077026,  0.69327543,  0.93189072,
       -0.04600292,  0.93422621,  0.79512708,  0.97900965,  1.07924737,
        1.12341645, -3.3247488 , -0.70572668,  0.74399525,  0.6221281 ,
        1.05527832,  1.25116124,  0.79295989,  0.21357671,  0.40989781,
        1.02923646,  0.404438  , -0.21404113,  1.3309861 ,  1.32579472,
        0.97085259,  1.43698695,  0.8742156 ,  1.18226175])

In [95]:
ts = X[:70]
model = pm.arima.auto_arima(
    X[:40], d=2, max_p=10, max_d=5, max_q=10
)
model.fit(X[:40])
model.predict(n_periods=1)

array([0.47006628, 0.48765545, 0.48000763, 0.49403317, 0.53989618,
       0.54850321, 0.61495003, 0.52754327, 0.52336233, 0.51844106,
       0.52778338, 0.53267596, 0.53275089, 0.47129921, 0.44977015,
       0.45041803, 0.45041088, 0.46117629, 0.47803904, 0.51207618,
       0.42943089, 0.4759866 , 0.57982496, 0.45469142, 0.35775524,
       0.49614681, 0.61639365, 0.51314345, 0.57707593])

In [82]:
i = 50
ts = X[:i]
model = ARIMA(ts, order=(p,q,r))
model_fit = model.fit()
print(model_fit.forecast())
y_pred[i] = model_fit.forecast()[0]

[0.51397188]


NameError: ignored

In [70]:
X[40]

0.7077896496544145

In [None]:
4, 0, 1: 0.12670
5, 0, 1: 0.08793

In [18]:
X = np.log(abs(1 + 2 * np.random.rand(100) + np.random.randn(100)))

In [19]:
## Adf Test
p = ndiffs(X, test='adf')  # 2

# KPSS test
q = ndiffs(X, test='kpss')  # 0

# PP test:
r = ndiffs(X, test='pp')  # 2
print(p, q, r)

0 0 0


In [16]:
X

array([ 1.27634356,  1.0418252 ,  2.3630003 ,  0.52977488,  3.24390484,
        1.30141855, -0.06287721,  2.9392314 ,  2.11015041,  2.33265332,
        2.55808472,  0.47058647,  1.8578164 ,  0.66527874,  1.67995364,
       -0.18613473,  2.75102932,  2.3158076 ,  1.39461294,  1.98658066,
        1.48805129,  1.95078067, -0.98661976,  2.52598897,  3.01224464,
        2.92923557,  3.08085817,  1.65996193,  3.00094882,  3.77984321,
        2.33658719,  2.01308967,  2.87785686,  1.39799577,  3.25911694,
        3.10173236,  3.73779328,  2.55694665,  2.53887584,  0.46783639,
        2.03840799,  0.90651426,  2.12468012,  1.40084598,  2.64796244,
        1.25110089,  0.78221098,  1.86899393,  2.32616048,  1.45056117,
        1.87004952,  2.34548157,  2.29533935,  0.21547568,  2.91561504,
       -1.10273835,  1.22898187,  1.57656775,  0.41188089,  1.88228755,
        0.06203251,  0.65520124,  0.60585063,  3.00612991,  1.28740355,
        1.16970428,  3.33852484,  3.42050802,  1.50273041,  1.15

In [None]:
y_pred

In [None]:
y_train