In [1]:
import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA

import warnings
from tqdm.notebook import tqdm

warnings.filterwarnings('ignore')

In [2]:
def readucr(filename):
    """Load a UCR-style text file into a feature matrix and a label vector.

    The file is parsed with ``np.loadtxt``. The first column of every row is
    returned as the label and the remaining columns as the series values.

    Parameters
    ----------
    filename : str, path-like or file-like
        Anything ``np.loadtxt`` accepts as its input.

    Returns
    -------
    X : np.ndarray
        2-D array holding every column except the first.
    Y : np.ndarray
        1-D array holding the first column.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt hands
    # back a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(filename, ndmin=2)
    Y = data[:, 0]
    X = data[:, 1:]
    return X, Y

In [3]:
# Shared path prefix of the two split files read below.
fname = "../../datasets/MixedShapesSmallTrain/MixedShapesSmallTrain"

# readucr returns an (X, Y) pair per file; the TRAIN file is read first,
# then the TEST file.
(x_train, y_train), (x_test, y_test) = (
    readucr(fname + suffix) for suffix in ("_TRAIN.txt", "_TEST.txt")
)

In [4]:
# Collect one record per training series and build the frame once at the end.
# Concatenating inside the loop re-copies every earlier row on each iteration.
summary_records = []

for row in tqdm(x_train, total=len(x_train)):
    time_series = pd.Series(row)

    # AR(1) fit; its results feed the Breusch-Godfrey and Jarque-Bera entries.
    model = ARIMA(time_series, order=(1, 0, 0))
    model_fit = model.fit()

    # A single PACF call up to the largest lag replaces three separate calls.
    # Entry k of the result is the lag-k value (entry 0 is lag 0).
    # NOTE(review): presumably the default estimator treats each lag
    # independently, so lags 1 and 12 match the former nlags=1 / nlags=12
    # calls -- confirm against the pacf documentation.
    # NOTE(review): the saved output contains PACF values above 1 in
    # magnitude for some series -- the default estimator may be unstable on
    # these strongly autocorrelated inputs; consider an explicit `method`.
    pacf_values = sm.tsa.stattools.pacf(time_series, nlags=24)

    # One-row frame indexed by lag; the statistic is read by label below
    # instead of by integer position on a string-labelled row.
    ljung_box = sm.stats.diagnostic.acorr_ljungbox(time_series, lags=[1])

    summary_statistics = {
        "Mean": time_series.mean(),
        "Median": time_series.median(),
        "Minimum": time_series.min(),
        "Maximum": time_series.max(),
        "Standard Deviation": time_series.std(),
        "Variance": time_series.var(),
        "Skewness": time_series.skew(),
        "Kurtosis": time_series.kurtosis(),
        "Autocorrelation (lag 1)": time_series.autocorr(1),
        "Autocorrelation (lag 12)": time_series.autocorr(12),
        "Autocorrelation (lag 24)": time_series.autocorr(24),
        "Partial Autocorrelation (lag 1)": pacf_values[1],
        "Partial Autocorrelation (lag 12)": pacf_values[12],
        "Partial Autocorrelation (lag 24)": pacf_values[24],
        # For each test below, element [0] of the result is the test statistic.
        "Augmented Dickey-Fuller Test (ADF)": sm.tsa.stattools.adfuller(time_series)[0],
        "KPSS Test": sm.tsa.stattools.kpss(time_series)[0],
        "Ljung-Box Q-Statistic": ljung_box.loc[1, "lb_stat"],
        # NOTE(review): this column is negative in the saved output, which an
        # LM statistic should not be -- acorr_breusch_godfrey may not be
        # meaningful for an ARIMA results object; confirm before relying on it.
        "Breusch-Godfrey LM Test (lag 1)": sm.stats.diagnostic.acorr_breusch_godfrey(model_fit, nlags=1)[0],
        "Jarque-Bera Test": sm.stats.stattools.jarque_bera(model_fit.resid)[0],
    }
    summary_records.append(summary_statistics)

# One row per series, indexed 0..n-1 in the order of x_train.
summary_df = pd.DataFrame(summary_records)

  0%|          | 0/100 [00:00<?, ?it/s]

In [5]:
# Display the per-series summary statistics table.
summary_df

Unnamed: 0,Mean,Median,Minimum,Maximum,Standard Deviation,Variance,Skewness,Kurtosis,Autocorrelation (lag 1),Autocorrelation (lag 12),Autocorrelation (lag 24),Partial Autocorrelation (lag 1),Partial Autocorrelation (lag 12),Partial Autocorrelation (lag 24),Augmented Dickey-Fuller Test (ADF),KPSS Test,Ljung-Box Q-Statistic,Breusch-Godfrey LM Test (lag 1),Jarque-Bera Test
0,-2.230469e-10,0.202233,-2.768117,1.299555,1.0,1.0,-1.320166,1.206041,0.999369,0.966948,0.883821,0.999116,-0.057891,-0.003627,-2.907995,0.622511,1023.185896,-334.050955,2.705874e+06
1,-4.354489e-10,0.282819,-2.301573,1.439139,1.0,1.0,-0.844043,-0.371806,0.999470,0.957344,0.861283,0.999777,4.229177,-1.172004,-2.684520,1.190591,1024.541949,-185.874895,1.001770e+06
2,4.163087e-10,0.308884,-2.520600,1.545732,1.0,1.0,-1.020462,0.380723,0.999767,0.977616,0.918372,1.000096,16.138409,0.173878,-3.171301,0.337277,1025.194997,-477.578002,5.103624e+06
3,-1.193506e-09,-0.059168,-2.023829,2.295538,1.0,1.0,-0.007866,-0.644111,0.999261,0.927516,0.773311,0.996544,-0.019289,-0.026892,-3.033249,0.230055,1017.925497,-596.143702,1.155163e+07
4,4.242282e-10,0.059496,-2.585603,1.918500,1.0,1.0,-0.554382,0.043926,0.999475,0.941155,0.809326,0.996973,-0.019199,-0.010614,-2.555585,1.573338,1018.802348,-867.944557,1.529564e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1.985644e-10,-0.037019,-1.506735,2.014736,1.0,1.0,0.122834,-1.372074,0.999693,0.967828,0.883222,0.998141,-0.035436,-0.020229,-3.291150,0.405277,1021.190337,-1012.970257,2.011131e+07
96,-7.408208e-11,-0.031385,-1.703345,1.952273,1.0,1.0,0.088966,-1.203614,0.999657,0.967764,0.883872,0.998782,-0.064497,-0.005773,-3.916389,0.398699,1022.502115,-822.177103,1.474280e+07
97,-2.879883e-10,0.022606,-1.767749,1.935001,1.0,1.0,-0.032496,-1.210016,0.999660,0.965228,0.872048,0.997680,-0.031412,-0.012031,-3.680116,0.465370,1020.247271,-726.178491,2.085361e+07
98,-1.132226e-09,0.029026,-1.816792,2.174878,1.0,1.0,-0.008071,-1.130238,0.999607,0.960774,0.857433,0.999538,-0.058187,-0.003989,-3.470021,0.454329,1024.050896,-488.127384,4.495195e+06
