In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from statsmodels.tsa.stattools import adfuller

# Step 1: Build a synthetic daily series that is non-stationary by construction:
# standard-normal noise added to a linear upward trend (slope 0.1 per step).
np.random.seed(42)  # fixed seed so the noise draw is the same on every run
dates = pd.date_range('2023-01-01', periods=100, freq='D')
noise = np.random.randn(len(dates))
trend = 0.1 * np.arange(len(dates))
values = noise + trend
data = pd.Series(data=values, index=dates)

# Step 2: Perform the ADF test to check for stationarity
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and report it.

    Prints the test statistic and the p-value (the first two entries of
    the ``adfuller`` result) and hands the p-value back to the caller.

    Parameters
    ----------
    series : array-like
        The observations to test; passed to ``adfuller`` unchanged.

    Returns
    -------
    The p-value reported by ``adfuller`` (second entry of its result).
    """
    adf_output = adfuller(series)
    test_statistic = adf_output[0]
    p_value = adf_output[1]

    print('ADF Statistic:', test_statistic)
    print('p-value:', p_value)

    return p_value

# Check stationarity: a p-value above 0.05 is read here as
# "likely non-stationary".
adf_p_value = adf_test(data)
is_non_stationary = adf_p_value > 0.05

# Step 3: Report the verdict and difference the data only if non-stationary
if is_non_stationary:
    print("\nThe time series is likely non-stationary (consider differencing).")
    # First-order differencing; diff() leaves a NaN in the first slot,
    # which dropna() removes.
    data_diff = data.diff().dropna()
else:
    print("\nThe time series is likely stationary.")
    # No differencing applied: data_diff is the original series object.
    data_diff = data


ADF Statistic: -0.15026113477284184
p-value: 0.9441964694670332

The time series is likely non-stationary (consider differencing).


In [4]:
# Step 4: Let auto_arima search for the (p, d, q) order.
# A non-seasonal, stepwise search is requested, and trace=True prints one
# line per candidate model.
# NOTE(review): data_diff may already be first-differenced (Step 3), so the
# d selected here is relative to data_diff, not to the raw series.
model = auto_arima(
    data_diff,
    trace=True,
    stepwise=True,
    seasonal=False,
)

# Step 5: Print the summary of the selected model (best parameters)
print(model.summary())


Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=295.161, Time=0.17 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=334.882, Time=0.02 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=311.006, Time=0.05 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=292.635, Time=0.03 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=294.181, Time=0.06 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=294.031, Time=0.05 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=inf, Time=0.43 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.27 sec

Best model:  ARIMA(0,0,1)(0,0,0)[0]          
Total fit time: 1.071 seconds
                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                   99
Model:               SARIMAX(0, 0, 1)   Log Likelihood                -144.318
Date:                Thu, 03 Oct 2024   AIC                            292.635
Time:                        16:37:57   BIC         