In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import sklearn.metrics as metrics

from sklearn.model_selection import TimeSeriesSplit
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# https://www.statsmodels.org/dev/examples/notebooks/generated/stationarity_detrending_adf_kpss.html
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss


# https://towardsdatascience.com/time-series-modeling-using-scikit-pandas-and-numpy-682e3b8db8d1
# https://facebook.github.io/prophet/docs/quick_start.html#python-api

In [10]:
def adf_test(timeseries):
    """Run the Augmented Dickey-Fuller test and print a labelled summary.

    Calls ``adfuller`` on ``timeseries`` with ``autolag="AIC"`` and prints a
    pandas Series holding the test statistic, p-value, number of lags used,
    number of observations used, and one entry per critical value.

    Parameters
    ----------
    timeseries : array-like
        The series passed straight through to ``adfuller``.

    Returns
    -------
    None
        The summary is only printed.
    """
    print("Results of Dickey-Fuller Test:")
    adf_result = adfuller(timeseries, autolag="AIC")

    # Element 4 of the result maps each significance level to its critical value.
    critical_values = adf_result[4]

    labels = [
        "Test Statistic",
        "p-value",
        "#Lags Used",
        "Number of Observations Used",
    ]
    labels += ["Critical Value (%s)" % level for level in critical_values]
    values = list(adf_result[0:4]) + list(critical_values.values())

    print(pd.Series(values, index=labels))

In [11]:
# The ADF test is used to determine the presence of a unit root in the series, and hence helps in understanding whether the series is stationary or not. The null and alternate hypotheses of this test are:

# Null Hypothesis: The series has a unit root. (p-value > 0.05)
# Alternate Hypothesis: The series has no unit root. (p-value < 0.05)

# If the null hypothesis fails to be rejected, this test may provide evidence that the series is non-stationary.

# Note: the function defined below carries out the KPSS test, not the ADF test. KPSS reverses the hypotheses: its null hypothesis is that the series is stationary, so a p-value < 0.05 is evidence that the series is non-stationary.

def kpss_test(timeseries):
    """Run the KPSS test and print a labelled summary.

    Calls ``kpss`` on ``timeseries`` with ``regression="c"`` and
    ``nlags="auto"`` and prints a pandas Series holding the test statistic,
    p-value, lags used, and one entry per critical value.

    Parameters
    ----------
    timeseries : array-like
        The series passed straight through to ``kpss``.

    Returns
    -------
    None
        The summary is only printed.
    """
    print("Results of KPSS Test:")
    kpss_result = kpss(timeseries, regression="c", nlags="auto")

    # Element 3 of the result maps each significance level to its critical value.
    critical_values = kpss_result[3]

    labels = ["Test Statistic", "p-value", "Lags Used"]
    labels += ["Critical Value (%s)" % level for level in critical_values]
    values = list(kpss_result[0:3]) + list(critical_values.values())

    print(pd.Series(values, index=labels))

In [12]:
# Data download and preparation
# ==============================================================================
url = ('https://raw.githubusercontent.com/arifpras/forecastingDemand/main/20230126_database.csv')

# Read the CSV (its own header row is replaced by the names listed here), then
# turn the month column into a DatetimeIndex named 'date' with month-start
# ('MS') frequency, sorted chronologically.
data = (
    pd.read_csv(
        url,
        sep=',',
        header=0,
        names=[
            'month',
            'incoming_bid_avg_sun',
            'incoming_bid_avg_sbn',
            'dpk_bankumum',
            'dv_bankindonesia',
            'vix_index',
            'bfcius_bberg',
        ],
    )
    .rename(columns={'month': 'date'})
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y/%m/%d'))
    .set_index('date')
    .asfreq('MS')
    .sort_index()
)
data.head()

Unnamed: 0_level_0,incoming_bid_avg_sun,incoming_bid_avg_sbn,dpk_bankumum,dv_bankindonesia,vix_index,bfcius_bberg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-01-01,38.90175,38.90175,2770.57007,0,20.403182,-1.200591
2012-02-01,32.32425,18.45775,2763.94286,0,18.39,-0.785571
2012-03-01,12.3525,8.04325,2825.9749,0,16.167273,-0.655591
2012-04-01,17.163,11.39625,2841.36139,0,17.769524,-0.710952
2012-05-01,14.01525,8.320625,2908.95747,0,21.034783,-0.828


In [15]:
# Raw ADF result tuple for the target series, shown unformatted as the cell
# output. `adfuller` is the same function as `sm.tsa.stattools.adfuller`.
adfuller(
    data[['incoming_bid_avg_sun']],
    maxlag=None,
    regression='c',
    autolag='AIC',
    store=False,
    regresults=False,
)

(-1.197427392462703,
 0.674701402476912,
 6,
 119,
 {'1%': -3.4865346059036564,
  '5%': -2.8861509858476264,
  '10%': -2.579896092790057},
 969.5112686044872)

In [18]:
# ADF test on the target series. In the output recorded below, the p-value
# (~0.67) is above 0.05, so the unit-root null hypothesis is not rejected.
adf_test(data['incoming_bid_avg_sun'])
# KPSS counterpart, currently disabled:
# kpss_test(data['incoming_bid_avg_sun'])

Results of Dickey-Fuller Test:
Test Statistic                  -1.197427
p-value                          0.674701
#Lags Used                       6.000000
Number of Observations Used    119.000000
Critical Value (1%)             -3.486535
Critical Value (5%)             -2.886151
Critical Value (10%)            -2.579896
dtype: float64


In [19]:
# ADF test on dpk_bankumum. In the output recorded below, the p-value (~1.00)
# is above 0.05, so the unit-root null hypothesis is not rejected.
adf_test(data['dpk_bankumum'])

Results of Dickey-Fuller Test:
Test Statistic                   1.854171
p-value                          0.998451
#Lags Used                       8.000000
Number of Observations Used    117.000000
Critical Value (1%)             -3.487517
Critical Value (5%)             -2.886578
Critical Value (10%)            -2.580124
dtype: float64


In [20]:
# ADF test on vix_index. In the output recorded below, the p-value (~0.0004)
# is below 0.05, so the unit-root null hypothesis is rejected.
adf_test(data['vix_index'])

Results of Dickey-Fuller Test:
Test Statistic                  -4.340198
p-value                          0.000378
#Lags Used                       0.000000
Number of Observations Used    125.000000
Critical Value (1%)             -3.483779
Critical Value (5%)             -2.884954
Critical Value (10%)            -2.579257
dtype: float64


In [21]:
# ADF test on bfcius_bberg. In the output recorded below, the p-value (~0.003)
# is below 0.05, so the unit-root null hypothesis is rejected.
adf_test(data['bfcius_bberg'])

Results of Dickey-Fuller Test:
Test Statistic                  -3.761603
p-value                          0.003326
#Lags Used                       2.000000
Number of Observations Used    123.000000
Critical Value (1%)             -3.484667
Critical Value (5%)             -2.885340
Critical Value (10%)            -2.579463
dtype: float64


In [None]:
# Build the modelling frame: the target series plus its explanatory variables.
# Take an explicit copy so the column assignments below write to an independent
# DataFrame rather than to a selection of `data`, which would raise pandas'
# SettingWithCopyWarning.
data_incbid = data[['incoming_bid_avg_sun']].copy()

# Each right-hand side is a Series (single brackets), aligned on the date index.
# dpk_bankumum lagged by one row, i.e. the previous month's value given the
# 'MS' frequency applied when `data` was prepared
data_incbid['dpk_bankumum_t-1'] = data['dpk_bankumum'].shift()
# dv_bankindonesia values, same period
data_incbid['dv_bankindonesia'] = data['dv_bankindonesia']
# vix_index values, same period
data_incbid['vix_index'] = data['vix_index']

# Drop rows with any missing value; the lag leaves the first row without a
# dpk_bankumum_t-1 value.
data_incbid = data_incbid.dropna()

data_incbid