In [1]:
import pandas as pd
import numpy as np
pd.options.display.float_format = '{:.2f}'.format
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import scipy.stats
import pylab
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.seasonal import seasonal_decompose
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore')
import yfinance as yf

# Extract Data

In [2]:
# Download daily AAPL price history from Yahoo Finance.
# auto_adjust=False keeps the separate 'Adj Close' column that the following
# cells select; newer yfinance releases default to auto_adjust=True, which
# drops that column.
data = yf.download('AAPL', start = '2015-01-01', end = '2021-01-01', auto_adjust = False)
# Newer yfinance releases may return (field, ticker) MultiIndex columns even
# for a single ticker. Flatten to the field names so data['Adj Close'] is a
# plain column; this is a no-op when the columns are already flat.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

[*********************100%***********************]  1 of 1 completed


# Setting up raw data for analysis

In [3]:
# Work on an independent copy that holds only the adjusted close and the
# traded volume, so later changes never touch the downloaded frame.
stock = data.loc[:, ['Adj Close', 'Volume']].copy()

In [4]:
# Reindex onto a business-day calendar. Weekdays without a quote in the
# downloaded data are inserted as NaN rows.
# 'B' is the documented pandas offset alias for business days; the lowercase
# spelling is deprecated in newer pandas releases.
stock = stock.asfreq('B')

# Create Returns

The data was already inspected when fitting the AR and MA models. It was concluded that returns are the best series to model.

In [5]:
# Using Returns
# Daily percentage returns of the adjusted close, as a one-column frame.
# The business-day reindexing leaves NaN rows on days without a quote, so the
# price is forward-filled explicitly before differencing (a gap day then gets
# a 0% return). Older pandas did this fill implicitly inside pct_change, but
# that default is deprecated; fill_method=None keeps the result the same on
# every pandas version.
stock_r = (
    stock['Adj Close']
    .ffill()
    .pct_change(1, fill_method = None)
    .mul(100)          # express returns in percent
    .iloc[1:]          # the first row has no previous price, drop it
    .asfreq('B')       # keep an explicit business-day frequency on the index
    .to_frame('returns')
)

# Fitting Model

In [9]:
# LLR (log-likelihood ratio) test
from scipy.stats import chi2
def LLR_Test(mod_1, mod_2, DF = 1):
    """Return the p-value of a log-likelihood ratio test between two models.

    Both models are fitted inside this function (``mod_1`` first, then
    ``mod_2``) and the log-likelihood (``.llf``) of each fit is compared.

    Parameters
    ----------
    mod_1 : object with a ``fit()`` method whose result exposes ``llf``
        Its log-likelihood is subtracted in the test statistic.
    mod_2 : object with a ``fit()`` method whose result exposes ``llf``
        Its log-likelihood is the positive term in the test statistic.
    DF : int, default 1
        Degrees of freedom of the chi-squared reference distribution.

    Returns
    -------
    float
        Upper-tail chi-squared probability of ``2 * (llf_2 - llf_1)``,
        rounded to three decimals.
    """
    llf_first, llf_second = (candidate.fit().llf for candidate in (mod_1, mod_2))
    statistic = 2 * (llf_second - llf_first)
    return chi2.sf(statistic, DF).round(3)

In [21]:
# order = (p, d, q) = (1, 0, 1): one autoregressive lag, no differencing and
# one moving-average lag, i.e. an ARMA(1,1) model fitted to the returns.
# (With p = d = 0 and q != 0 the same class would reduce to a pure MA model.)
model_arma_1 = ARIMA(stock_r, order = (1,0,1))
results_arma_1 = model_arma_1.fit()
print(results_arma_1.summary())

                               SARIMAX Results                                
Dep. Variable:                returns   No. Observations:                 1564
Model:                 ARIMA(1, 0, 1)   Log Likelihood               -3159.299
Date:                Tue, 02 Nov 2021   AIC                           6326.599
Time:                        21:38:38   BIC                           6348.019
Sample:                    01-05-2015   HQIC                          6334.562
                         - 12-31-2020                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1241      0.043      2.891      0.004       0.040       0.208
ar.L1         -0.0081      0.150     -0.054      0.957      -0.302       0.285
ma.L1         -0.0948      0.153     -0.618      0.5

# Test if ARMA performs better than AR and MA

In [43]:
# Build the two simpler specifications on the same returns series:
# order (1,0,0) is a pure AR(1) model and order (0,0,1) is a pure MA(1) model.
model_ar_1, model_ma_1 = (
    ARIMA(stock_r, order = spec) for spec in ((1,0,0), (0,0,1))
)
# Fit each one and keep the results for the comparison cells below.
results_ar_1, results_ma_1 = model_ar_1.fit(), model_ma_1.fit()

In [24]:
# Compare the ARMA(1,1) model with each simpler model using the LLR test.
# The simpler model is passed first. A large p-value means the extra ARMA
# term does not significantly improve the fit.
print('\n'.join(
    f'{label} {LLR_Test(simpler, model_arma_1)!s}'
    for label, simpler in (('ARMA vs AR', model_ar_1), ('ARMA vs MA', model_ma_1))
))

ARMA vs AR 0.738
ARMA vs MA 0.974


In [62]:
# Report log-likelihood and AIC for the three fitted models, one model per
# line, in the order ARMA, AR, MA.
print('\n'.join(
    f'\n {tag} \tLL = {fitted.llf!s} \t{tag} \tAIC = {fitted.aic!s}'
    for tag, fitted in (
        ('ARMA', results_arma_1),
        ('AR', results_ar_1),
        ('MA', results_ma_1),
    )
))


 ARMA 	LL = -3159.2993576816766 	ARMA 	AIC = 6326.598715363353

 AR 	LL = -3159.3553606810055 	AR 	AIC = 6324.710721362011

 MA 	LL = -3159.299904010827 	MA 	AIC = 6324.599808021654


# Model Selection