# ARIMA Forecasting
The code below runs the ARIMA model for 8 different companies, 4 that maintained Dividend Champion status and 4 that lost status. The test forecasts are saved to a `.npy` file so they could be combined with the forecasts from the other models to be compared.

In [1]:
import os
import warnings
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error

In [2]:
warnings.filterwarnings('ignore')    # ignore warning

In [3]:
# Split Dataframe into train and test set
def split(dataframe, border, col):
    return dataframe.loc[:border,col], dataframe.loc[border:,col]

In [5]:
# Collect list of all companies we have data for

# Get list of file names
fileList = os.listdir("../data/series/good")

# Loop through file names and collect ticker symbols
companyList = []
for file in fileList:
    companyName = file.split("_")[0]
    if companyName not in [".DS",".ipynb"]:
        companyList.append(companyName)
print(companyList)

['ABM', 'ABT', 'ADM', 'ADP', 'AFL', 'ALB', 'AOS', 'APD', 'AROW', 'ARTNA', 'ATO', 'ATR', 'AWR', 'BANF', 'BDX', 'BEN', 'BKH', 'BMI', 'BMS', 'BOH', 'BRC', 'BRO', 'CAT', 'CBSH', 'CBU', 'CB', 'CFR', 'CHD', 'CINF', 'CLX', 'CL', 'CNI', 'CPKF', 'CSL', 'CSVI', 'CTAS', 'CTBI', 'CVX', 'CWT', 'DBD', 'DCI', 'DOV', 'EBTC', 'ECL', 'ED', 'EFSI', 'EGN', 'EMR', 'ENB', 'ERIE', 'ESS', 'EXPD', 'FELE', 'FFMR', 'FLIC', 'FMCB', 'FRT', 'FUL', 'GD', 'GPC', 'GRC', 'GWW', 'HP', 'HRL', 'IBM', 'ITW', 'JKHY', 'JNJ', 'KMB', 'KO', 'LANC', 'LECO', 'LEG', 'LIN', 'LLY', 'LOW', 'MATW', 'MCD', 'MCY', 'MDT', 'MDU', 'MGEE', 'MGRC', 'MKC', 'MMM', 'MO', 'MSA', 'MSEX', 'NC', 'NDSN', 'NEE', 'NFG', 'NIDB', 'NJR', 'NNN', 'NUE', 'NWN', 'ORI', 'OZK', 'O', 'PBCT', 'PBI', 'PEP', 'PG', 'PH', 'PII', 'PNR', 'PPG', 'PSBQ', 'RLI', 'RNR', 'ROP', 'ROST', 'RPM', 'RTX', 'SBSI', 'SCL', 'SEIC', 'SHW', 'SJW', 'SKT', 'SON', 'SPGI', 'SRCE', 'SWK', 'SYK', 'SYY', 'TDS', 'TFX', 'TGT', 'THFF', 'TMP', 'TNC', 'TRI', 'TROW', 'TR', 'TYCB', 'T', 'UBA', 'UBS

In [6]:
# Add to ignore list if no data is available
# Yahoo Finance will occasionally not through error
# for some companies during GetMetrics, which end up creating blank
# csvs that we can simply ignore here
ignoreList = ["FFMR","FMCB"]
stockList = list(set(companyList).difference(ignoreList))

# Load and store data in initial Dataframe
df_ = {}
for i in stockList:
    df_[i] = pd.read_csv("../data/series/good/" + i + "_dividends_fixed.csv", index_col="Date", parse_dates=["Date"])

# Create new Dataframe that contains data for each company
# split at specified year
df_new = {}
for i in stockList:
    df_new[i] = {}
    df_new[i]["Train"], df_new[i]["Test"] = split(df_[i], "2006", "0")

In [7]:
# ARIMA
def create_arima_model(train_data, test_data):
    training_data = train_data.values   
    test_data = test_data.values
    history = [x for x in training_data]    # make list of training data
    model_predictions = []

    for time_point in range(len(test_data)):
        model = ARIMA(history, order=(1, 1, 1))
        model_fit = model.fit()    # fit model
        output = model_fit.forecast()    # get out-of-sample forecasts (predictions)
        yhat = output[0]    # get value from array
        model_predictions.append(yhat)    # list of model predictions
        true_test_value = test_data[time_point]    # get true value
        history.append(true_test_value)    # append to history (training data)

    MSE_error = mean_squared_error(test_data, model_predictions)
    print(f'{company} - Testing Mean Squared Error - {MSE_error:.5f}')
    
    return model_predictions

In [8]:
kept_list = ['MGEE', 'TGT', 'BEN', 'CBSH']
lost_list = ['PBI', 'ABT', 'WRE', 'HP']

In [9]:
# Kept status
# Create models for all kept status companies
# and get forecasts from test set
for company in kept_list:
    dividends_train = df_new[company]['Train']
    dividends_test = df_new[company]['Test']
    
    model_predictions = create_arima_model(dividends_train, dividends_test)
    model_predictions = np.asarray(model_predictions)
    np.save(file = '../data/numpy/' + company + '_pred_arima.npy', arr = model_predictions)
    print(f'Forecast data saved for - {company}')

MGEE - Testing Mean Squared Error - 0.00003
Forecast data saved for - MGEE
TGT - Testing Mean Squared Error - 0.00053
Forecast data saved for - TGT
BEN - Testing Mean Squared Error - 0.00009
Forecast data saved for - BEN
CBSH - Testing Mean Squared Error - 0.00003
Forecast data saved for - CBSH


In [10]:
# Lost status
# Create models for all kept status companies
# and get forecasts from test set
for company in lost_list:
    dividends_train = df_new[company]['Train']
    dividends_test = df_new[company]['Test']
    
    model_predictions = create_arima_model(dividends_train, dividends_test)
    model_predictions = np.asarray(model_predictions)
    np.save(file = '../data/numpy/' + company + '_pred_arima.npy', arr = model_predictions)
    print(f'Forecast data saved for - {company}')

PBI - Testing Mean Squared Error - 0.00091
Forecast data saved for - PBI
ABT - Testing Mean Squared Error - 0.00039
Forecast data saved for - ABT
WRE - Testing Mean Squared Error - 0.00033
Forecast data saved for - WRE
HP - Testing Mean Squared Error - 0.00641
Forecast data saved for - HP
