# Stock Trading Strategy with ARIMA model

In [20]:
import pandas as pd
import numpy as np
%matplotlib inline

from statsmodels.tsa.arima_model import ARMA,ARMAResults,ARIMA,ARIMAResults
from statsmodels.tsa.seasonal import seasonal_decompose      # for ETS Plots
from pmdarima import auto_arima # for determining ARIMA orders

import warnings
warnings.filterwarnings("ignore")

import yfinance as yf

from backtesting import Strategy, Backtest
from backtesting.lib import crossover

import time
from datetime import datetime, date
from pytz import timezone

import inspect
from typing import Tuple

## Getting the data and cleaning it up

In [2]:
# Getting the price history (last 6 years by default) at once
def get_data(ticker, period="6y"):
    """Download price history for ``ticker`` and put it on a daily index.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``yf.download``.
    period : str, default "6y"
        Look-back window forwarded to ``yf.download``. The default keeps the
        original six-year behaviour.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame after ``asfreq('D')`` and ``ffill()``. Rows that
        the re-indexing introduces are forward-filled from the previous
        available row, so every column (``Volume`` included) repeats the
        prior value on those rows.
    """
    return yf.download(ticker, period=period).asfreq('D').ffill()

In [3]:
# Try the loader on a single ticker and display the resulting frame
get_data("SPY")

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-03-16,206.710007,208.690002,205.860001,208.580002,185.548767,136099200.0
2015-03-17,207.690002,208.419998,206.979996,207.960007,184.997223,94510400.0
2015-03-18,207.389999,211.270004,206.619995,210.460007,187.221176,228808500.0
2015-03-19,209.960007,210.470001,209.029999,209.500000,186.367233,117917300.0
2015-03-20,209.710007,211.020004,209.490005,210.410004,188.012238,177715100.0
...,...,...,...,...,...,...
2021-03-12,392.070007,394.209991,391.200012,394.059998,394.059998,64608100.0
2021-03-13,392.070007,394.209991,391.200012,394.059998,394.059998,64608100.0
2021-03-14,392.070007,394.209991,391.200012,394.059998,394.059998,64608100.0
2021-03-15,392.070007,394.209991,391.200012,394.059998,394.059998,64608100.0


### Making a function to predict stock price with ARIMA Model

In [4]:
# Fit an ARIMA model on the given window and return a single out-of-sample
# forecast value.
# NOTE: the positional argument order is (p, q, d), not the usual (p, d, q).

def arima_fcst(p, q, d, data, days):
    """Fit ARIMA(p, d, q) on ``data['Close']`` and return one forecast value.

    Parameters
    ----------
    p, q, d : int
        ARIMA orders. They are accepted in the order ``p, q, d`` and passed to
        the model as ``order=(p, d, q)``.
    data : pandas.DataFrame
        Frame with a ``Close`` column used as the training series.
    days : int
        Zero-based position in the out-of-sample forecast to return. The
        forecast starts at position ``len(data)``, so the returned value is
        ``days + 1`` steps after the last row of ``data``.

    Returns
    -------
    float
        The forecast at position ``days``.

    Raises
    ------
    ValueError
        If ``days`` is negative.
    """
    if days < 0:
        raise ValueError("days must be non-negative, got {}".format(days))

    model = ARIMA(data['Close'], order=(p, d, q))
    results = model.fit()

    # Forecast exactly as far as needed; a fixed 40-step horizon would fail
    # for days > 40.
    start = len(data)
    fcst = results.predict(start, start + days, dynamic=False, typ='levels')

    # Index by position explicitly so the lookup does not depend on how the
    # returned series happens to be labelled.
    return np.asarray(fcst)[days]

### Making a for loop to run through 10 tickers to get one consolidated table

In [5]:
# Symbols processed by the signal-generation loop. List order matters: the
# backtest cell finds a ticker's frame by its position in this list.
tickers = ['SPY', 'QQQ', 'EEM', 'AAPL', 'MSFT', 'AMZN', 'FB', 'GOOGL', 'GOOG', 'TSLA']

In [6]:
# Setting parameters
# p, d, q are the ARIMA orders handed to arima_fcst (which builds order=(p, d, q));
# days is the zero-based position in the forecast that arima_fcst returns.
p = 0
d = 1
q = 3
days = 5    

In [7]:
# Collects one prepared DataFrame per ticker, appended in the order of `tickers`
ARIMA_df_list = []

In [8]:
# Getting ARIMA signals for each ticker

# Number of trailing rows each ARIMA fit is trained on.
WINDOW = 50

# Start from an empty list so that re-running this cell does not append a
# second copy of every ticker's frame.
ARIMA_df_list = []

for ticker in tickers:
    data = get_data(ticker)

    # Rolling fit: the model trained on rows [n, n + WINDOW) produces the
    # prediction stored on row n + WINDOW, so no row sees its own Close.
    ARIMA_pred = [
        arima_fcst(p, q, d, data.iloc[n:n + WINDOW], days)
        for n in range(len(data) - WINDOW)
    ]

    # .copy() gives an independent frame; assigning columns on a plain slice
    # of `data` triggers pandas' SettingWithCopy behaviour.
    data2 = data.iloc[WINDOW:].copy()
    data2['ARIMA_Pred'] = ARIMA_pred
    # Ratio of predicted price to the current close: > 1 means the model
    # expects a higher price, < 1 a lower one.
    data2['Arima_Signal'] = data2['ARIMA_Pred'] / data2['Close']

    ARIMA_df_list.append(data2)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [9]:
# Preview the first rows of the first ticker's frame, including the two added ARIMA columns
ARIMA_df_list[0].head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ARIMA_Pred,Arima_Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-05-05,211.029999,211.460007,208.729996,208.899994,186.662964,113326200.0,211.524341,1.012563
2015-05-06,209.559998,209.929993,206.759995,208.039993,185.894501,135060200.0,209.470867,1.006878
2015-05-07,207.919998,209.380005,207.520004,208.869995,186.636154,88244900.0,208.281596,0.997183
2015-05-08,210.880005,211.860001,210.779999,211.619995,189.093414,155877300.0,208.905185,0.987171
2015-05-09,210.880005,211.860001,210.779999,211.619995,189.093414,155877300.0,210.90072,0.996601


### Defining trading strategy

In [10]:
def get_signal(data):
    """Return the ``'Arima_Signal'`` entry of ``data``.

    ``data`` only needs to support subscripting by column name.
    """
    signal_column = 'Arima_Signal'
    return data[signal_column]

In [11]:
class ARIMA_Pred(Strategy):
    """Threshold strategy driven by the ``Arima_Signal`` column.

    Buys when the latest signal is above ``high`` and exits (or, when shorting
    is enabled, goes short) when it is below ``low``. Between the two
    thresholds nothing is done.
    """

    # Thresholds applied to the latest signal value.
    low = 0.98
    high = 1.02
    # 1: long-only (never sells short); 0: also open short positions.
    long_only = 1

    def init(self):
        # Register the precomputed signal column as an indicator.
        self.signal = self.I(get_signal, self.data)

    def next(self):
        # Compare the value for the current bar explicitly instead of relying
        # on the truth value of an array-vs-scalar comparison.
        latest_signal = self.signal[-1]
        shorting_enabled = self.long_only == 0

        if latest_signal > self.high:
            # Only an opposite (short) position needs closing before buying;
            # closing an existing long here would liquidate and re-enter the
            # same side on every bar.
            if shorting_enabled and self.position.is_short:
                self.position.close()
            self.buy()

        elif latest_signal < self.low:
            # Exit longs only, so an existing short is not closed and
            # immediately re-opened when shorting is enabled.
            if self.position.is_long:
                self.position.close()
            if shorting_enabled:
                self.sell()

### Backtesting ARIMA strategy

In [17]:
# Selecting the df for the ticker

selected_ticker = "SPY"

if selected_ticker not in tickers:
    # Stop here: continuing would look up ARIMA_df_list with an `index` that
    # is either undefined or left over from an earlier run of this cell.
    raise ValueError("No ARIMA model available for this ticker.")

# Frames were appended in the order of `tickers`, so positions line up.
index = tickers.index(selected_ticker)
backtest_df = ARIMA_df_list[index]

In [21]:
def backtest_ARIMA(ydata: pd.DataFrame, cash: int=10_000, commission: float=0.,
                   periods: tuple=('0.5', '1', '2', 2020, 2019, 2018, 2017, 2016)
                   ) -> Tuple[pd.DataFrame, dict]:
    """Backtest ``ARIMA_Pred`` over several sub-periods of ``ydata``.

    Parameters
    ----------
    ydata : pandas.DataFrame
        Price frame with a date index, passed (sliced) to ``Backtest``.
    cash : int, default 10_000
        Starting cash forwarded to ``Backtest``.
    commission : float, default 0.
        Commission forwarded to ``Backtest``.
    periods : tuple, default ('0.5', '1', '2', 2020, 2019, 2018, 2017, 2016)
        Periods to evaluate. A ``str`` is a number of years counted back from
        the end of ``ydata`` (``years * 365`` trailing rows); an ``int`` is a
        calendar year, sliced from 31 Dec of the previous year through 31 Dec
        of that year.

    Returns
    -------
    (pandas.DataFrame, dict)
        A frame with one column per evaluated period, named
        ``"<strategy>_<period>"``, holding the summary statistics; and a dict
        mapping the same names to ``(equity_curve, trades)``.

    Raises
    ------
    TypeError
        If an entry of ``periods`` is neither ``str`` nor ``int``.
    ValueError
        If no period contains any rows of ``ydata``.
    """
    stat_columns = []
    column_names = []
    equity_trades = {}

    for period in periods:
        if isinstance(period, str):
            data = ydata.iloc[-int(float(period)*365):]
        elif isinstance(period, int):
            data = ydata.loc["{}-12-31".format(period-1):"{}-12-31".format(period)]
        else:
            raise TypeError("Unsupported period {!r}: expected str or int".format(period))

        # No data
        if data.shape[0] == 0:
            continue

        bt = Backtest(data, ARIMA_Pred, cash=cash, commission=commission)
        stats = bt.run()
        label = "{}_{}".format(str(stats["_strategy"]), period)

        # Keep every public statistic by name instead of a fixed positional
        # cut-off, so rows are not silently dropped if the series grows.
        public_keys = [key for key in stats.index if not str(key).startswith("_")]

        column_names.append(label)
        stat_columns.append(stats[public_keys])
        equity_trades[label] = (stats["_equity_curve"], stats["_trades"])

    if not stat_columns:
        raise ValueError("No data available for any of the requested periods.")

    strat_returns = pd.concat(stat_columns, axis=1)
    strat_returns.columns = column_names
    return strat_returns, equity_trades

In [22]:
# Run the backtest for the selected ticker with 10,000 starting cash and zero commission
backtest_ARIMA(backtest_df, 10_000, 0.)

(                             ARIMA_Pred_0.5         ARIMA_Pred_1  \
 Start                   2020-09-16 00:00:00  2020-03-17 00:00:00   
 End                     2021-03-16 00:00:00  2021-03-16 00:00:00   
 Duration                  181 days 00:00:00    364 days 00:00:00   
 Exposure Time [%]                   70.3297              74.2466   
 Equity Final [$]                    10344.8              12245.7   
 Equity Peak [$]                     10418.1              12617.1   
 Return [%]                           3.4479              22.4568   
 Buy & Hold Return [%]               17.3012              57.2152   
 Return (Ann.) [%]                   7.03461              22.4568   
 Volatility (Ann.) [%]               14.1718              19.4177   
 Sharpe Ratio                       0.496381              1.15651   
 Sortino Ratio                      0.718338              1.92473   
 Calmar Ratio                       0.953573              1.70533   
 Max. Drawdown [%]                