In [6]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import technical_indicator as ti
import plotting as pl

from finta import TA
import pandas_ta as pta

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
import datetime as dt

In [2]:
# Download five years of daily bars for the stock under study (AAPL).
# `auto_adjust=False` is passed explicitly: newer yfinance releases default it to
# True, which folds the adjustment into 'Close' and omits the separate
# 'Adj Close' column that every later cell in this notebook reads.
# NOTE(review): recent yfinance versions may also return MultiIndex columns for a
# single ticker — confirm that df['Adj Close'] is still a Series on the pinned version.
df = yf.download('AAPL', period='5y', auto_adjust=False)

# SPY bars requested with auto-adjusted prices and the corporate-action columns.
# Not referenced by the cells visible in this notebook section.
df2 = yf.download(tickers='SPY', period='5y', auto_adjust=True, actions=True)
df.head(200)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-03-17,26.379999,26.617500,26.240000,26.450001,24.601379,137682800
2016-03-18,26.584999,26.625000,26.297501,26.480000,24.629284,176820800
2016-03-21,26.482500,26.912500,26.285000,26.477501,24.626959,142010800
2016-03-22,26.312500,26.822500,26.302500,26.680000,24.815302,129777600
2016-03-23,26.620001,26.767500,26.475000,26.532499,24.678110,102814000
...,...,...,...,...,...,...
2016-12-22,29.087500,29.127501,28.910000,29.072500,27.493036,104343600
2016-12-23,28.897499,29.129999,28.897499,29.129999,27.547411,56998000
2016-12-27,29.129999,29.450001,29.122499,29.315001,27.722363,73187600
2016-12-28,29.379999,29.504999,29.049999,29.190001,27.604153,83623600


In [None]:
# Plot candle chart
# Hands the AAPL price frame to the project-local `plotting` helper.
# Presumably it reads the Open/High/Low/Close columns — confirm in plotting.py.
pl.plot_candle_chart(df)

In [None]:
# Add simple (SMA_<n>) and exponential (EMA_<n>) moving averages of the adjusted
# close for several window lengths.
# The window is passed positionally: another cell in this notebook calls the same
# helpers with `length=` instead of `period=`, so at most one keyword can match the
# helper signature. A positional argument works with either name.
# NOTE(review): assumes the window is the second positional parameter of
# ti.simple_ma / ti.exp_ma — confirm in technical_indicator.py.
for period in [5, 10, 20, 50, 100, 200]:
    df['SMA_{}'.format(period)] = ti.simple_ma(df['Adj Close'], period)
    df['EMA_{}'.format(period)] = ti.exp_ma(df['Adj Close'], period)


# Overlay the medium- and long-term EMAs on the adjusted close.
plt.figure(figsize=(20,10))
plt.plot(df['EMA_50'], label='50-day EMA')
plt.plot(df['EMA_200'], label='200-day EMA')
plt.plot(df['Adj Close'], label='Adj Close', linewidth=1)
plt.title('AAPL Adj Close with 50- and 200-day EMA')
plt.legend()
plt.show()

In [None]:
# Chronological 70/30 split of the price frame: the first 70 % of rows become the
# training slice and the remaining rows the test slice (no shuffling).
# An equivalent split of X / y was sketched here earlier but is not active.
split_at = int(df.shape[0]*0.7)
df_train, df_test = df[:split_at], df[split_at:]



# Feature Engineering

### Features to be considered:
`1`. **Simple Moving Average**



`2`. Exponential Moving Average

`3`. Bollinger Bands

`4`. Daily return

`5`. Log volume

`6`. Rate of change

`7`. Price difference

`8`. Z-score

`9`. Relative Strength Index (RSI)
Measures the magnitude of recent price changes. It is commonly used in technical analysis to evaluate overbought or oversold conditions in the price of a stock.
This indicator was developed by J. Welles Wilder Jr.
According to this indicator, a stock is considered overbought when the RSI is above 70 and oversold when it is below 30 (on a scale of 0 to 100).

`10`. Moving Average Convergence Divergence (MACD)
MACD is a trend-following momentum indicator that shows the relationship between two exponential moving averages, one short-term and one long-term. MACD is calculated as

MACD = EMA(12-period) - EMA(26-period)

MACD is often used together with the Relative Strength Index (RSI) to show a more complete picture of a market.

The signal line is a 9-day EMA of the MACD Line. As a moving average of the indicator, it trails the MACD and makes it easier to spot MACD turns.

`11`. Stochastic Oscillator

The Stochastic Oscillator is a momentum indicator that compares a specific closing price of a security to its high-low range over a certain period of time. First developed in the 1950s, it is a popular momentum indicator to identify overbought and oversold conditions of an asset. 
	  
`12`. Accumulation/Distribution Indicator (A/D)

`13`. Average True Range (ATR)
ATR is a market volatility indicator. Introduced by market technician J. Welles Wilder Jr., it measures market volatility by decomposing the complete range of a security's price for that period.

`14`. Vortex Indicator
The Vortex Indicator consists of two oscillating lines: one to capture the upward trend movement and the other one to identify the downward price movement.

`15`. Ease of Movement

`16`. Commodity Channel Index (CCI)
A momentum-based oscillator used to identify an asset that is reaching a condition of being oversold or overbought.

`17`. On-Balance Volume (OBV)
A momentum-based indicator that uses volume flow to predict changes in asset price. It was first developed by Joseph Granville, who believed that volume was the major force behind markets. As explained in his 1963 book *Granville's New Key to Stock Market Profits*, when volume increases strongly without a significant change in the price of an asset, the price will eventually jump upwards or downwards. Granville theorised that volume precedes price.

__How to interpret.__

We should expect prices to increase if OBV is rising while prices are either flat or moving down, and vice versa.

# References

https://www.investopedia.com/terms/r/rsi.asp
https://www.investopedia.com/terms/m/macd.asp
https://altfins.com/knowledge-base/macd-line-and-macd-signal-line/
https://www.investopedia.com/terms/s/stochasticoscillator.asp
https://www.investopedia.com/terms/a/accumulationdistribution.asp
https://www.investopedia.com/terms/a/atr.asp
https://www.investopedia.com/articles/active-trading/072115/understand-vortex-indicator-trading-strategies.asp
https://www.investopedia.com/terms/e/easeofmovement.asp
https://www.investopedia.com/terms/c/commoditychannelindex.asp
https://www.investopedia.com/terms/o/onbalancevolume.asp


The 5-, 10-, 20- and 50-day moving averages are frequently utilized to identify near-term trend changes.

In [3]:
# Series shared by the indicator helpers below. All price-based indicators are
# computed from the adjusted close, combined with the raw high/low where needed.
adj_close = df['Adj Close']
high, low, volume = df['High'], df['Low'], df['Volume']

# Daily return in percentage
df['DAILY_RETURN'] = ti.daily_return(adj_close)

# Rate of change in volume
df['ROC'] = ti.rate_of_change(volume)

# Price difference
df['DIFF'] = ti.price_diff(adj_close)

# Z-score of the adjusted close is currently disabled (no 'ZSCORE' column is created).

# Bollinger Bands: middle, upper and lower band
df[['BB_MID', 'BB_UPPER', 'BB_LOWER']] = ti.bollinger_bands(adj_close)

# Relative Strength Index (RSI)
df['RSI'] = ti.RSI(adj_close)

# Moving Average Convergence Divergence (MACD) line and its signal line
df[['MACD', 'MACD_signal']] = ti.MACD(adj_close)

# Stochastic Oscillator
df['STOCH'] = ti.stochastic_oscillator(adj_close, high, low)

# Accumulation/Distribution
# NOTE(review): this helper receives (low, high) while the other helpers receive
# (high, low) — confirm the parameter order in technical_indicator.py.
df['ADI'] = ti.accumulation_distribution(adj_close, low, high, volume)

# Average True Range
df['ATR'] = ti.average_true_range(high, low, adj_close)

# Vortex: upward and downward trend lines
df[['VI_up', 'VI_down']] = ti.vortex(high, low, adj_close)

# Ease of Movement
df['EMV'] = ti.ease_of_movement(high, low, volume)

# Commodity Channel Index
df['CCI'] = ti.commodity_channel(high, low, adj_close)

# On-Balance Volume
df['OBV'] = ti.on_balance_volume(adj_close, volume)


Visualise the distributions of daily return and rate of change (the z-score feature is currently commented out, so it is not plotted).

In [None]:
# Histogram (with KDE overlay) of each feature, one figure per column.
# A new figure is opened inside the loop so that every histogram gets the same
# size; previously only the first plot used the 20x10 figure, because plt.show()
# closes it and the next histogram was drawn on a default-size figure.
# Add 'ZSCORE' to this list once that feature is enabled.
for column in ['DAILY_RETURN', 'ROC']:
    plt.figure(figsize=(20,10))
    sns.histplot(df[column], kde=True)
    plt.title('Distribution of {}'.format(column))
    plt.show()


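## Stock movement
Label columns, based on the *next* trading day's return:

`BUY_SIGNAL` is 1 if the next day's daily return is > threshold, otherwise 0

`SELL_SIGNAL` is 1 if the next day's daily return is < -threshold, otherwise 0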


In [4]:
# Minimum next-day return (same units as DAILY_RETURN) that counts as a signal.
threshold = 0

# Return realised on the NEXT trading day, aligned to the current row, so that
# today's features are paired with tomorrow's outcome.
next_day_return = df['DAILY_RETURN'].shift(-1)

df.loc[:, 'BUY_SIGNAL'] = 0
df.loc[:, 'SELL_SIGNAL'] = 0

df.loc[next_day_return > threshold, 'BUY_SIGNAL'] = 1
df.loc[next_day_return < -threshold, 'SELL_SIGNAL'] = 1

# The most recent row has no next-day return yet, so its outcome is unknown.
# Previously it kept the default label 0 and survived dropna() (the label columns
# are never NaN), which put one mislabelled sample in the data set. Remove it.
df = df[next_day_return.notna()]

# Drop the remaining rows with NaN values (indicator warm-up periods).
# NOTE: this cell rebinds `df`, so re-running it trims one more row each time;
# re-run from the download cell to start from the raw data.
df = df.dropna()

In [None]:
# Show each day's return next to the buy label derived from the following day's return.
df.loc[:, ['DAILY_RETURN', 'BUY_SIGNAL']]

In [None]:
# Splitting into train and test dataset
# Features are selected by name instead of by position. The former
# `df.iloc[:, 6:23]` only matched these 17 indicator columns when the SMA/EMA cell
# had not been run; on a Restart & Run All the extra moving-average columns shift
# every position and a different set of columns would be used for training.
feature_columns = [
    'DAILY_RETURN', 'ROC', 'DIFF',
    'BB_MID', 'BB_UPPER', 'BB_LOWER',
    'RSI', 'MACD', 'MACD_signal', 'STOCH',
    'ADI', 'ATR', 'VI_up', 'VI_down',
    'EMV', 'CCI', 'OBV',
]

# shuffle=False keeps the chronological order: the first 70 % of rows train the
# models and the most recent 30 % are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    df[feature_columns], df['BUY_SIGNAL'], test_size=.3, shuffle=False
)


In [None]:
# Daily returns of the training window and of the held-out window, with the
# buy/sell thresholds overlaid.
plt.figure(figsize=(20,15))

plt.plot(X_train.index, X_train['DAILY_RETURN'], linewidth=1, label='Train data')
plt.plot(X_test.index, X_test['DAILY_RETURN'], linewidth=1, label='Data to predict')

# Draw the thresholds that were actually used to build the labels. The lines were
# previously hard-coded at +/-0.0025, which disagreed with `threshold`.
plt.axhline(y=threshold, linewidth=1, color='g', linestyle='--', label='Threshold for buy signal')
plt.axhline(y=-threshold, linewidth=1, color='b', linestyle='--', label='Threshold for sell signal')

plt.legend()
plt.show()


## KNN

In [None]:
# Fit a 5-nearest-neighbours classifier on the training window and predict the
# buy signal for the held-out window.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
buy_pred = knn.predict(X_test)

# Report overall accuracy first, then the per-class precision/recall/F1 table.
knn_accuracy = accuracy_score(y_test, buy_pred)
knn_report = classification_report(y_test, buy_pred)
print(knn_accuracy)
print(knn_report)

## Random Forest


In [None]:
# Random forest with default hyper-parameters. The seed is fixed so that the
# bootstrap samples and feature sub-sampling, and therefore the reported scores,
# are reproducible across Restart & Run All.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Predict the buy signal for the held-out window.
buy_pred_rf = rf.predict(X_test)

print(accuracy_score(y_test, buy_pred_rf))
print(classification_report(y_test, buy_pred_rf))

## XGBClassifier

In [None]:
# Gradient-boosted trees with default hyper-parameters.
xgb_classifier = xgb.XGBClassifier()
xgb_classifier.fit(X_train, y_train)

# Predict with the XGBoost model itself. This cell previously called
# rf.predict(...), so the metrics printed below were the random forest's.
buy_pred = xgb_classifier.predict(X_test)

print(accuracy_score(y_test, buy_pred))
print(classification_report(y_test, buy_pred))

In [None]:
# Bollinger bands around the adjusted close.
# Switching the style here affects this and all later figures.
plt.style.use('fivethirtyeight')

plt.figure(figsize=(20,15))

adj_close = df['Adj Close']
plt.plot(adj_close, label='Adj Close', linewidth=1)

# Shade the channel between the bands, then outline both bands with dashed lines.
plt.fill_between(adj_close.index, df['BB_UPPER'], df['BB_LOWER'], alpha=.2)
for band_column, band_label in (('BB_UPPER', 'Upper band'), ('BB_LOWER', 'Lower band')):
    plt.plot(df[band_column], label=band_label, linestyle='dashed', linewidth=2)

# Middle band
plt.plot(df['BB_MID'], label='20-day simple moving average', linewidth=2)

plt.title('Bollinger bands')
plt.legend(fontsize=15)

plt.show()


In [None]:
# Plotting MACD
# Two stacked panels sharing the date axis: price with the Bollinger middle band
# on top, MACD and its signal line below.

plt.style.use('ggplot')

fig = plt.figure(facecolor = 'white', figsize = (20,10))

# Upper panel: adjusted close and the Bollinger middle band (legend entry 'SMA').
ax0 = plt.subplot2grid((12,8), (1,0), rowspan=6, colspan=4)
ax0.plot(df[['Adj Close','BB_MID']], linewidth=2)
ax0.set_facecolor('ghostwhite')
ax0.legend(['Adj Close','SMA'],ncol=3, loc = 'upper left', fontsize = 12)
# `df` holds the AAPL download; the title previously said "SPY".
plt.title("AAPL Adj Close and MACD", fontsize = 20)

# Lower panel: MACD line and signal line, x-axis shared with the price panel.
ax1 = plt.subplot2grid((12,8), (7,0), rowspan=3, colspan=4, sharex = ax0)
ax1.plot(df[['MACD', 'MACD_signal']], linewidth=1)
ax1.legend(['MACD', 'MACD_signal'], ncol=3, loc = 'lower left', fontsize = 12)
ax1.set_facecolor('silver')
# hspace=0 removes the vertical gap between the two panels.
plt.subplots_adjust(left=.09, bottom=.09, right=1, top=.95, wspace=.20, hspace=0)
plt.show()

In [None]:
# Plotting OBV
# Two stacked panels sharing the date axis: price with the Bollinger middle band
# on top, On-Balance Volume below.

fig = plt.figure(facecolor = 'white', figsize = (25,15))

# Upper panel: adjusted close and the Bollinger middle band (legend entry 'SMA').
ax0 = plt.subplot2grid((12,8), (1,0), rowspan=6, colspan=4)
ax0.plot(df[['Adj Close','BB_MID']], linewidth=2)
ax0.set_facecolor('ghostwhite')
ax0.legend(['Adj Close','SMA'],ncol=3, loc = 'upper left', fontsize = 12)
# `df` holds the AAPL download; the title previously said "SPY".
plt.title("AAPL Adj Close and OBV", fontsize = 15)

# Lower panel: On-Balance Volume, x-axis shared with the price panel.
ax1 = plt.subplot2grid((12,8), (7,0), rowspan=3, colspan=4, sharex = ax0)
ax1.plot(df['OBV'], linewidth=1)
ax1.legend(['OBV'], ncol=3, loc = 'upper left', fontsize = 12)
ax1.set_facecolor('silver')
# hspace=0 removes the vertical gap between the two panels.
plt.subplots_adjust(left=.09, bottom=.09, right=1, top=.95, wspace=.20, hspace=0)
plt.show()

In [None]:
# Add moving averages for multiple periods
# Columns are named SMA<n> / EMA<n> (no underscore), so they sit alongside the
# SMA_<n> / EMA_<n> columns created near the top of the notebook.
# The window is passed positionally: this cell used `length=` while the earlier
# moving-average cell uses `period=` for the same helpers, so at most one keyword
# can match the helper signature. A positional argument works with either name.
# NOTE(review): assumes the window is the second positional parameter of
# ti.simple_ma / ti.exp_ma — confirm in technical_indicator.py.
for period in [5, 10, 20, 50, 100, 200]:
    df['SMA{}'.format(period)] = ti.simple_ma(df['Adj Close'], period)
    df['EMA{}'.format(period)] = ti.exp_ma(df['Adj Close'], period)
    