In [56]:
import pandas_datareader as pdr
import datetime as dt
import numpy as np

In [108]:
ticker = "AAPL" # Apple Inc. Stock
data = pdr.get_data_yahoo(ticker, dt.datetime(2002,1,1), dt.datetime.now(), interval='d')

In [109]:
# Calculate the EMA10 > EMA30 signal
ema10 = data['Close'].ewm(span=10).mean()
ema30 = data['Close'].ewm(span=30).mean()
data['EMA10gtEMA30'] = np.where(ema10 > ema30, 1, -1)

In [110]:
# Calculate where Close is > EMA10
data['ClGtEMA10'] = np.where(data['Close'] > ema10, 1, -1)

In [111]:
# Calculate the MACD signal
exp1 = data['Close'].ewm(span=12).mean()
exp2 = data['Close'].ewm(span=26).mean()
macd = exp1 - exp2
macd_signal = macd.ewm(span=9).mean()
data['MACD'] = macd_signal - macd

In [112]:
# Calculate RSI
delta = data['Close'].diff()
up = delta.clip(lower=0)
down = -1*delta.clip(upper=0)
ema_up = up.ewm(com=13, adjust=False).mean()
ema_down = down.ewm(com=13, adjust=False).mean()
rs = ema_up/ema_down
data['RSI'] = 100 - (100/(1 + rs))

In [113]:
# Stochastic Oscillator
high14= data['High'].rolling(14).max()
low14 = data['Low'].rolling(14).min()
data['%K'] = (data['Close'] - low14)*100/(high14 - low14)

In [114]:

# Williams Percentage Range
data['%R'] = -100*(high14 - data['Close'])/(high14 - low14)

In [115]:
days = 6

# Price Rate of Change
ct_n = data['Close'].shift(days)
data['PROC'] = (data['Close'] - ct_n)/ct_n

ct_n.tail(10)

Date
2021-05-28    127.309998
2021-06-01    125.430000
2021-06-02    127.099998
2021-06-03    126.900002
2021-06-04    126.849998
2021-06-07    125.279999
2021-06-08    124.610001
2021-06-09    124.279999
2021-06-10    125.059998
2021-06-11    123.540001
Name: Close, dtype: float64

In [116]:
#Trees are the foundation in the Forest. Or Decision Trees are the foundation in a Forest Classifier. Hence, it is a good starting point to understand how a Decision Tree works. Luckily, they are quite easy to understand.

#Letâ€™s try to investigate a Decision Tree that is based on two of the indicators above. We take the RSI (Relative Strength Index) and %K (Stochastic Oscillator). A Decision Tree could look like this (depending on the training data).

![caption](/image.png)

#Decision Tree for %K and RSI
#When we get a new data row with %K and RSI indicators, it will start at the top of the Decision Tree.

#At the first node it will check if %K <= 4.615, if so, take the left child otherwise the right child.
#The gini tells us how a randomly chosen element would be incorrectly labeled. Hence, a low value close to 0 is good.
#Samples tells us how many of the samples of the training set reached this node.
#Finally, the value tells us how the values are distributed. In the final decision nodes, the category of most element is the prediction.
#Looking at the above Decision Tree, it does not seem to be very good. The majority of samples end up the fifth node with a gini on 0.498, close to random, right? And it will label it 1, growth.

#But this is the idea with Forest Classifiers, it will take a bunch of Decision Trees, that might not be good, and use majority of them to classify it.






/bin/bash: -c: line 0: syntax error near unexpected token `/image.png'
/bin/bash: -c: line 0: `[caption](/image.png)'


In [117]:
##Create the Forest Classifier


import pandas_datareader as pdr
import datetime as dt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [118]:
ticker = "AAPL"
data = pdr.get_data_yahoo(ticker, dt.datetime(2016,1,1), dt.datetime.now(), interval='d')

In [119]:
# Calculate the EMA10 > EMA30 signal
ema10 = data['Close'].ewm(span=10).mean()
ema30 = data['Close'].ewm(span=30).mean()
data['EMA10gtEMA30'] = np.where(ema10 > ema30, 1, -1)

In [120]:
# Calculate where Close is > EMA10
data['ClGtEMA10'] = np.where(data['Close'] > ema10, 1, -1)



In [121]:
# Calculate the MACD signal
exp1 = data['Close'].ewm(span=12).mean()
exp2 = data['Close'].ewm(span=26).mean()
macd = exp1 - exp2
macd_signal = macd.ewm(span=9).mean()
data['MACD'] = macd_signal - macd

In [122]:
# Calculate RSI
delta = data['Close'].diff()
up = delta.clip(lower=0)
down = -1*delta.clip(upper=0)
ema_up = up.ewm(com=13, adjust=False).mean()
ema_down = down.ewm(com=13, adjust=False).mean()
rs = ema_up/ema_down
data['RSI'] = 100 - (100/(1 + rs))

In [123]:
# Stochastic Oscillator
high14= data['High'].rolling(14).max()
low14 = data['Low'].rolling(14).min()
data['%K'] = (data['Close'] - low14)*100/(high14 - low14)

In [124]:
# Williams Percentage Range
data['%R'] = -100*(high14 - data['Close'])/(high14 - low14)

In [125]:
days = 6

# Price Rate of Change
ct_n = data['Close'].shift(days)
data['PROC'] = (data['Close'] - ct_n)/ct_n

In [126]:
# Set class labels to classify
data['Return'] = data['Close'].pct_change(1).shift(-1)
data['class'] = np.where(data['Return'] > 0, 1, 0)

In [127]:
# Clean for NAN rows
data = data.dropna()
# Minimize dataset
data = data.iloc[-200:]

In [128]:
# Data to predict
predictors = ['EMA10gtEMA30', 'ClGtEMA10', 'MACD', 'RSI', '%K', '%R', 'PROC']
X = data[predictors]
y = data['class']

In [129]:
# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [130]:
# Train the model
rfc = RandomForestClassifier(random_state=0)
rfc = rfc.fit(X_train, y_train)

In [131]:
# Test the model by doing some predictions
y_pred = rfc.predict(X_test)

In [132]:
# See how accurate the predictions are
report = classification_report(y_test, y_pred)
print('Model accuracy', accuracy_score(y_test, y_pred, normalize=True))
print(report)

Model accuracy 0.5166666666666667
              precision    recall  f1-score   support

           0       0.62      0.46      0.52        35
           1       0.44      0.60      0.51        25

    accuracy                           0.52        60
   macro avg       0.53      0.53      0.52        60
weighted avg       0.54      0.52      0.52        60

