# Building a decision tree to predict stock flow

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime
import pandas as pd
import numpy as np
import talib as ta
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the multi-ticker price file; the 'date' column is parsed and used as the index.
df = pd.read_csv("all_stocks_5yr.csv", delimiter=',', index_col='date', parse_dates=True)

# Keep only the AAPL rows. The explicit .copy() makes df an independent frame, so the
# column assignments below are not chained assignments on a filtered slice of the
# full table (pandas flags that with SettingWithCopyWarning, which the global
# warnings filter would otherwise hide).
df = df.loc[df['Name'] == 'AAPL'].copy()

# Volume relative to its own 20-row rolling mean (overwrites the raw volume).
df['volume'] = df['volume'] / df['volume'].rolling(20).mean()
# High-low range of each row scaled by the 20-period ATR.
df['range'] = (df['high'] - df['low']) / ta.ATR(
    df['high'].values, df['low'].values, df['close'].values, timeperiod=20
)
# Forward-looking target: open two rows ahead minus open one row ahead,
# as an absolute price difference (not a percentage).
df['return'] = df['open'].shift(-2) - df['open'].shift(-1)

# Technical indicators computed by TA-Lib on the raw price arrays.
df['EMA10'] = ta.EMA(df['close'].values, timeperiod=10)
df['EMA30'] = ta.EMA(df['close'].values, timeperiod=30)
df['ATR'] = ta.ATR(df['high'].values, df['low'].values, df['close'].values, timeperiod=14)
df['ADX'] = ta.ADX(df['high'].values, df['low'].values, df['close'].values, timeperiod=14)
df['RSI'] = ta.RSI(df['close'].values, timeperiod=14)
macd, macdsignal, macdhist = ta.MACD(
    df['close'].values, fastperiod=12, slowperiod=26, signalperiod=9
)
df['MACD'] = macd
df['MACDsignal'] = macdsignal

# Comparison flags encoded as +1 / -1. A comparison against NaN yields -1 here,
# but every row that still has a NaN indicator is removed by dropna() below.
df['ClgtEMA10'] = np.where(df['close'] > df['EMA10'], 1, -1)
df['EMA10gtEMA30'] = np.where(df['EMA10'] > df['EMA30'], 1, -1)
df['MACDSIGgtMACD'] = np.where(df['MACDsignal'] > df['MACD'], 1, -1)

# Targets: 1/0 label for a positive forward return, and the raw forward return.
df['target_cls'] = np.where(df['return'] > 0, 1, 0)
df['target_rgs'] = df['return']

# Drop indicator warm-up rows and the trailing rows whose forward return is undefined.
# The 'Name' column is intentionally left in the frame.
df.dropna(inplace=True)
print(df.head())

In [None]:
# Feature columns used by both trees: three raw indicators, then three +1/-1 flags.
predictors = [
    'ATR',
    'ADX',
    'RSI',
    'ClgtEMA10',
    'EMA10gtEMA30',
    'MACDSIGgtMACD',
]
X = df[predictors]
# Last expression of the cell: displays the final rows of the feature matrix.
X.tail()

In [None]:
# Targets: the 1/0 label for the classifier and the raw forward return for the regressor.
y_cls, y_rgs = df['target_cls'], df['target_rgs']

In [None]:
from sklearn.model_selection import train_test_split

# The rows form a time series, so hold out the LAST 25% of rows instead of a shuffled,
# stratified sample. Shuffling would place rows dated after the test rows into the
# training set and leak future information into the evaluation. This also matches
# the chronological split used for the regression data.
# NOTE(review): assumes the rows are in ascending date order - confirm against the CSV.
# stratify cannot be combined with shuffle=False, and random_state has no effect
# without shuffling, so both are omitted.
X_cls_train, X_cls_test, y_cls_train, y_cls_test = train_test_split(
    X, y_cls, test_size=0.25, shuffle=False
)
print(X_cls_train.shape, y_cls_train.shape)
print(X_cls_test.shape, y_cls_test.shape)

In [None]:
# Positional 75/25 split: the first 75% of rows train the regressor,
# the remaining rows are held out for testing.
train_length = int(0.75 * len(df))

X_rgs_train, X_rgs_test = X.iloc[:train_length], X.iloc[train_length:]
y_rgs_train, y_rgs_test = y_rgs.iloc[:train_length], y_rgs.iloc[train_length:]

# Report (rows, columns) of each feature set next to the length of its target.
print(X_rgs_train.shape, y_rgs_train.shape)
print(X_rgs_test.shape, y_rgs_test.shape)

## Classification

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow entropy-based tree: at most 3 levels, and every leaf must hold at least
# 6 training samples. random_state is fixed so that ties between equally good
# splits are broken the same way on every run, making the fitted tree reproducible.
clf = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    min_samples_leaf=6,
    random_state=42,
)
# Last expression of the cell: displays the (unfitted) estimator.
clf

In [None]:
# fit() trains the estimator in place and returns that same object,
# so the name clf keeps referring to the fitted classifier without rebinding.
clf.fit(X_cls_train, y_cls_train)
# Last expression of the cell: displays the fitted estimator.
clf

In [None]:
import graphviz
from sklearn import tree

# Export the fitted classifier as DOT text (out_file=None returns the string
# instead of writing a file), labelling split nodes with the predictor names.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=predictors,
    filled=True,
)
# Last expression of the cell: renders the DOT source inline.
graphviz.Source(dot_data)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out classification rows.
y_cls_pred = clf.predict(X_cls_test)

# Per-class precision/recall/F1 table, a blank separator line, then overall accuracy.
report = classification_report(y_cls_test, y_cls_pred)
print(report)
print()
print(accuracy_score(y_cls_test, y_cls_pred))

## Regression

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Regression tree whose depth is limited only by requiring at least 200 training
# samples in every leaf. random_state is fixed so that ties between equally good
# splits are resolved identically on every run, making the fitted tree reproducible.
dtr = DecisionTreeRegressor(min_samples_leaf=200, random_state=42)

In [None]:
# Train the regressor on the chronologically earlier rows; as the last expression
# of the cell, the returned estimator is displayed.
dtr.fit(X=X_rgs_train, y=y_rgs_train)

In [None]:
# Export the fitted regressor as DOT text (out_file=None returns the string
# instead of writing a file), labelling split nodes with the predictor names.
dot_data = tree.export_graphviz(
    dtr,
    out_file=None,
    feature_names=predictors,
    filled=True,
)
# Last expression of the cell: renders the DOT source inline.
graphviz.Source(dot_data)