## Importing Libraries

In [1]:
import pandas as pd

In [2]:
from sklearn.preprocessing import LabelEncoder

In [3]:
from sklearn.ensemble import RandomForestClassifier

## Loading Data and Preprocessing

In [4]:
DATA = "FINAL_DATA/indicatos_price_data.csv"

In [5]:
indicators_df = pd.read_csv(DATA)

In [6]:
accepted_cols =  ["Date", "Symbol", "close_price_change"] + list(filter(lambda x: x.find("_signal") != -1, indicators_df.columns))

In [7]:
indicators_df = indicators_df.filter(accepted_cols)

In [8]:
indicators_df.sample(frac=1).head(n=6)

Unnamed: 0,Date,Symbol,close_price_change,atr_signal,cci_signal,cmf_21_signal,ema_5_10_signal,ema_20_100_signal,macd_signal,mfi_signal,obv_1_signal,pgo_signal,rsi_signal,sma_5_10_signal,sma_20_100_signal,stochrsi_signal,stoch_signal,trix_signal,wr_signal
1825,2016-06-22,AXISBANK,0,Low Volatility,Neutral,Buy,Sell,Buy,Buy,Neutral,Sell,Neutral,Neutral,Sell,Buy,Sell,Neutral,Buy,Oversold
1326,2015-10-19,ICICIBANK,0,Low Volatility,Neutral,Buy,Buy,Sell,Buy,Neutral,Divergence,Neutral,Neutral,Buy,Sell,Buy,Neutral,Buy,Overbought
1221,2015-08-26,ICICIBANK,0,Low Volatility,Sell,Buy,Sell,Sell,Sell,Neutral,Sell,Neutral,Neutral,Sell,Sell,Sell,Neutral,Sell,Neutral
734,2015-01-01,AXISBANK,1,Low Volatility,Buy,Sell,Buy,Sell,Buy,Neutral,Buy,Neutral,Neutral,Buy,Buy,Buy,Neutral,Buy,Overbought
2652,2017-08-02,AXISBANK,1,Low Volatility,Neutral,Sell,Sell,Buy,Buy,Neutral,Divergence,Neutral,Neutral,Sell,Buy,Sell,Neutral,Buy,Neutral
3221,2018-05-09,AXISBANK,1,Low Volatility,Buy,Buy,Buy,Sell,Buy,Neutral,Buy,Neutral,Neutral,Buy,Sell,Buy,Neutral,Buy,Overbought


### Converting text signals to numeric data

In [9]:
label_encoders = {}
for signal in list(filter(lambda x: x.find("_signal") != -1, indicators_df.columns)):
    le = LabelEncoder()
    le.fit(indicators_df[signal])
    indicators_df[signal] = le.transform(indicators_df[signal])
    label_encoders[signal] = le

In [10]:
indicators_df = indicators_df.sample(frac=1)

In [11]:
X_train = indicators_df[indicators_df.Date < "2019-01-01"].drop(["Date", "Symbol", "close_price_change", "obv_1_signal"], axis=1).values
X_test = indicators_df[indicators_df.Date >= "2019-01-01"].drop(["Date", "Symbol", "close_price_change", "obv_1_signal"], axis=1).values

In [12]:
Y_train = indicators_df[indicators_df.Date < "2019-01-01"]["close_price_change"].values
Y_test = indicators_df[indicators_df.Date >= "2019-01-01"]["close_price_change"].values

In [13]:
len(X_train)

3699

In [14]:
len(X_test)

243

## Random Forest Classification

In [15]:
rf_clf = RandomForestClassifier(n_estimators=1000, max_depth=20, n_jobs=4)

In [16]:
rf_clf.fit(X_train, Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=20, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=1000,
                       n_jobs=4, oob_score=False, random_state=None, verbose=0,
                       warm_start=False)

In [17]:
rf_clf.score(X_test, Y_test)*100

69.95884773662551

### Feature Importance

In [18]:
indicators = ['atr_signal',
 'cci_signal',
 'cmf_21_signal',
 'ema_5_10_signal',
 'ema_20_100_signal',
 'macd_signal',
 'mfi_signal',
 'pgo_signal',
 'rsi_signal',
 'sma_5_10_signal',
 'sma_20_100_signal',
 'stochrsi_signal',
 'stoch_signal',
 'trix_signal',
 'wr_signal']

In [19]:
indicators_importances = rf_clf.feature_importances_

In [20]:
for ind,imp in zip(indicators, indicators_importances):
    print("{} --> {}%".format(ind, imp*100))

atr_signal --> 0.7659662302952319%
cci_signal --> 7.363237386405676%
cmf_21_signal --> 4.304851338085142%
ema_5_10_signal --> 2.8128689968326985%
ema_20_100_signal --> 3.511572805446997%
macd_signal --> 2.5325826172843526%
mfi_signal --> 2.5614309759452976%
pgo_signal --> 1.3723430451484224%
rsi_signal --> 3.2698195318694787%
sma_5_10_signal --> 4.87413731469514%
sma_20_100_signal --> 3.6881654631651952%
stochrsi_signal --> 13.315500135711492%
stoch_signal --> 28.071520464734377%
trix_signal --> 2.7443338399059187%
wr_signal --> 18.81166985447458%


### Exporting Test Data

In [44]:
test_export = indicators_df[indicators_df.Date >= "2019-01-01"].filter(["Date", "Symbol", "close_price_change"])

In [45]:
test_export["indicators_prediction"] = list(rf_clf.predict(X_test))

In [47]:
test_export.head(n=10)

Unnamed: 0,Date,Symbol,close_price_change,indicators_prediction
3872,2019-03-25,HDFCBANK,1,1
3725,2019-01-11,AXISBANK,1,1
3733,2019-01-16,HDFCBANK,0,0
3851,2019-03-13,HDFCBANK,1,1
3861,2019-03-19,AXISBANK,1,1
3771,2019-02-04,ICICIBANK,0,0
3903,2019-04-09,AXISBANK,1,0
3802,2019-02-18,ICICIBANK,0,0
3757,2019-01-28,HDFCBANK,0,0
3706,2019-01-03,ICICIBANK,0,1


In [48]:
test_export.to_csv("FINAL_DATA/predicted_indicators.csv", header=True, index=False, sep="~")

### Confusion Matrix

In [37]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [42]:
tn, fp, fn, tp = confusion_matrix(Y_test, list(rf_clf.predict(X_test))).ravel()

In [43]:
print(tn, fp, fn, tp)

83 38 35 87
