### Importing required libraries.

The data is collected from yahoo finance on a daily basis which can be altered depending on the requirement . 

In [None]:
#Installing Yahoo Finance
#!pip install yfinance
#!pip install pandas_datareader
#!pip install scikeras
#!pip install imbalanced-learn
#!pip install xgboost

In [16]:
import pandas as pd
import talib as ta
import time, datetime
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import alpaca_trade_api as tradeapi
import os
import yfinance as yf
import pandas_datareader.data as pdr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.optimizers import SGD
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE 
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from itertools import product
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

### Importing Data from Yahoo Finance.

In [17]:
#Collecting desired ticker from user
ticker = input("Please enter your desired ticker")
start_time = datetime.datetime(1980,1,1)
end_time = datetime.datetime.now().date()

Please enter your desired ticker SPY


In [18]:
def get_data(ticker):
    df = pdr.get_data_yahoo(ticker, start=start_time, end=end_time)         
    return df

In [19]:
def compute_ema(df):
    df['EMA50'] = df['Close'].ewm(span=50, adjust=False).mean()
    #Setting adjust to False to specify that recursive calculation mode is required.
    df['EMA100'] = df['Close'].ewm(span=100, adjust=False).mean()
    #df['EMA150'] = EMA(df['Close'].values, timeperiod=150)
   # df['EMA200'] = EMA(df['Close'].values, timeperiod=200)
    return df


In [20]:
def computeMACD(df, n_fast, n_slow, n_smooth):
    data = df['Close']
    fastEMA = data.ewm(span=n_fast, min_periods=n_slow).mean()
    slowEMA = data.ewm(span=n_slow, min_periods=n_slow).mean()
    MACD = pd.Series(fastEMA-slowEMA, name ='MACD')
    df = df.join(MACD)
    return df

In [21]:
def compute_rsi_bb(df):
    df['upperBB'], df['middleBB'], df['lowerBB'] = ta.BBANDS(df['Close'].values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
        # we will normalize RSI
    df['RSI'] = ta.RSI(df['Close'].values, timeperiod=14)
    df['nor_RSI'] = ta.RSI(df['Close'].values, timeperiod=14)/100.0
    return df
    

### Creating the feature variables.

In [22]:
def calculated_features(df):
    df['aboveEMA50'] = np.where(df['Close'] > df['EMA50'], 1, 0)
    df['aboveEMA100'] = np.where(df['Close'] > df['EMA100'], 1, 0)
    df['aboveupperBB'] = np.where(df['Close'] > df['upperBB'], 1, 0)
    df['belowlowerBB'] = np.where(df['Close'] < df['lowerBB'], 1, 0)
    df['oversoldRSI'] = np.where(df['nor_RSI'] < 0.30, 1, 0)
    df['overboughtRSI'] = np.where(df['nor_RSI'] > 0.70, 1, 0)
    return df

In [23]:
# create a new column in the DF based on the conditions
def defined_conditions(df):
    # List of conditions
    condition = [(df['aboveEMA50'] == df['aboveEMA100']) & (df['aboveEMA50'] ==1) & (df['MACD']>0.5),
                 (df['aboveEMA50'] == df['aboveEMA100']) & (df['aboveEMA50'] ==0) & (df['MACD']<0.5),
                 (df['aboveEMA50'] != df['aboveEMA100'])]
    # List of values to return
    choice  = [1,0,2]

    df['Buy/Sell'] = np.select(condition, choice)
    
    return df
#Considering it as a buy if the price is over 50 ema and 100 ema.
#Considering it as a Sell if the price is under 50 ema and 100 ema. 
#Further analysis is required if the price is over 50 ema and under ema 100 or vice versa.

### Creating DF for multiple stocks and Indices.

In [24]:
# Requesting the stock Data from the user. 
tickers = ['AAPL','GOOG','AMD','AMZN']

In [27]:


#Calling the get_data function definition. 
df = get_data(ticker)
df = df.reset_index()
df = df.set_index('Date')
#Calculating EMA 50 and 100
df = compute_ema(df)
# Computing RSI and Bolinger Bands
df = compute_rsi_bb(df)
#Calculating if the signal is to buy or sell.
df = calculated_features(df)
#Computing the MACD indicator
df =computeMACD(df, 12, 26, 9)
# Rules for placind a trade.
df = defined_conditions(df)


for ticker in tickers:
    t_df = get_data(ticker)
    t_df = compute_ema(t_df)
    t_df = compute_rsi_bb(t_df)
    t_df = calculated_features(t_df)
    t_df = computeMACD(t_df, 12, 26, 9)
    t_df = defined_conditions(t_df)
    df = df.append(t_df, ignore_index=True)

In [28]:
df

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,EMA50,EMA100,upperBB,middleBB,...,RSI,nor_RSI,aboveEMA50,aboveEMA100,aboveupperBB,belowlowerBB,oversoldRSI,overboughtRSI,MACD,Buy/Sell
0,0.128906,0.128348,0.128348,0.128348,469033600.0,0.099874,0.128348,0.128348,,,...,,,0,0,0,0,0,0,,0
1,0.122210,0.121652,0.122210,0.121652,175884800.0,0.094663,0.128085,0.128215,,,...,,,0,0,0,0,0,0,,0
2,0.113281,0.112723,0.113281,0.112723,105728000.0,0.087715,0.127483,0.127909,,,...,,,0,0,0,0,0,0,,0
3,0.116071,0.115513,0.115513,0.115513,86441600.0,0.089886,0.127014,0.127663,,,...,,,0,0,0,0,0,0,,0
4,0.119420,0.118862,0.118862,0.118862,73449600.0,0.092492,0.126694,0.127489,,,...,,,0,0,0,0,0,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42953,95.019997,90.589996,93.970001,92.459999,84330300.0,92.459999,107.653923,115.595793,115.598892,98.116501,...,37.491756,0.374918,0,0,0,0,0,0,-4.833820,0
42954,93.349998,90.870003,92.620003,93.199997,62192000.0,93.199997,107.087103,115.152312,110.954220,96.746501,...,38.774275,0.387743,0,0,0,0,0,0,-4.708885,0
42955,94.580002,92.830002,93.239998,94.129997,59414700.0,94.129997,106.578981,114.736028,106.941600,95.670000,...,40.428527,0.404285,0,0,0,0,0,0,-4.483151,0
42956,94.430000,93.070000,93.790001,93.410004,35088600.0,93.410004,106.062551,114.313731,103.637479,94.792501,...,39.537869,0.395379,0,0,0,0,0,0,-4.312639,0


In [29]:
#df.tail(10)
df = df.dropna()
dataMACD = df[['MACD']].to_numpy()
minmaxscaler = MinMaxScaler()
df['MACD'] = minmaxscaler.fit_transform(dataMACD)
df.describe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,EMA50,EMA100,upperBB,middleBB,...,RSI,nor_RSI,aboveEMA50,aboveEMA100,aboveupperBB,belowlowerBB,oversoldRSI,overboughtRSI,MACD,Buy/Sell
count,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,...,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0,42833.0
mean,20.954036,20.398077,20.60526,20.681033,201194400.0,20.349179,20.358234,20.001324,22.05038,20.558741,...,52.659711,0.526597,0.588261,0.609343,0.076063,0.048677,0.036537,0.096188,0.548886,0.347816
std,35.736517,34.846238,35.336616,35.299247,282119400.0,35.179999,34.793758,34.202093,37.500036,35.125065,...,12.967612,0.129676,0.492154,0.487903,0.265102,0.215195,0.187625,0.294852,0.041963,0.671444
min,0.049665,0.049107,0.0,0.049107,0.0,0.038213,0.059529,0.063847,0.061434,0.055999,...,9.245771,0.092458,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.482143,0.464286,0.421875,0.47433,32670700.0,0.392708,0.465556,0.4646,0.514602,0.467299,...,43.19307,0.431931,0.0,0.0,0.0,0.0,0.0,0.0,0.54319,0.0
50%,6.8125,6.5625,6.6475,6.694286,111115200.0,6.38,6.559896,6.473822,7.366328,6.639775,...,52.500168,0.525002,1.0,1.0,0.0,0.0,0.0,0.0,0.545204,0.0
75%,20.742857,20.119337,20.404642,20.418571,246299200.0,19.3125,20.141476,19.431856,22.005554,20.242489,...,61.970984,0.61971,1.0,1.0,0.0,0.0,0.0,0.0,0.552844,0.0
max,188.654007,184.839493,187.199997,186.570496,7421641000.0,186.570496,174.853254,171.408028,192.129683,181.52135,...,94.214138,0.942141,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0


### Seperating Labels and Features from the Data frame and splitting the data

In [30]:
df = df.dropna()
X = df.drop(columns =['Buy/Sell'])
#Reviewing features Data Frame.
X.head(5)

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,EMA50,EMA100,upperBB,middleBB,lowerBB,RSI,nor_RSI,aboveEMA50,aboveEMA100,aboveupperBB,belowlowerBB,oversoldRSI,overboughtRSI,MACD
25,0.142857,0.142299,0.142857,0.142299,30083200.0,0.11073,0.136077,0.132944,0.160832,0.14442,0.128007,55.372003,0.55372,1,1,0,0,0,0,0.544945
26,0.146205,0.145089,0.145089,0.145089,15904000.0,0.112901,0.13643,0.133184,0.160496,0.145061,0.129627,57.306456,0.573065,1,1,0,0,0,0,0.544954
27,0.147879,0.146763,0.146763,0.146763,35548800.0,0.114203,0.136835,0.133453,0.160593,0.145508,0.130423,58.469647,0.584696,1,1,0,0,0,0,0.544966
28,0.147321,0.146205,0.146763,0.146205,11222400.0,0.113769,0.137203,0.133705,0.16065,0.145564,0.130477,57.90333,0.579033,1,1,0,0,0,0,0.544974
29,0.144531,0.143973,0.144531,0.143973,24640000.0,0.112032,0.137468,0.133909,0.158717,0.144838,0.130959,55.584215,0.555842,1,1,0,0,0,0,0.544971


In [31]:
y = df['Buy/Sell']
#Reviewing Label.
y.head(5)
y.value_counts()
## Found class Imbalances in the data set. 
# Suggested to use SMOTE Upsampling to balane the class.


0    32732
1     5304
2     4797
Name: Buy/Sell, dtype: int64

In [32]:
# Split the data into training and testing datasets
# Assign the function a random_state equal to 1
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state =1,stratify = y)

### Passing the Data to NN to establish a baseline accuracy.

#### Normalising the data using Standard Scaler

In [33]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Scale both the training and testing data from the features dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# encoding class labels as integers
encoder = LabelEncoder()
encoder.fit(y_train)
encoded_Y = encoder.transform(y_train)

#### Adding Layers to Neural Network

In [34]:
def create_model():
	# create model
	model = Sequential()
	model.add(Dropout(0.2, input_shape=(20,)))
	model.add(Dense(11, activation='relu', kernel_constraint=MaxNorm(3)))
	model.add(Dense(6, activation='relu', kernel_constraint=MaxNorm(3)))
	model.add(Dense(1, activation='sigmoid'))
	# Compile model
	sgd = SGD(learning_rate=0.08, momentum=0.9)
	model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
	return model
 

In [38]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(model=create_model, epochs=50, batch_size=2, verbose=1)))
pipeline = Pipeline(estimators)

In [39]:
kfold = StratifiedKFold(n_splits=5, shuffle=True)

In [None]:
results = cross_val_score(pipeline, X, y, cv=kfold)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [None]:
print("Visible: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

### Additional tune up and Learning rate and Epochs needed. 

### Creating Ensemble

In [64]:
from sklearn.metrics import log_loss
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(n_estimators=500, random_state=1)
clf3 = GaussianNB()
#clf2 = XGBClassifier()
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],voting='hard')


eclf.fit(X_train, y_train)
 
# predicting the output on the test dataset
pred_final = eclf.predict(X_test)
 
# printing log loss between actual and predicted value
print(accuracy_score(y_test, pred_final))

0.8536324786324786


In [65]:
for clf, label in zip([clf1, clf2, clf3,eclf], ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'Ensemble']):
    scores = cross_val_score(clf, X_test, y_test, scoring='accuracy', cv=9)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

Accuracy: 0.45 (+/- 0.00) [Logistic Regression]
Accuracy: 1.00 (+/- 0.00) [Random Forest]
Accuracy: 0.50 (+/- 0.02) [Naive Bayes]
Accuracy: 0.85 (+/- 0.01) [Ensemble]



-> Back TEsting
-> 