<a href="https://colab.research.google.com/github/anaghaclement/IntraDayStockPrediction/blob/main/Copy_of_IntraDayStockPricePrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Import all the required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import keras

from keras import backend as K
from keras import optimizers

## from keras.utils import plot_model
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Flatten
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
!pip install chart_studio
import chart_studio.plotly as py

from sklearn import preprocessing

## import chart_studio.graph_objs as go
## from chart_studio.offline import init_notebook_mode, iplot

%matplotlib inline
## init_notebook_mode(connected=True)

# suppress warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the 5-minute Reliance price file (reliancedf_5min.csv) straight from the project's GitHub repository.
url='https://raw.githubusercontent.com/anaghaclement/IntraDayStockPrediction/main/reliancedf_5min.csv'
data = pd.read_csv(url)

# Loading Data

In [None]:
# Preview the first rows of the freshly loaded frame.
data.head()

In [None]:
# Preview the last rows of the freshly loaded frame.
data.tail()

In [None]:
#data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d %H:%M')
#data = data.set_index('date')
#data.head()

# Plot Time Series Data

In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Count each distinct per-row pattern of missing values; a single all-False pattern means nothing is missing.
data.isnull().value_counts()

In [None]:
# Summary statistics of the numeric columns.
data.describe()

In [None]:
# Give the column that was read in as 'Unnamed: 0' a meaningful name.
data = data.rename(columns={'Unnamed: 0': 'Period'})
data.head()

In [None]:
# Show the first 75 values of the date column.
data['date'].head(75)

Each trading day contributes 72 rows of 5-minute data.

In [None]:
# Remove the row counter and the timestamp so only the remaining columns are used as features.
# (Add 'volume' to the list to exclude volume as well.)
data = data.drop(columns=['Period', 'date'])
data.head()

In [None]:
# Last rows after dropping the non-feature columns.
data.tail()

## Add Moving Average

In [None]:
# Rolling-mean windows, counted in 5-minute rows:
# 12 rows = 1 hour, 24 = 2 hours, 72 = 6 hours, 144 = 12 hours.
ma_hourly = [12, 24, 72, 144]

# Add one moving-average column of the close price per window length.
for ma in ma_hourly:
    column_name = "MA for {} 5minutes".format(ma)
    data[column_name] = data['close'].rolling(window=ma).mean()

In [None]:
	def get_rsi(close, lookback):
	    """Compute a Relative Strength Index series from closing prices.

	    Price changes are split into gains and losses, each is smoothed with an
	    exponentially weighted mean (``com = lookback - 1``, ``adjust=False``) and
	    combined as ``100 - 100 / (1 + gain / loss)``.

	    Parameters
	    ----------
	    close : pandas.Series
	        Closing prices in chronological order. Any index is accepted; the
	        computation is purely positional.
	    lookback : int
	        Smoothing period of the RSI.

	    Returns
	    -------
	    pandas.DataFrame
	        Single column ``'rsi'`` indexed like ``close``, with undefined (NaN)
	        rows removed and then the first three remaining rows dropped
	        (presumably as extra warm-up -- TODO confirm the intent of the 3).
	    """
	    ret = close.diff()

	    # Split the changes into gains and losses without label-based lookups
	    # (``ret[i]`` raised KeyError on any index that was not 0..n-1).
	    # The leading NaN produced by diff() is not "< 0", so it stays NaN in
	    # ``up`` and becomes 0 in ``down``, exactly as the element-wise loop did.
	    is_loss = ret < 0
	    up_series = ret.mask(is_loss, 0)
	    down_series = ret.where(is_loss, 0).abs()

	    up_ewm = up_series.ewm(com=lookback - 1, adjust=False).mean()
	    down_ewm = down_series.ewm(com=lookback - 1, adjust=False).mean()

	    # A zero average loss gives rs = inf and therefore rsi = 100;
	    # 0 / 0 gives NaN, which is removed by dropna() below.
	    rs = up_ewm / down_ewm
	    rsi = 100 - (100 / (1 + rs))

	    rsi_df = rsi.to_frame('rsi').dropna()
	    return rsi_df.iloc[3:]


In [None]:
# 14-period RSI of the close price; the result is aligned onto `data` by row index,
# so rows for which no RSI was returned are left as NaN.
lookback = 14
rsi_frame = get_rsi(data['close'], lookback)
data['RSI'] = rsi_frame['rsi']

In [None]:
# Discard every row that still has a missing value (the indicator warm-up rows)
# and renumber the remaining rows from 0.
data = data.dropna(axis=0).reset_index(drop=True)

In [None]:
# First 15 rows of the cleaned feature frame.
data.head(15)

## Plot Original Stock Price Chart

In [None]:
# Open price over the full history.
# The legend is created after plotting a labelled line; calling it before any
# line existed produced no legend at all.
plt.figure(figsize=(12, 4))
plt.plot(data['open'], label='open')
plt.ylabel('Price', fontsize=16)
plt.xlabel('5-Min Interval', fontsize=16)
plt.title('Reliance Industries Open Price History over 3 years')
plt.legend(loc='best')
plt.show(block=False)

In [None]:
# High price over the full history.
# The legend is created after plotting a labelled line; calling it before any
# line existed produced no legend at all.
plt.figure(figsize=(12, 4))
plt.plot(data['high'], label='high')
plt.ylabel('Price', fontsize=16)
plt.xlabel('5-Min Interval', fontsize=16)
plt.title('Reliance Industries High Price History over 3 years')
plt.legend(loc='best')
plt.show(block=False)

In [None]:
# Low price over the full history.
# The legend is created after plotting a labelled line; calling it before any
# line existed produced no legend at all.
plt.figure(figsize=(12, 4))
plt.plot(data['low'], label='low')
plt.ylabel('Price', fontsize=16)
plt.xlabel('5-Min Interval', fontsize=16)
plt.title('Reliance Industries Low Price History over 3 years')
plt.legend(loc='best')
plt.show(block=False)

In [None]:
# Close price over the full history.
# The legend is created after plotting a labelled line; calling it before any
# line existed produced no legend at all.
plt.figure(figsize=(12, 4))
plt.plot(data['close'], label='close')
plt.ylabel('Price', fontsize=16)
plt.xlabel('5-Min Interval', fontsize=16)
plt.title('Reliance Industries Close Price History over 3 years')
plt.legend(loc='best')
plt.show(block=False)

## Normalise (Min-Max Scaler)
Normalise the open-high-low-close (OHLC) values.

In [None]:
# Data normaliser
# Min-max scale every column of `data` independently.
# NOTE(review): the scaler is fitted on the whole frame, so rows that later end up in the
# test split influence the min/max -- consider fitting on the training portion only.
data_normaliser = preprocessing.MinMaxScaler()
data_normalised = data_normaliser.fit_transform(data) 

history_points = 288 
# 288 rows = 4 trading days at 72 rows per day (a scaled MSE of 29 was noted for this setting).
# history_points = 720 was also tried.
# For every row that has a full history, stack the preceding {history_points} rows
# (all feature columns) into one window; each window is used to predict the open value that follows it.
data_histories_normalised = np.array([data_normalised[i: i + history_points].copy() for i in range(len(data_normalised) - history_points)])
# Prints (number of windows, history_points, number of feature columns).
print(data_histories_normalised.shape)

In [None]:
# Inspect the scaled feature matrix.
data_normalised

In [None]:
# (rows, columns) of the unscaled feature frame.
print(data.shape)

## Next Day Open Values Normalised

In [None]:
# Targets: the open value of the row that immediately follows each history window
# (i.e. the next 5-minute row, despite the "next_day" variable names).
# The open column is located by name so the scaled and unscaled targets always
# refer to the same column.
open_col = data.columns.get_loc("open")

# Scaled targets, taken from the same scaled matrix that feeds the model.
next_day_open_values_normalised = data_normalised[history_points:, open_col].copy()
next_day_open_values_normalised = np.expand_dims(next_day_open_values_normalised, -1)

# Unscaled targets, used to report errors in price units.
next_day_open_values = data["open"].to_numpy()[history_points:].copy()
next_day_open_values = np.expand_dims(next_day_open_values, -1)

# The scaled targets were produced by scaling the *whole* open column, so the scaler used
# to invert predictions must be fitted on that same column. Fitting it on the targets only
# yields a different min/max whenever an extreme lies in the first `history_points` rows.
y_scaler = preprocessing.MinMaxScaler()
y_scaler.fit(data[["open"]].to_numpy())

# Sanity check: one target per history window.
print(data_histories_normalised.shape[0] == next_day_open_values_normalised.shape[0])

In [None]:
# NOTE: despite its name, `test_split` is the fraction of windows used for TRAINING.
test_split = 0.8 # 80% stock-history for training, most-recent 20% stock-history for testing
# Index of the first test window; windows are in row order, so the split is chronological.
n = int(data_histories_normalised.shape[0] * test_split)
print(n)

In [None]:
# Split at window n: everything before it trains the model, everything from it onward tests it.
train_part = slice(None, n)
test_part = slice(n, None)

data_train, y_train = data_histories_normalised[train_part], next_day_open_values_normalised[train_part]
data_test, y_test = data_histories_normalised[test_part], next_day_open_values_normalised[test_part]

# Test targets in original price units, for error reporting.
unscaled_y_test = next_day_open_values[test_part]

In [None]:
# Shapes of the training inputs and targets.
print(data_train.shape)
print(y_train.shape)

# Shapes of the test inputs and targets.
print(data_test.shape)
print(y_test.shape)

# Shape of the unscaled test targets.
print(unscaled_y_test.shape)

## Build Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense

In [None]:
# Build Model
# The feature count is read from the prepared windows instead of being hard-coded,
# so adding or dropping a feature column does not break the input layer.
number_of_columns = data_histories_normalised.shape[-1]

model = Sequential()
model.add(Input(shape=(history_points, number_of_columns)))
# NOTE(review): the filter count is tied to history_points and the kernel width to the
# feature count; both look coincidental rather than tuned -- confirm.
model.add(Conv1D(filters=history_points, kernel_size=number_of_columns, padding='same', activation='sigmoid'))
# Alternatives that were tried: a relu Conv1D with kernel size 5, and a stacked
# LSTM(history_points, return_sequences=True) before the LSTM below.
model.add(LSTM(history_points))
model.add(Dense(64, activation='sigmoid'))  # relu was also tried here
# Single linear output: the scaled open value of the next row.
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam')
model.summary()

## Train Model

In [None]:
# Training hyper-parameters: number of passes over the training data and samples per gradient step.
num_epochs= 25
batch_size = 64

In [None]:
# Train Model
# 20% of the training windows are held out for validation via validation_split.
# NOTE(review): no random seed is set anywhere visible, so results will differ between runs.
model.fit(data_train, y_train, batch_size=batch_size, epochs=num_epochs, shuffle=True, validation_split=0.2, verbose=1)

## Evaluate Model

In [None]:
# Loss on the held-out windows. The model was compiled with loss='mse' and the targets
# are the scaled values, so this number is an MSE on the scaled targets, not in price units.
evaluation = model.evaluate(data_test, y_test)
print(evaluation)

## Test Model

In [None]:
# Predict the scaled open value for every test window.
y_test_predicted = model.predict(data_test)
print(y_test_predicted.shape)

# The predictions are on the scaled target range; map them back to price units with y_scaler.
n_samples, n_outputs = y_test_predicted.shape
y_test_predicted = y_scaler.inverse_transform(y_test_predicted.reshape(n_samples, n_outputs))
print(y_test_predicted.shape)

In [None]:
# Predict over every window (training and test) and convert back to price units.
# NOTE(review): `y_predicted` is not referenced by any later cell in this notebook.
y_predicted = model.predict(data_histories_normalised)
print(y_predicted.shape)
y_predicted = y_scaler.inverse_transform(y_predicted)
print(y_predicted.shape)

In [None]:
# Shapes must match exactly: subtracting (N, 1) and (N,) arrays would silently
# broadcast to an N x N matrix and corrupt every metric below.
assert unscaled_y_test.shape == y_test_predicted.shape, "prediction/target shape mismatch"

# Mean squared error in squared price units.
real_mse = np.mean(np.square(unscaled_y_test - y_test_predicted))

# Root mean squared error in price units. The previous value printed under this
# label was sqrt(MSE / price range * 100), which is not an RMSE.
rmse = np.sqrt(real_mse)

# Kept only so existing references to this name keep working; it is not used for the RMSE.
scaled_mse = real_mse / (np.max(unscaled_y_test) - np.min(unscaled_y_test)) * 100

print("RMSE      : {:.2f}".format(rmse))

In [None]:
from sklearn.metrics import mean_absolute_error
# Mean absolute error: the average magnitude of the forecast errors, in price units.
mae = mean_absolute_error(unscaled_y_test, y_test_predicted)
print("MAE      : {:.2f}".format(mae))

In [None]:
#define MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error between two array-likes.

    Both inputs are converted to NumPy arrays; the result is the mean of
    ``|y_true - y_pred| / |y_true|`` expressed as a percentage. No special
    handling is applied where ``y_true`` is zero.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_error = (actual - forecast) / actual
    return np.mean(np.abs(relative_error)) * 100
# Mean absolute percentage error of the test predictions against the unscaled test targets.
mape = mean_absolute_percentage_error(unscaled_y_test, y_test_predicted)
print("MAPE      : {:.2f}".format(mape))

## Plot Chart

In [None]:
# Open prices aligned with the model targets, for plotting.
# Positional slices are used because .loc slices include their end label, which made
# `train` one row too long and overlap the first row of `actual`.
open_prices = data['open']
train = open_prices.iloc[history_points:n + history_points]
actual = open_prices.iloc[n + history_points:]
print(len(train))
print(len(actual))

In [None]:
# Pair every actual test-period open price with its prediction, keeping the row index of `actual`.
valid = pd.DataFrame({
    'Actual': actual,
    'Prediction': y_test_predicted.ravel(),
})

## Display Chart

In [None]:
# Visualize the data
# Training-period open price followed by the actual and predicted open price of the test period.
plt.figure(figsize=(16,8))
plt.title('Reliance Industries', fontsize=16)
plt.xlabel('5-minute interval', fontsize=16)
plt.ylabel('Open Price INR', fontsize=16)
plt.plot(train)
# Plotting the two-column frame draws one line per column, in column order.
plt.plot(valid[['Actual','Prediction']])
plt.legend(['Train', 'Actual', 'Prediction'], loc='lower right')
plt.show()

## Display Actual vs Predicted

In [None]:
# Visualize the data
# Actual vs. predicted open price over the test period only.
plt.figure(figsize=(16,8))
plt.title('Reliance Industries', fontsize=16)
# The x-axis is the integer row index (the date column was dropped earlier), so it is
# labelled as a 5-minute interval count rather than 'Date'.
plt.xlabel('5-minute interval', fontsize=16)
plt.ylabel('Open Price INR', fontsize=16)
plt.plot(valid[['Actual','Prediction']])
plt.legend(['Actual', 'Prediction'], loc='lower right')
plt.show()

## Plot Stock Prediction

In [None]:
# Plot stock prediction
# Real vs. predicted open price over the test period, in price units.
plt.gcf().set_size_inches(22, 15, forward=True)
start = 0
end = None  # None keeps the final sample; the previous value of -1 silently dropped it
plt.plot(unscaled_y_test[start:end], label='Real')
plt.plot(y_test_predicted[start:end], label='Predicted')
plt.title('Reliance Industries')
plt.legend()
plt.show()

## The End