<a href="https://colab.research.google.com/github/anjanshrestha123/Technical-Analysis-For-Stock-Price-Prediction/blob/master/technical_analysis_for_stock_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Technical Analysis for Stock Price Prediction #


Reference (Citation): https://github.com/krishnaik06/Stock-MArket-Forecasting

**Todo:**
1. Create different models and compare accuracy
2. Cross validation and hyperparameter tuning
3. Predict Trend
4. Format Code

### Imports and Packages ###

In [None]:
# For dealing with dataframe
import pandas as pd

# For dealing with np array
import numpy as np

# For calling yahoo finance to get stock price 
import pandas_datareader as pdr
import datetime as dt
from datetime import timedelta

# For plotting
import matplotlib.pyplot as plt

# For model
import math
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import plot_confusion_matrix

# For upgrading pandas datareader module
!pip install --upgrade pandas_datareader

Collecting pandas_datareader
  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
[K     |████████████████████████████████| 109 kB 5.1 MB/s 
Installing collected packages: pandas-datareader
  Attempting uninstall: pandas-datareader
    Found existing installation: pandas-datareader 0.9.0
    Uninstalling pandas-datareader-0.9.0:
      Successfully uninstalled pandas-datareader-0.9.0
Successfully installed pandas-datareader-0.10.0


### Model Properties ###

In [None]:
# Yahoo Finance API Properties
# Length of price history to request, in years (converted to days as years * 365 when fetching)
NUMBER_OF_YEARS_TO_FETCH_PRICE_DATA = 30

# Dataset Properties: column names of the fetched price DataFrame
DATE = 'Date'      # not referenced by the cells in this notebook
CLOSE = 'Close'    # the only column used to build the modelling series
VOLUME = 'Volume'  # not referenced by the cells in this notebook

# Stock Properties
STOCK_TICKER = 'TSLA' # Stock ticker name to run the model

# Hyperparameters
# Look-back window: number of consecutive closing prices used as features for one prediction
NUMBER_OF_DAYS_FOR_PRICE_PREDICTION = 100
# Forecast horizon: number of future days predicted recursively
NUMBER_OF_DAYS_TO_PREDICT = 10
# Number of training epochs for the LSTM
NUMBER_OF_EPOCHS = 25

##  1. Extract Raw data and Dataset Creation ##
Call the Yahoo Finance API to extract price data for the last `NUMBER_OF_YEARS_TO_FETCH_PRICE_DATA` years (currently 30) up to the current date and load it into a DataFrame.

In [None]:
# Getting start and end date for stock data.
# The window ends today and spans NUMBER_OF_YEARS_TO_FETCH_PRICE_DATA years,
# approximated as 365-day years (leap days are ignored).
end_date = dt.date.today()
start_date = end_date - timedelta(days=NUMBER_OF_YEARS_TO_FETCH_PRICE_DATA * 365)  # Getting start date as last 'n' number of years from now

# Calling Yahoo Finance API for the last NUMBER_OF_YEARS_TO_FETCH_PRICE_DATA years of stock data.
# NOTE(review): the saved output of this cell is a RemoteDataError, i.e. this request
# failed on the last recorded run; every later cell depends on `df` being populated here.
df = pdr.get_data_yahoo(STOCK_TICKER, start = start_date, end = end_date)

RemoteDataError: ignored

In [None]:
# Display the fetched price DataFrame (rendered by the notebook as the cell's last expression)
df

In [None]:
# Display the dataset dimensions as (number of rows, number of columns)
df.shape

## 2. Exploratory Data Analysis

In [None]:
# Line chart of the closing price against the DataFrame index
df.plot(
    y=[CLOSE],
    figsize=(15, 10),
    ylabel='Stock Price',
)

## 3. Feature Engineering ##

In [None]:
# Keep only the closing-price column, renumbered 0..n-1 so later cells can slice it by position
model_df = df[CLOSE].reset_index(drop=True)
model_df

## 4. Split data into train and test ##


### I. Splitting Data for LSTM ###


In [None]:
# Transform the closing prices into the 0-1 range, since LSTMs are sensitive to the scale of the data.
# The series is reshaped into a single column because the scaler works on 2-D input.
# NOTE(review): the scaler is fit on the whole series, including the rows that are later
# held out as the test split.
scaler = MinMaxScaler(feature_range=(0, 1))
model_df_lstm = scaler.fit_transform(np.asarray(model_df).reshape(-1, 1))
model_df_lstm

In [None]:
# Chronological split (no shuffling, because this is time-series data):
# the first 70% of rows form the training set and the remaining 30% the testing set.
train_index_lstm = 0.7 * len(model_df_lstm)
train_data_lstm, test_data_lstm = np.split(model_df_lstm, [int(train_index_lstm)])
test_data_lstm

In [None]:
# Function to turn a single-column 2-D series into (feature window, target) pairs
def create_dataset_lstm(dataset, time_step=1):
    """Build sliding-window samples from column 0 of a 2-D array.

    Sample ``k`` uses rows ``k .. k + time_step - 1`` as features and row
    ``k + time_step`` as the target.

    Args:
        dataset: 2-D array indexable as ``dataset[rows, 0]``.
        time_step: Number of consecutive rows in each feature window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. ``len(dataset) - time_step - 1``
        samples are produced, so the final possible window is not emitted;
        when that count is not positive both arrays are empty.
    """
    sample_count = len(dataset) - time_step - 1
    windows = [dataset[start:start + time_step, 0] for start in range(sample_count)]
    targets = [dataset[start + time_step, 0] for start in range(sample_count)]
    return np.array(windows), np.array(targets)
        

In [None]:
# Window each split separately, so no feature window straddles the train/test boundary
X_train_lstm, y_train_lstm = create_dataset_lstm(
    train_data_lstm,
    time_step=NUMBER_OF_DAYS_FOR_PRICE_PREDICTION,
)
X_test_lstm, y_test_lstm = create_dataset_lstm(
    test_data_lstm,
    time_step=NUMBER_OF_DAYS_FOR_PRICE_PREDICTION,
)

In [None]:
# The LSTM needs input shaped [samples, time steps, features]; add a trailing feature axis of size 1
X_train_lstm = np.reshape(X_train_lstm, (X_train_lstm.shape[0], X_train_lstm.shape[1], 1))
X_test_lstm = np.reshape(X_test_lstm, (X_test_lstm.shape[0], X_test_lstm.shape[1], 1))

print('Training Shape: ', X_train_lstm.shape)
print('Testing Shape: ', X_test_lstm.shape)

### II. Splitting Data for other Models ###


In [None]:
# Chronological split on the unscaled closing prices (time-series data, so no shuffling):
# the first 70% of rows become the training list, the last 30% the testing list.
train_index = 0.7 * len(model_df)
train_data = model_df.iloc[:int(train_index)].tolist()
test_data = model_df.iloc[int(train_index):].tolist()

In [None]:
# Function to turn a flat sequence into (feature window, target) pairs
def create_dataset(dataset, time_step=1):
    """Build sliding-window samples from a 1-D sequence.

    Sample ``k`` uses items ``k .. k + time_step - 1`` as features and item
    ``k + time_step`` as the target.

    Args:
        dataset: Sequence supporting ``len``, slicing and integer indexing.
        time_step: Number of consecutive items in each feature window.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. ``len(dataset) - time_step - 1``
        samples are produced, so the final possible window is not emitted;
        when that count is not positive both arrays are empty.
    """
    sample_count = len(dataset) - time_step - 1
    windows = [dataset[start:start + time_step] for start in range(sample_count)]
    targets = [dataset[start + time_step] for start in range(sample_count)]
    return np.array(windows), np.array(targets)

In [None]:
# Window each split separately, so no feature window straddles the train/test boundary
X_train, y_train = create_dataset(
    train_data,
    time_step=NUMBER_OF_DAYS_FOR_PRICE_PREDICTION,
)
X_test, y_test = create_dataset(
    test_data,
    time_step=NUMBER_OF_DAYS_FOR_PRICE_PREDICTION,
)

## 5. Create and Train Different Models ##

### I. Create and Train LSTM Model ###

In [None]:
# Stacked LSTM: three 50-unit LSTM layers followed by a single-unit dense output.
# The first two LSTM layers return full sequences so the next LSTM layer receives one
# vector per time step; the last one returns only its final output.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(NUMBER_OF_DAYS_FOR_PRICE_PREDICTION, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

In [None]:
# Train the LSTM; the test split is passed as validation data so its loss is reported every epoch
model.fit(
    X_train_lstm,
    y_train_lstm,
    validation_data=(X_test_lstm, y_test_lstm),
    epochs=NUMBER_OF_EPOCHS,
    batch_size=64,
    verbose=1,
)

### II. Create and Train Linear Regression ###

In [None]:
# Linear Regression on the unscaled windows: build the estimator, then fit it (fit returns the estimator)
reg = LinearRegression()
reg = reg.fit(X_train, y_train)

### III. Create and Train Decision Tree Regression ###

In [None]:
# Decision Tree regressor on the unscaled windows: build the estimator, then fit it (fit returns the estimator)
d_tree = DecisionTreeRegressor()
d_tree = d_tree.fit(X_train, y_train)

### IV. Create and Train KNN Regression 


In [None]:
# K-Nearest-Neighbours regressor on the unscaled windows: build the estimator, then fit it (fit returns the estimator)
knn = KNeighborsRegressor()
knn = knn.fit(X_train, y_train)

## 6. Evaluate the Model ##

In [None]:
# Plot a model's train and test predictions on top of the actual series
def plot_graph(model_df, model, X_train, X_test):
    """Plot the baseline series with train and test predictions aligned to it.

    Args:
        model_df: Full series drawn as the baseline; its length sets the x-range.
        model: Fitted estimator exposing ``predict``.
        X_train: Feature windows of the training split.
        X_test: Feature windows of the testing split.

    The look-back window length is read from NUMBER_OF_DAYS_FOR_PRICE_PREDICTION.
    Positions without a prediction are left as NaN so nothing is drawn there.
    """
    fitted_train = model.predict(X_train)
    fitted_test = model.predict(X_test)

    window = NUMBER_OF_DAYS_FOR_PRICE_PREDICTION
    total_points = len(model_df)

    # The first `window` points are only ever used as features, so train predictions start after them
    train_series = [np.nan] * total_points
    train_series[window:window + len(fitted_train)] = np.array(fitted_train)

    # Test predictions start one look-back window into the test split; the extra +1 and the
    # trailing -1 match the one sample per split that the dataset builder does not emit
    test_series = [np.nan] * total_points
    test_start = len(fitted_train) + (window * 2) + 1
    test_series[test_start:total_points - 1] = list(fitted_test)

    # Baseline first, then the two prediction overlays
    plt.plot(model_df)
    plt.plot(train_series)
    plt.plot(test_series)
    plt.show()


### I. Evaluate LSTM ###

In [None]:
# Run the trained LSTM over both splits (outputs are still in the scaled 0-1 space)
train_predict_lstm, test_predict_lstm = (
    model.predict(features) for features in (X_train_lstm, X_test_lstm)
)

In [None]:
# Map the scaled predictions back to price units.
# The variables are overwritten in place, so re-running this cell on its own would
# apply the inverse transform a second time.
train_predict_lstm, test_predict_lstm = map(
    scaler.inverse_transform,
    (train_predict_lstm, test_predict_lstm),
)

In [None]:
# Test Data RMSE, in price units.
# y_test_lstm was built from the MinMax-scaled series, while test_predict_lstm has already
# been inverse-transformed back to prices. The targets are mapped back to prices as well,
# so both sides of the error are on the same scale and the result is comparable with the
# RMSE of the other models (which are computed on unscaled prices).
y_test_lstm_price = scaler.inverse_transform(np.asarray(y_test_lstm).reshape(-1, 1))
rmse_lstm = math.sqrt(mean_squared_error(y_test_lstm_price, test_predict_lstm))
rmse_lstm

In [None]:
# Align the LSTM predictions with the full series; NaN marks positions with no prediction
look_back = NUMBER_OF_DAYS_FOR_PRICE_PREDICTION

# Train predictions begin after the first look-back window
train_predict_plot = np.full_like(model_df_lstm, np.nan)
train_predict_plot[look_back:look_back + len(train_predict_lstm), :] = train_predict_lstm

# Test predictions begin one look-back window into the test split; the +1 / -1 match the
# one sample per split that the dataset builder does not emit
test_predict_plot = np.full_like(model_df_lstm, np.nan)
test_predict_plot[len(train_predict_lstm) + (look_back * 2) + 1:-1, :] = test_predict_lstm

# Baseline in price units, then the two prediction overlays
plt.plot(scaler.inverse_transform(model_df_lstm))
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()


### II. Evaluate Linear Regression ###

In [None]:
# Root-mean-squared error of Linear Regression on the test split (price units)
rmse_reg = math.sqrt(
    mean_squared_error(y_test, reg.predict(X_test))
)
rmse_reg

In [None]:
# Plot the Linear Regression train and test predictions over the actual closing prices
plot_graph(model_df, reg, X_train, X_test)

### III. Evaluate Decision Tree Regression ###

In [None]:
# Root-mean-squared error of the Decision Tree on the test split (price units)
rmse_tree = math.sqrt(
    mean_squared_error(y_test, d_tree.predict(X_test))
)
rmse_tree

In [None]:
# Plot the Decision Tree train and test predictions over the actual closing prices
plot_graph(model_df, d_tree, X_train, X_test)

### IV. Evaluate KNN Regression ###

In [None]:
# Root-mean-squared error of KNN on the test split (price units)
rmse_knn = math.sqrt(
    mean_squared_error(y_test, knn.predict(X_test))
)
rmse_knn

In [None]:
# Plot the KNN train and test predictions over the actual closing prices.
# This section evaluates KNN, so the fitted `knn` estimator is passed here
# (the decision tree is plotted in its own section).
plot_graph(model_df, knn, X_train, X_test)

## 8. Predict Stock Market Movement Trend for the Next `NUMBER_OF_DAYS_TO_PREDICT` Days (currently 10) Using Best Model ##

In [None]:
# For LSTM: recursive multi-step forecast in the scaled 0-1 space.
# Each predicted close is appended to the input window, so it becomes a feature for the
# prediction of the following day.
n_steps = NUMBER_OF_DAYS_FOR_PRICE_PREDICTION
if len(test_data_lstm) < n_steps:
    raise ValueError(
        'test_data_lstm has fewer rows than NUMBER_OF_DAYS_FOR_PRICE_PREDICTION; '
        'cannot build the initial input window'
    )

# Seed the rolling window with the most recent n_steps scaled closes
temp_input = test_data_lstm[-n_steps:, 0].tolist()

# demonstrate prediction for next n days
# lst_output collects one single-element list per forecast day (scaled values)
lst_output = []
for day in range(NUMBER_OF_DAYS_TO_PREDICT):
    # Always feed the latest n_steps values, shaped [samples, time steps, features]
    x_input = np.array(temp_input[-n_steps:]).reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)
    temp_input.extend(yhat[0].tolist())
    lst_output.extend(yhat.tolist())

# Full scaled history followed by the forecast, converted back to price units
df3 = model_df_lstm.tolist()
df3.extend(lst_output)
df3 = scaler.inverse_transform(df3).tolist()

# Draw history and forecast as separate, labelled lines so the forecast is identifiable
history_length = len(model_df_lstm)
plt.figure(figsize=(15, 10))
plt.plot(range(history_length), df3[:history_length], label='Historical close')
plt.plot(range(history_length, len(df3)), df3[history_length:], label='LSTM forecast')
plt.xlabel('Trading day index')
plt.ylabel('Stock Price')
plt.title('LSTM forecast appended to the historical closing price')
plt.legend()
plt.show()

In [None]:
# For other model (Linear Regression): recursive multi-step forecast in raw price units.
# The forecast is stored in `reg_forecast` rather than `lst_output`, because `lst_output`
# holds the scaled LSTM forecast that the LSTM next-day price cell reads afterwards.
lastest_data_index = len(test_data) - NUMBER_OF_DAYS_FOR_PRICE_PREDICTION
last_n_days_data = test_data[lastest_data_index:]

# demonstrate prediction for next n days
reg_forecast = []
for day in range(NUMBER_OF_DAYS_TO_PREDICT):
    next_day = reg.predict([last_n_days_data])[0]
    reg_forecast.append(next_day)
    # Slide the window: drop the oldest price and append the newest prediction
    last_n_days_data = last_n_days_data[1:] + [next_day]

# Most recent look-back window of actual prices, extended by the first forecast (when
# there is one) so the history line connects to the forecast line
lastest_model_df_index = len(model_df) - NUMBER_OF_DAYS_FOR_PRICE_PREDICTION
last_n_days = list(model_df[lastest_model_df_index:])
last_n_days.extend(reg_forecast[:1])

# x positions: history occupies 1..N (plus the connecting point), the forecast starts at N + 1
day_new = np.arange(1, len(last_n_days) + 1)
day_pred = np.arange(
    NUMBER_OF_DAYS_FOR_PRICE_PREDICTION + 1,
    NUMBER_OF_DAYS_FOR_PRICE_PREDICTION + len(reg_forecast) + 1,
)

plt.figure(figsize=(15, 10))
plt.plot(day_new, last_n_days, label='Recent close')
plt.plot(day_pred, reg_forecast, label='Linear Regression forecast')
plt.xlabel('Day')
plt.ylabel('Stock Price')
plt.title('Linear Regression forecast after the most recent look-back window')
plt.legend()
plt.show()

In [None]:
# Last closing price in the series (unscaled); presumably shown as a reference point for the forecasts
model_df.iloc[-1]

## 9. Predict Stock Price for Next Day Using Best Model ##

In [None]:
# Displaying next day stock closing price for LSTM.
# The prediction is computed here from the most recent look-back window of the scaled
# series instead of being read from `lst_output`: the linear-regression forecast cell
# reuses that name for unscaled prices, so its contents depend on which cell ran last.
latest_window_lstm = model_df_lstm[-NUMBER_OF_DAYS_FOR_PRICE_PREDICTION:].reshape(
    (1, NUMBER_OF_DAYS_FOR_PRICE_PREDICTION, 1)
)
next_day_scaled_lstm = model.predict(latest_window_lstm, verbose=0)
# inverse_transform maps the scaled prediction back to price units
print('Stock Closing Price for next day: ', scaler.inverse_transform(next_day_scaled_lstm)[0][0])

In [None]:
# Next-day closing price from Linear Regression: feed the most recent look-back window
# of unscaled prices as a single sample and take its one prediction
last_n_days_data = test_data[len(test_data) - NUMBER_OF_DAYS_FOR_PRICE_PREDICTION:]
next_day = reg.predict(np.array(last_n_days_data).reshape(1, -1))[0]
next_day