### Stock Price Prediction using Stacked LSTM

#### 1) We will collect the stocks data - AAPL
#### 2) Preprocess the data - Train & Test
#### 3) Create a Stacked LSTM model
#### 4) Predict the test data and plot the output
#### 5) Predict the next 90 days and plot the output


In [None]:
# Data Collection

import pandas_datareader as pdr
import pandas as pd

In [None]:
# Fetch AAPL daily prices (2015-01-01 to 2024-01-01) from the Tiingo REST API

import os
import requests

# NOTE(review): an API token was committed in this notebook. It is kept only as
# a fallback so the cell still runs; revoke it and set TIINGO_API_TOKEN instead.
api_token = os.environ.get(
    "TIINGO_API_TOKEN", "b1ab3b096f8156cc006b0c83b06decdd0290b522"
)
headers = {
    'Content-Type': 'application/json'
}
response = requests.get(
    "https://api.tiingo.com/tiingo/daily/aapl/prices",
    params={"startDate": "2015-1-1", "endDate": "2024-1-1", "token": api_token},
    headers=headers,
    timeout=30,  # do not hang the notebook forever on a stalled connection
)
# Fail here on an HTTP error instead of when a later cell parses the body.
response.raise_for_status()
# json is a method: call it so the payload is printed, not the bound-method repr.
print(response.json())


In [None]:
# Keep a text copy of the raw response body (bytes decoded as UTF-8).
data_str = response.content.decode('utf-8')

# Deserialize the JSON payload. The loop below treats it as a sequence of
# per-record dicts -- presumably one per trading day; verify against the API.
data = response.json()

# Tag every record with the ticker so it becomes a column of the table.
default_value = "AAPL"
for record in data:
    record.update(symbol=default_value)

# Dump the tagged records for inspection.
print(data)


In [None]:
# Tabulate the records and persist them; the row index is not written out.
df = pd.DataFrame(data=data)
df.to_csv(path_or_buf='AAPL_data.csv', index=False)

print("Data has been saved to AAPL_data.csv")

In [None]:
# Reload the table from the CSV written by the previous cell.
df = pd.read_csv(filepath_or_buffer='AAPL_data.csv')

In [None]:
# Preview the first five rows of the reloaded table.
df.head(5)

In [None]:
# Move the 'symbol' column to the front of the table.
# The column is selected by name rather than by "whatever is last", so the
# cell stays correct if the column order changes and can be re-run without
# hitting "cannot insert symbol, already exists".
symbol_column = df.pop('symbol')
df.insert(0, 'symbol', symbol_column)

# Save the reordered table alongside the raw export.
df.to_csv('Modified_AAPL_data.csv', index=False)

# Display the modified DataFrame
df.head(5)

In [None]:
# Preview the last five rows of the table.
df.tail(5)

In [None]:
# Work on the 'close' column only; the model is trained on this single series.
df1 = df["close"]

In [None]:
# Display the selected 'close' series.
df1

In [None]:
import matplotlib.pyplot as plt
# Line plot of the 'close' series before any scaling.
plt.plot(df1)

In [None]:
### LSTMs are sensitive to the scale of their input, so the series is rescaled to [0, 1]
import numpy as np
from sklearn.preprocessing import MinMaxScaler

### MinMaxScaler expects a 2-D array of shape (n_samples, n_features), so the
### 1-D series is turned into a single-column array first.
close_column = np.array(df1).reshape(-1, 1)

### Fit the scaler on the whole series and replace df1 with the scaled values.
### The fitted scaler is kept so later cells can map values back to prices.
scaler = MinMaxScaler(feature_range=(0, 1))
df1 = scaler.fit_transform(close_column)
print(df1)

In [None]:
## Ordered split: the first 65% of rows train the model, the remainder tests it.
training_size = int(len(df1) * 0.65)
test_size = len(df1) - training_size
train_data = df1[:training_size, :]
test_data = df1[training_size:, :1]

In [None]:
# Display the number of rows in the train and test partitions.
training_size,test_size

In [None]:
import numpy
# convert an array of values into a dataset matrix
def create_dataset(dataset, time_step=1):
    """Slice a 2-D series into sliding-window inputs and next-step targets.

    Only column 0 of ``dataset`` is read. Window ``k`` holds rows ``k`` through
    ``k + time_step - 1`` and its target is row ``k + time_step``. Exactly
    ``len(dataset) - time_step - 1`` windows are produced (none when that
    count is zero or negative).

    Returns a ``(windows, targets)`` pair of numpy arrays.
    """
    n_windows = len(dataset) - time_step - 1
    windows = [dataset[k:k + time_step, 0] for k in range(n_windows)]
    targets = [dataset[k + time_step, 0] for k in range(n_windows)]
    return numpy.array(windows), numpy.array(targets)

In [None]:
# Build supervised samples: each X row holds time_step consecutive scaled
# values and the matching y is the value that immediately follows that window.
time_step = 100
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

In [None]:
# Printed as two statements: joining the calls with a comma builds a
# (None, None) tuple that the notebook then echoes as the cell's output.
print(X_train.shape)
print(y_train.shape)

In [None]:
# Printed as two statements: joining the calls with a comma builds a
# (None, None) tuple that the notebook then echoes as the cell's output.
print(X_test.shape)
print(y_test.shape)

In [None]:
# The LSTM layers take 3-D input [samples, time steps, features]; the series
# is univariate, so a trailing feature axis of length 1 is added.
X_train = X_train.reshape(X_train.shape[:2] + (1,))
X_test = X_test.reshape(X_test.shape[:2] + (1,))

In [None]:
### Create the Stacked LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

In [None]:
# Three stacked 50-unit LSTM layers followed by a single-value output layer.
# The first two LSTMs return the full sequence so the next LSTM receives one
# vector per time step; the third returns only its final output.
model = Sequential([
    # The window length comes from time_step, so the model's input shape
    # cannot drift out of sync with the samples built by create_dataset.
    LSTM(50, return_sequences=True, input_shape=(time_step, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Train for 100 epochs in batches of 64; the test windows are passed as
# validation data so their loss is reported alongside the training loss.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=64,
    verbose=1,
)

In [None]:
import tensorflow as tf

In [None]:
### Run the trained model over both splits (outputs are still in scaled units)
train_predict, test_predict = model.predict(X_train), model.predict(X_test)

In [None]:
## Map the predictions from the scaler's 0-1 range back to the original units
train_predict, test_predict = (
    scaler.inverse_transform(train_predict),
    scaler.inverse_transform(test_predict),
)

In [None]:
### Train RMSE, expressed in the original price units
import math
from sklearn.metrics import mean_squared_error
# train_predict has already been inverse-transformed, while y_train is still in
# the scaler's 0-1 range. The targets are brought back to the same units first;
# otherwise the "error" is essentially just the price level.
y_train_actual = scaler.inverse_transform(y_train.reshape(-1, 1))
math.sqrt(mean_squared_error(y_train_actual, train_predict))

In [None]:
### Test RMSE, expressed in the original price units
# test_predict has already been inverse-transformed, while y_test is still in
# the scaler's 0-1 range; compare both in the same units.
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
math.sqrt(mean_squared_error(y_test_actual, test_predict))

In [None]:
### Plotting
# Each prediction is placed at the row it forecasts; rows without a prediction
# stay NaN so nothing is drawn there.
look_back=100

# Train predictions begin after the first look_back rows.
trainPredictPlot = numpy.full_like(df1, numpy.nan)
train_stop = len(train_predict) + look_back
trainPredictPlot[look_back:train_stop, :] = train_predict

# Test predictions are offset past the train predictions and both windows.
testPredictPlot = numpy.full_like(df1, numpy.nan)
test_start = len(train_predict) + (look_back * 2) + 1
testPredictPlot[test_start:len(df1) - 1, :] = test_predict

# Actual series (mapped back from the scaled values) with both overlays.
for series in (scaler.inverse_transform(df1), trainPredictPlot, testPredictPlot):
    plt.plot(series)
plt.show()

In [None]:
# Display the number of rows in the test partition.
len(test_data)

In [None]:
# Seed the forecast with the most recent 300 scaled values, as a single row.
# Slicing from the end replaces a hard-coded start index of 493, which only
# yields 300 values when test_data has exactly 793 rows.
# NOTE(review): the model is declared with input_shape=(100, 1); confirm that
# a 300-step seed is really intended.
x_input = test_data[-300:].reshape(1, -1)
x_input.shape

In [None]:
# Flatten the single-row seed into a plain Python list of floats.
temp_input = x_input[0].tolist()

In [None]:
# Display the seed values that the forecast starts from.
temp_input

In [None]:
# Forecast the next 90 days one step at a time, feeding every prediction back
# in as the newest observation.
from numpy import array

lst_output = []
# The model is built and trained on windows of time_step values, so every
# forecast input must have exactly that length.
n_steps = time_step
# Start from the most recent n_steps scaled values; a longer seed is trimmed.
temp_input = temp_input[-n_steps:]
for i in range(90):
    x_input = np.array(temp_input).reshape((1, n_steps, 1))
    yhat = model.predict(x_input, verbose=0)
    print("{} day output {}".format(i, yhat))
    # Slide the window: drop the oldest value and append the new prediction.
    temp_input = temp_input[1:] + yhat[0].tolist()
    # Keep each prediction as a one-element row so the list can be passed
    # straight to scaler.inverse_transform.
    lst_output.extend(yhat.tolist())

print(lst_output)

In [None]:
# x-axis positions: 300 days of history followed by the 90 forecast days.
history_days, forecast_days = 300, 90
day_new = np.arange(1, history_days + 1)
day_pred = np.arange(history_days + 1, history_days + forecast_days + 1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Display the number of rows in the scaled series.
len(df1)

In [None]:
# Recent history followed by the forecast, both mapped back to price units.
# The history slice is sized from day_new instead of starting at a hard-coded
# row 1964, which only lines up when df1 has exactly 2264 rows.
plt.plot(day_new, scaler.inverse_transform(df1[-len(day_new):]))
plt.plot(day_pred, scaler.inverse_transform(lst_output))

In [None]:
# Append the forecast rows to the scaled history and plot from row 1000 onward.
df3 = df1.tolist() + lst_output
plt.plot(df3[1000:])

In [None]:
# Map the combined history + forecast back to price units, stored as a list.
# Re-running this cell would apply the inverse transform a second time.
df3_prices = scaler.inverse_transform(df3)
df3 = df3_prices.tolist()

In [None]:
# Plot the full series in df3 (history followed by the appended forecast).
plt.plot(df3)