<a href="https://colab.research.google.com/github/johnlangen/StockPredictionsWithML/blob/main/randomforest_onlyclose.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
import math
from itertools import cycle

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [66]:
# Download daily AAPL price history between start_date and end_date
# (2021-01-01 to 2023-04-01).
ticker = "AAPL"
start_date, end_date = "2021-01-01", "2023-04-01"

# Turn the date index into an ordinary column, then discard rows that
# contain missing values.
apple_data = (
    pd.DataFrame(yf.download(ticker, start=start_date, end=end_date))
    .reset_index()
    .dropna()
)

[*********************100%***********************]  1 of 1 completed


In [67]:
# Preview the first five rows to sanity-check the downloaded columns.
apple_data.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-01-04,133.520004,133.610001,126.760002,129.410004,127.680008,143301900
1,2021-01-05,128.889999,131.740005,128.429993,131.009995,129.258591,97664900
2,2021-01-06,127.720001,131.050003,126.379997,126.599998,124.907555,155088000
3,2021-01-07,128.360001,131.630005,127.860001,130.919998,129.1698,109578200
4,2021-01-08,132.429993,132.630005,130.229996,132.050003,130.284683,105158200


In [68]:
#gets previous 10 day closing prices as input
def create_dataset(dataset, time_step=10, drop_last=True):
    """Build sliding-window samples from the first column of a 2-D array.

    Each sample's features are ``time_step`` consecutive values of column 0
    and its target is the value immediately after that window.

    Parameters
    ----------
    dataset : array-like, 2-D
        Input series; only column 0 is read.
    time_step : int, default 10
        Number of consecutive values used as features for one sample.
    drop_last : bool, default True
        When True (the historical behaviour) the final valid window is left
        out, giving ``len(dataset) - time_step - 1`` samples. The default keeps
        the sample count the rest of the notebook was written against. Pass
        False to also use the last window (``len(dataset) - time_step``
        samples).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Features of shape ``(n_samples, time_step)`` and targets of shape
        ``(n_samples,)``. If the input is too short for a single window the
        arrays are empty but still have those shapes.

    Raises
    ------
    ValueError
        If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - time_step - (1 if drop_last else 0), 0)

    # Row r of `window_idx` holds the positions r, r+1, ..., r+time_step-1.
    window_idx = np.arange(n_samples)[:, None] + np.arange(time_step)[None, :]
    dataX = series[window_idx]
    # Copy so the targets do not alias the caller's array.
    dataY = series[time_step:time_step + n_samples].copy()
    return dataX, dataY

In [69]:
# Keep only the closing price, as a one-column frame; this is the series
# the model is trained to predict.
closing_prices = apple_data.loc[:, ['Close']]


In [70]:
# Min-max scale the closing prices into the range [0, 1].
# The scaler is fit from the raw `apple_data` column rather than from
# `closing_prices`, so re-running this cell can never fit it on values that
# were already scaled (which would make `scalar.inverse_transform` stop
# returning prices).
# NOTE(review): the scaler is fit on the full series before the train/test
# split, so the test period's min/max influence the training inputs.
scalar = MinMaxScaler(feature_range=(0, 1))
closing_prices = scalar.fit_transform(np.array(apple_data[['Close']]).reshape(-1, 1))


In [71]:
# Chronological split: the first 65% of the rows form the training set and
# the remaining rows form the test set.
train_size = int(.65 * len(closing_prices))
test_size = len(closing_prices) - train_size

train_data, test_data = closing_prices[:train_size], closing_prices[train_size:]



In [72]:
# Window length: how many consecutive scaled closes feed each prediction.
time_step = 10

# Turn each split into (features, target) arrays.
x_train, y_train = create_dataset(dataset=train_data, time_step=time_step)
x_test, y_test = create_dataset(dataset=test_data, time_step=time_step)

In [73]:
# Random forest with 100 trees; the fixed random_state keeps runs repeatable.
regressor = RandomForestRegressor(random_state=0, n_estimators=100)
regressor.fit(X=x_train, y=y_train)

In [74]:
# Predict on both splits (values are still in scaled units) and shape each
# result as a single column.
train_predict = regressor.predict(x_train).reshape(-1, 1)
test_predict = regressor.predict(x_test).reshape(-1, 1)

print("Train data prediction:", train_predict.shape)
print("Test data prediction:", test_predict.shape)

Train data prediction: (356, 1)
Test data prediction: (187, 1)


In [75]:
# Convert predictions and targets from scaled units back to prices.
# The predictions are recomputed from the model here instead of
# inverse-transforming `train_predict` / `test_predict` in place, so running
# this cell more than once cannot apply the inverse transform twice.
train_predict = scalar.inverse_transform(regressor.predict(x_train).reshape(-1, 1))
test_predict = scalar.inverse_transform(regressor.predict(x_test).reshape(-1, 1))
original_ytrain = scalar.inverse_transform(y_train.reshape(-1, 1))
original_ytest = scalar.inverse_transform(y_test.reshape(-1, 1))

In [76]:
def _print_errors(split_name, actual, predicted):
    """Print RMSE, MSE and MAE for one split, followed by a separator line."""
    mse = mean_squared_error(actual, predicted)
    print(f"{split_name} data RMSE: ", math.sqrt(mse))
    print(f"{split_name} data MSE: ", mse)
    print(f"{split_name} data MAE: ", mean_absolute_error(actual, predicted))
    print("-------------------------------------------------------------------------------------")


# Error metrics are computed on the inverse-transformed (price) values.
_print_errors("Train", original_ytrain, train_predict)
_print_errors("Test", original_ytest, test_predict)

# regressor.score is evaluated on the scaled features and targets.
print("The train model score is", regressor.score(x_train, y_train))
print("The test model score is ", regressor.score(x_test, y_test))

Train data RMSE:  1.1220649715583393
Train data MSE:  1.259029800398217
Train data MAE:  0.8449979374917728
-------------------------------------------------------------------------------------
Test data RMSE:  3.1722883113524474
Test data MSE:  10.063413130343363
Test data MAE:  2.470922556994434
-------------------------------------------------------------------------------------
The train model score is 0.9955561080102
The test model score is  0.9067387486938585


In [85]:
# Overlay the train and test predictions on the actual closing prices.
closing_prices = scalar.fit_transform(np.array(apple_data[['Close']]).reshape(-1,1))
close_stock = apple_data[["Date" , "Close"]]

# Row at which each split's first prediction belongs. Every prediction needs
# `time_step` earlier closes, so predictions start `time_step` rows into their
# split; the test split itself starts at row `train_size`. Deriving the slices
# from these values and the prediction lengths (rather than hand-computed
# constants) keeps them aligned however many windows each split produced.
train_start = time_step
test_start = train_size + time_step

# NaN-filled canvases the same shape as the price series; NaN rows are simply
# not drawn, so each curve appears only where predictions exist.
trainPredictPlot = np.full_like(closing_prices, np.nan)
trainPredictPlot[train_start:train_start + len(train_predict), :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.full_like(closing_prices, np.nan)
testPredictPlot[test_start:test_start + len(test_predict), :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

# Legend labels, consumed in trace order by for_each_trace below.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

#plot
plotdf = pd.DataFrame({'date': close_stock['Date'],
                       'original_close': close_stock['Close'],
                       'train_predicted_close': trainPredictPlot.ravel().tolist(),
                       'test_predicted_close': testPredictPlot.ravel().tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (565, 1)
Test predicted data:  (565, 1)


In [78]:
# Seed window for forecasting: the final `time_step` scaled closes of the test
# split, as a (1, time_step) model input and as a plain list of floats.
# (The following cell repeats this and also prints the list.)
x_input = test_data[test_data.shape[0] - time_step:].reshape(1, -1)
temp_input = x_input[0].tolist()


In [79]:
# Take the last `time_step` scaled closing prices of the test split: once as a
# single-row model input, once flattened into a Python list for display.
x_input = test_data[test_data.shape[0] - time_step:].reshape(1, -1)
temp_input = x_input.ravel().tolist()
print(temp_input)


[0.6251332384104082, 0.6537700252740422, 0.6316832456388681, 0.6484386294468392, 0.6685453689280356, 0.6385377313978724, 0.6289413118794505, 0.6764662268230244, 0.7006855183038083, 0.7393754424816814]


In [80]:
# Recursive multi-step forecast: predict one day ahead, append that prediction
# to the input window, drop the oldest value, and repeat `pred_days` times.
# All values stay in scaled units; `pred_output` collects the predictions.
pred_output = []
n_steps = time_step
pred_days = 10

# Rebuild the starting window from `test_data` on every run instead of reusing
# (and mutating) state left behind by an earlier cell, so re-running this cell
# always produces the same forecast.
window = test_data[len(test_data) - n_steps:, 0].tolist()

for _ in range(pred_days):
    y_pred = regressor.predict(np.array(window).reshape(1, -1))
    pred_output.extend(y_pred.tolist())
    # Slide forward one day: discard the oldest value, add the new prediction.
    window = window[1:] + y_pred.tolist()
        


In [84]:
# Day numbers for the observed window and the forecast horizon (not used by
# the plot below, which uses the frame's integer index as the x axis).
last_days=np.arange(1,time_step+1)
day_pred=np.arange(time_step+1,time_step+pred_days+1)

# Three aligned columns covering the observed window followed by the forecast
# horizon. Each starts as all-NaN and is filled only where it has data, so
# every curve is drawn over its own part of the x axis. The columns are built
# at their final length directly, instead of relying on slice assignment to
# resize an over-long list.
total_days = time_step + pred_days
last_days_value = np.full(total_days, np.nan)
next_days_value = np.full(total_days, np.nan)
actual_days_value = np.full(total_days, np.nan)

# Hard-coded comparison values for the forecast days.
# NOTE(review): presumably the real AAPL closes following end_date - confirm the source.
actual_values = [166.17, 165.63, 163.76, 164.66, 162.03, 160.8,160.1, 165.56, 165.21, 165.23]
shown_actuals = actual_values[:pred_days]
actual_days_value[time_step:time_step + len(shown_actuals)] = shown_actuals

# Forecasts and the last observed closes are converted back from scaled units.
next_days_value[time_step:] = scalar.inverse_transform(np.array(pred_output).reshape(-1,1)).ravel()
last_days_value[:time_step] = scalar.inverse_transform(closing_prices[len(closing_prices)-time_step:]).ravel()

# Legend labels, consumed in trace order by for_each_trace below.
names = cycle(['Last 10 days Closing Price','Predicted next 10 days Closing Price', 'Actual Next 10 Days Clsoing Price'])

#plot
new_pred_plot = pd.DataFrame({
    'last_days':last_days_value,
    'next_days':next_days_value,
    'actual_days': actual_days_value
})

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_days'],
                                                      new_pred_plot['next_days'],
                                                      new_pred_plot['actual_days']],
              labels={'value': 'Stock price','index':'Time'})

fig.update_layout(title_text='Compare last 10 days vs next 10 days',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()