<a href="https://colab.research.google.com/github/amanjain252002/Stock-Price-Prediction/blob/main/ARIMA_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pmdarima



In [2]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARIMA

from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error

import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'numpy'

In [None]:
# Load the pre-processed price history of each ticker into its own DataFrame.
# NOTE(review): the /content/drive/... paths presumably require Google Drive
# to be mounted in Colab before this cell runs — confirm.
AAPL = pd.read_csv("/content/drive/MyDrive/Database/Pre_Processed_AAPL.csv")
TSLA = pd.read_csv("/content/drive/MyDrive/Database/Pre_Processed_TSLA.csv")
GOOG = pd.read_csv("/content/drive/MyDrive/Database/Pre_Processed_GOOG.csv")
MSFT = pd.read_csv("/content/drive/MyDrive/Database/Pre_Processed_MSFT.csv")
AMZN = pd.read_csv("/content/drive/MyDrive/Database/Pre_Processed_AMZN.csv")

The Augmented Dickey-Fuller (ADF) test is one of the most popular statistical tests for stationarity. It checks for the presence of a unit root in the series, which helps us understand whether the series is stationary.

**Null Hypothesis**: The series has a unit root

**Alternate Hypothesis**: The series has no unit root.

If we fail to reject the Null Hypothesis (p-value > 0.05), we have no evidence that the series is stationary, so we treat it as non-stationary.

In [None]:
def Test_Stationarity(timeseries, column = 'Adj. Close'):
    """Run the Augmented Dickey-Fuller test on one column and print the results.

    Args:
        timeseries: Object indexable by column name (the notebook passes
            the per-ticker pandas DataFrames).
        column: Name of the column to test. Defaults to 'Adj. Close', the
            column every existing call in this notebook relies on.

    Returns:
        None. The test statistic, p-value, number of lags used, number of
        observations used and the critical values are printed.
    """
    # autolag = 'AIC' lets adfuller choose the lag length by AIC.
    result = adfuller(timeseries[column], autolag = 'AIC')
    print("Results of Dickey Fuller Test")
    print(f'Test Statistics: {result[0]}')
    print(f'p-value: {result[1]}')
    print(f'Number of lags used: {result[2]}')
    print(f'Number of observations used: {result[3]}')
    # result[4] maps each significance level to its critical value.
    for key, value in result[4].items():
        print(f'critical value ({key}): {value}')

# Apple

In [None]:
# Preview the first rows of the Apple data.
AAPL.head()

In [None]:
# Print the DataFrame summary (columns, dtypes, non-null counts).
AAPL.info()

In [None]:
# Convert the Date column to datetime so it can be compared against the
# split date and used as the time axis in the plots below.
AAPL["Date"] = pd.to_datetime(AAPL["Date"])

In [None]:
# Print the Dickey-Fuller test results for Apple's 'Adj. Close' series.
Test_Stationarity(AAPL)

The p-value is greater than 0.05, so we cannot reject the Null Hypothesis. Hence, the series needs to be differenced to make it stationary; this is the “Integrated (I)” part of ARIMA, whose order is denoted by ‘d’, and it is chosen while building the Auto ARIMA model.

Now let's take the log of the 'Adj. Close' column to reduce the magnitude of the values and dampen the rising trend of the series.

In [None]:
# Add the natural log of the adjusted close as a new column.
AAPL['log Adj. Close'] = np.log(AAPL['Adj. Close'])
# 12-row rolling mean and standard deviation of the log series.
AAPL_log_moving_avg = AAPL['log Adj. Close'].rolling(12).mean()
AAPL_log_std = AAPL['log Adj. Close'].rolling(12).std()

# Plot both rolling statistics against the date.
plt.figure(figsize = (10, 5))
plt.plot(AAPL['Date'], AAPL_log_moving_avg, label = "Rolling Mean")
plt.plot(AAPL['Date'], AAPL_log_std, label = "Rolling Std")
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')
plt.title("Rolling Mean and Standard Deviation")

### Split the data into training and test set

    Training Period: 2015-01-02 - 2020-09-30

    Testing Period:  2020-10-01 - 2021-02-26







In [None]:
# Chronological split: rows dated before 2020-10-01 are used for training,
# rows from that date onwards are held out for testing.
AAPL_Train_Data = AAPL[AAPL['Date'] < '2020-10-01']
# Only the test frame gets a fresh 0-based index.
AAPL_Test_Data = AAPL[AAPL['Date'] >= '2020-10-01'].reset_index(drop = True)

# Plot the log series of both partitions on one figure.
plt.figure(figsize = (10, 5))
plt.plot(AAPL_Train_Data['Date'], AAPL_Train_Data['log Adj. Close'], label = 'Train Data')
plt.plot(AAPL_Test_Data['Date'], AAPL_Test_Data['log Adj. Close'], label = 'Test Data')
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')

### Modeling

In [None]:
# Let auto_arima search for a non-seasonal ARIMA model on the training log
# series (errors ignored, warnings suppressed), then print its summary.
AAPL_Auto_ARIMA_Model = auto_arima(AAPL_Train_Data['log Adj. Close'], seasonal = False,
                                   error_action = 'ignore', suppress_warnings = True)
print(AAPL_Auto_ARIMA_Model.summary())

In [None]:
# Fit an ARIMA model with the fixed order (1, 1, 0) on the training log series.
# NOTE(review): the order is hard-coded — presumably copied from the
# auto_arima summary; re-check it if the data changes.
AAPL_ARIMA_Model = ARIMA(AAPL_Train_Data['log Adj. Close'], order = (1, 1, 0))
AAPL_ARIMA_Model_Fit = AAPL_ARIMA_Model.fit()
print(AAPL_ARIMA_Model_Fit.summary())

### Predicting the closing stock price of Apple

In [None]:
# Forecast exactly one step per test row, so the predictions always line up
# with the test dates and prices (the horizon was previously hard-coded).
# NOTE(review): indexing the result with [0] assumes forecast() returns a
# tuple whose first element holds the point forecasts (legacy
# statsmodels.tsa.arima_model API) — confirm against the installed version.
AAPL_output = AAPL_ARIMA_Model_Fit.forecast(len(AAPL_Test_Data), alpha=0.05)
# The model was fit on log prices, so exponentiate to get back to price scale.
AAPL_predictions = np.exp(AAPL_output[0])
plt.figure(figsize=(10, 5))
plt.plot(AAPL_Train_Data['Date'], AAPL_Train_Data['Adj. Close'], label = 'Training')
plt.plot(AAPL_Test_Data['Date'], AAPL_Test_Data['Adj. Close'], label = 'Testing')
plt.plot(AAPL_Test_Data['Date'], AAPL_predictions, label = 'Predictions')
plt.xlabel('Time')
plt.ylabel('Closing Price')
plt.legend()

In [None]:
# Root mean squared error of the price-scale predictions on the test set.
rmse = math.sqrt(mean_squared_error(AAPL_Test_Data['Adj. Close'], AAPL_predictions))
# Mean absolute percentage error, kept as a fraction (not multiplied by 100).
mape = np.mean(np.abs(AAPL_predictions - AAPL_Test_Data['Adj. Close'])/np.abs(AAPL_Test_Data['Adj. Close']))

print(f'RMSE: {rmse}')
print(f'MAPE: {mape}')

# Tesla

In [None]:
# Preview the first rows of the Tesla data.
TSLA.head()

In [None]:
# Print the DataFrame summary (columns, dtypes, non-null counts).
TSLA.info()

In [None]:
# Convert the Date column to datetime so it can be compared against the
# split date and used as the time axis in the plots below.
TSLA["Date"] = pd.to_datetime(TSLA["Date"])

In [None]:
# Print the Dickey-Fuller test results for Tesla's 'Adj. Close' series.
Test_Stationarity(TSLA)

The p-value is greater than 0.05, so we cannot reject the Null Hypothesis. Hence, the series needs to be differenced to make it stationary; this is the “Integrated (I)” part of ARIMA, whose order is denoted by ‘d’, and it is chosen while building the Auto ARIMA model.

Now let's take the log of the 'Adj. Close' column to reduce the magnitude of the values and dampen the rising trend of the series.

In [None]:
# Add the natural log of the adjusted close as a new column.
TSLA['log Adj. Close'] = np.log(TSLA['Adj. Close'])
# 12-row rolling mean and standard deviation of the log series.
TSLA_log_moving_avg = TSLA['log Adj. Close'].rolling(12).mean()
TSLA_log_std = TSLA['log Adj. Close'].rolling(12).std()

# Plot both rolling statistics against the date.
plt.figure(figsize = (10, 5))
plt.plot(TSLA['Date'], TSLA_log_moving_avg, label = "Rolling Mean")
plt.plot(TSLA['Date'], TSLA_log_std, label = "Rolling Std")
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')
plt.title("Rolling Mean and Standard Deviation")

### Split the data into training and test set
    Training Period: 2015-01-02 - 2020-09-30

    Testing Period:  2020-10-01 - 2021-02-26

In [None]:
# Chronological split: rows dated before 2020-10-01 are used for training,
# rows from that date onwards are held out for testing.
TSLA_Train_Data = TSLA[TSLA['Date'] < '2020-10-01']
# Only the test frame gets a fresh 0-based index.
TSLA_Test_Data = TSLA[TSLA['Date'] >= '2020-10-01'].reset_index(drop = True)

# Plot the log series of both partitions on one figure.
plt.figure(figsize = (10, 5))
plt.plot(TSLA_Train_Data['Date'], TSLA_Train_Data['log Adj. Close'], label = 'Train Data')
plt.plot(TSLA_Test_Data['Date'], TSLA_Test_Data['log Adj. Close'], label = 'Test Data')
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')

### Modeling

In [None]:
# Let auto_arima search for a non-seasonal ARIMA model on the training log
# series (errors ignored, warnings suppressed), then print its summary.
TSLA_Auto_ARIMA_Model = auto_arima(TSLA_Train_Data['log Adj. Close'], seasonal = False,
                                   error_action = 'ignore', suppress_warnings = True)
print(TSLA_Auto_ARIMA_Model.summary())

In [None]:
# Fit an ARIMA model with the fixed order (5, 2, 2) on the training log series.
# NOTE(review): the order is hard-coded — presumably copied from the
# auto_arima summary; re-check it if the data changes.
TSLA_ARIMA_Model = ARIMA(TSLA_Train_Data['log Adj. Close'], order = (5, 2, 2))
TSLA_ARIMA_Model_Fit = TSLA_ARIMA_Model.fit()
print(TSLA_ARIMA_Model_Fit.summary())

### Predicting the closing stock price of Tesla

In [None]:
# Forecast exactly one step per test row, so the predictions always line up
# with the test dates and prices (the horizon was previously hard-coded).
# NOTE(review): indexing the result with [0] assumes forecast() returns a
# tuple whose first element holds the point forecasts (legacy
# statsmodels.tsa.arima_model API) — confirm against the installed version.
TSLA_output = TSLA_ARIMA_Model_Fit.forecast(len(TSLA_Test_Data), alpha=0.05)
# The model was fit on log prices, so exponentiate to get back to price scale.
TSLA_predictions = np.exp(TSLA_output[0])
plt.figure(figsize=(10, 5))
plt.plot(TSLA_Train_Data['Date'], TSLA_Train_Data['Adj. Close'], label = 'Training')
plt.plot(TSLA_Test_Data['Date'], TSLA_Test_Data['Adj. Close'], label = 'Testing')
plt.plot(TSLA_Test_Data['Date'], TSLA_predictions, label = 'Predictions')
plt.xlabel('Time')
plt.ylabel('Closing Price')
plt.legend()

In [None]:
# Root mean squared error of the price-scale predictions on the test set.
rmse = math.sqrt(mean_squared_error(TSLA_Test_Data['Adj. Close'], TSLA_predictions))
# Mean absolute percentage error, kept as a fraction (not multiplied by 100).
mape = np.mean(np.abs(TSLA_predictions - TSLA_Test_Data['Adj. Close'])/np.abs(TSLA_Test_Data['Adj. Close']))

print(f'RMSE: {rmse}')
print(f'MAPE: {mape}')

# Google

In [None]:
# Preview the first rows of the Google data.
GOOG.head()

In [None]:
# Print the DataFrame summary (columns, dtypes, non-null counts).
GOOG.info()

In [None]:
# Convert the Date column to datetime so it can be compared against the
# split date and used as the time axis in the plots below.
GOOG["Date"] = pd.to_datetime(GOOG["Date"])

In [None]:
# Print the Dickey-Fuller test results for Google's 'Adj. Close' series.
Test_Stationarity(GOOG)

The p-value is greater than 0.05, so we cannot reject the Null Hypothesis. Hence, the series needs to be differenced to make it stationary; this is the “Integrated (I)” part of ARIMA, whose order is denoted by ‘d’, and it is chosen while building the Auto ARIMA model.

Now let's take the log of the 'Adj. Close' column to reduce the magnitude of the values and dampen the rising trend of the series.

In [None]:
# Add the natural log of the adjusted close as a new column.
GOOG['log Adj. Close'] = np.log(GOOG['Adj. Close'])
# 12-row rolling mean and standard deviation of the log series.
GOOG_log_moving_avg = GOOG['log Adj. Close'].rolling(12).mean()
GOOG_log_std = GOOG['log Adj. Close'].rolling(12).std()

# Plot both rolling statistics against the date.
plt.figure(figsize = (10, 5))
plt.plot(GOOG['Date'], GOOG_log_moving_avg, label = "Rolling Mean")
plt.plot(GOOG['Date'], GOOG_log_std, label = "Rolling Std")
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')
plt.title("Rolling Mean and Standard Deviation")

### Split the data into training and test set
    Training Period: 2015-01-02 - 2020-10-30

    Testing Period:  2020-11-02 - 2021-02-26

In [None]:
# Chronological split: rows dated before 2020-11-01 are used for training,
# rows from that date onwards are held out for testing.
GOOG_Train_Data = GOOG[GOOG['Date'] < '2020-11-01']
# Only the test frame gets a fresh 0-based index.
GOOG_Test_Data = GOOG[GOOG['Date'] >= '2020-11-01'].reset_index(drop = True)

# Plot the log series of both partitions on one figure.
plt.figure(figsize = (10, 5))
plt.plot(GOOG_Train_Data['Date'], GOOG_Train_Data['log Adj. Close'], label = 'Train Data')
plt.plot(GOOG_Test_Data['Date'], GOOG_Test_Data['log Adj. Close'], label = 'Test Data')
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')

### Modeling

In [None]:
# Let auto_arima search for a non-seasonal ARIMA model on the training log
# series (errors ignored, warnings suppressed), then print its summary.
GOOG_Auto_ARIMA_Model = auto_arima(GOOG_Train_Data['log Adj. Close'], seasonal = False,
                                   error_action = 'ignore', suppress_warnings = True)
print(GOOG_Auto_ARIMA_Model.summary())

In [None]:
# Fit an ARIMA model with the fixed order (1, 1, 0) on the training log series.
# NOTE(review): the order is hard-coded — presumably copied from the
# auto_arima summary; re-check it if the data changes.
GOOG_ARIMA_Model = ARIMA(GOOG_Train_Data['log Adj. Close'], order = (1, 1, 0))
GOOG_ARIMA_Model_Fit = GOOG_ARIMA_Model.fit()
print(GOOG_ARIMA_Model_Fit.summary())

### Predicting the closing stock price of Google

In [None]:
# Forecast exactly one step per test row, so the predictions always line up
# with the test dates and prices (the horizon was previously hard-coded).
# NOTE(review): indexing the result with [0] assumes forecast() returns a
# tuple whose first element holds the point forecasts (legacy
# statsmodels.tsa.arima_model API) — confirm against the installed version.
GOOG_output = GOOG_ARIMA_Model_Fit.forecast(len(GOOG_Test_Data), alpha=0.05)
# The model was fit on log prices, so exponentiate to get back to price scale.
GOOG_predictions = np.exp(GOOG_output[0])
plt.figure(figsize=(10, 5))
plt.plot(GOOG_Train_Data['Date'], GOOG_Train_Data['Adj. Close'], label = 'Training')
plt.plot(GOOG_Test_Data['Date'], GOOG_Test_Data['Adj. Close'], label = 'Testing')
plt.plot(GOOG_Test_Data['Date'], GOOG_predictions, label = 'Predictions')
plt.xlabel('Time')
plt.ylabel('Closing Price')
plt.legend()

In [None]:
# Root mean squared error of the price-scale predictions on the test set.
rmse = math.sqrt(mean_squared_error(GOOG_Test_Data['Adj. Close'], GOOG_predictions))
# Mean absolute percentage error, kept as a fraction (not multiplied by 100).
mape = np.mean(np.abs(GOOG_predictions - GOOG_Test_Data['Adj. Close'])/np.abs(GOOG_Test_Data['Adj. Close']))

print(f'RMSE: {rmse}')
print(f'MAPE: {mape}')

# Microsoft

In [None]:
# Preview the first rows of the Microsoft data.
MSFT.head()

In [None]:
# Print the DataFrame summary (columns, dtypes, non-null counts).
MSFT.info()

In [None]:
# Convert the Date column to datetime so it can be compared against the
# split date and used as the time axis in the plots below.
MSFT["Date"] = pd.to_datetime(MSFT["Date"])

In [None]:
# Print the Dickey-Fuller test results for Microsoft's 'Adj. Close' series.
Test_Stationarity(MSFT)

The p-value is greater than 0.05, so we cannot reject the Null Hypothesis. Hence, the series needs to be differenced to make it stationary; this is the “Integrated (I)” part of ARIMA, whose order is denoted by ‘d’, and it is chosen while building the Auto ARIMA model.

Now let's take the log of the 'Adj. Close' column to reduce the magnitude of the values and dampen the rising trend of the series.

In [None]:
# Add the natural log of the adjusted close as a new column.
MSFT['log Adj. Close'] = np.log(MSFT['Adj. Close'])
# 12-row rolling mean and standard deviation of the log series.
MSFT_log_moving_avg = MSFT['log Adj. Close'].rolling(12).mean()
MSFT_log_std = MSFT['log Adj. Close'].rolling(12).std()

# Plot both rolling statistics against the date.
plt.figure(figsize = (10, 5))
plt.plot(MSFT['Date'], MSFT_log_moving_avg, label = "Rolling Mean")
plt.plot(MSFT['Date'], MSFT_log_std, label = "Rolling Std")
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')
plt.title("Rolling Mean and Standard Deviation")

### Split the data into training and test set
    Training Period: 2015-01-02 - 2020-09-30

    Testing Period:  2020-10-01 - 2021-02-26

In [None]:
# Chronological split: rows dated before 2020-10-01 are used for training,
# rows from that date onwards are held out for testing.
MSFT_Train_Data = MSFT[MSFT['Date'] < '2020-10-01']
# Only the test frame gets a fresh 0-based index.
MSFT_Test_Data = MSFT[MSFT['Date'] >= '2020-10-01'].reset_index(drop = True)

# Plot the log series of both partitions on one figure.
plt.figure(figsize = (10, 5))
plt.plot(MSFT_Train_Data['Date'], MSFT_Train_Data['log Adj. Close'], label = 'Train Data')
plt.plot(MSFT_Test_Data['Date'], MSFT_Test_Data['log Adj. Close'], label = 'Test Data')
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')

### Modeling

In [None]:
# Let auto_arima search for a non-seasonal ARIMA model on the training log
# series (errors ignored, warnings suppressed), then print its summary.
MSFT_Auto_ARIMA_Model = auto_arima(MSFT_Train_Data['log Adj. Close'], seasonal = False,
                                   error_action = 'ignore', suppress_warnings = True)
print(MSFT_Auto_ARIMA_Model.summary())

In [None]:
# Fit an ARIMA model with the fixed order (1, 1, 0) on the training log series.
# NOTE(review): the order is hard-coded — presumably copied from the
# auto_arima summary; re-check it if the data changes.
MSFT_ARIMA_Model = ARIMA(MSFT_Train_Data['log Adj. Close'], order = (1, 1, 0))
MSFT_ARIMA_Model_Fit = MSFT_ARIMA_Model.fit()
print(MSFT_ARIMA_Model_Fit.summary())

### Predicting the closing stock price of Microsoft

In [None]:
# Forecast exactly one step per test row, so the predictions always line up
# with the test dates and prices (the horizon was previously hard-coded).
# NOTE(review): indexing the result with [0] assumes forecast() returns a
# tuple whose first element holds the point forecasts (legacy
# statsmodels.tsa.arima_model API) — confirm against the installed version.
MSFT_output = MSFT_ARIMA_Model_Fit.forecast(len(MSFT_Test_Data), alpha=0.05)
# The model was fit on log prices, so exponentiate to get back to price scale.
MSFT_predictions = np.exp(MSFT_output[0])
plt.figure(figsize=(10, 5))
plt.plot(MSFT_Train_Data['Date'], MSFT_Train_Data['Adj. Close'], label = 'Training')
plt.plot(MSFT_Test_Data['Date'], MSFT_Test_Data['Adj. Close'], label = 'Testing')
plt.plot(MSFT_Test_Data['Date'], MSFT_predictions, label = 'Predictions')
plt.xlabel('Time')
plt.ylabel('Closing Price')
plt.legend()

In [None]:
# Root mean squared error of the price-scale predictions on the test set.
rmse = math.sqrt(mean_squared_error(MSFT_Test_Data['Adj. Close'], MSFT_predictions))
# Mean absolute percentage error, kept as a fraction (not multiplied by 100).
mape = np.mean(np.abs(MSFT_predictions - MSFT_Test_Data['Adj. Close'])/np.abs(MSFT_Test_Data['Adj. Close']))

print(f'RMSE: {rmse}')
print(f'MAPE: {mape}')

# Amazon

In [None]:
# Preview the first rows of the Amazon data.
AMZN.head()

In [None]:
# Print the DataFrame summary (columns, dtypes, non-null counts).
AMZN.info()

In [None]:
# Convert the Date column to datetime so it can be compared against the
# split date and used as the time axis in the plots below.
AMZN["Date"] = pd.to_datetime(AMZN["Date"])

In [None]:
# Print the Dickey-Fuller test results for Amazon's 'Adj. Close' series.
Test_Stationarity(AMZN)

The p-value is greater than 0.05, so we cannot reject the Null Hypothesis. Hence, the series needs to be differenced to make it stationary; this is the “Integrated (I)” part of ARIMA, whose order is denoted by ‘d’, and it is chosen while building the Auto ARIMA model.

Now let's take the log of the 'Adj. Close' column to reduce the magnitude of the values and dampen the rising trend of the series.

In [None]:
# Add the natural log of the adjusted close as a new column.
AMZN['log Adj. Close'] = np.log(AMZN['Adj. Close'])
# 12-row rolling mean and standard deviation of the log series.
AMZN_log_moving_avg = AMZN['log Adj. Close'].rolling(12).mean()
AMZN_log_std = AMZN['log Adj. Close'].rolling(12).std()

# Plot both rolling statistics against the date.
plt.figure(figsize = (10, 5))
plt.plot(AMZN['Date'], AMZN_log_moving_avg, label = "Rolling Mean")
plt.plot(AMZN['Date'], AMZN_log_std, label = "Rolling Std")
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')
plt.title("Rolling Mean and Standard Deviation")

### Split the data into training and test set
    Training Period: 2015-01-02 - 2020-10-30

    Testing Period:  2020-11-02 - 2021-02-26

In [None]:
# Chronological split: rows dated before 2020-11-01 are used for training,
# rows from that date onwards are held out for testing.
AMZN_Train_Data = AMZN[AMZN['Date'] < '2020-11-01']
# Only the test frame gets a fresh 0-based index.
AMZN_Test_Data = AMZN[AMZN['Date'] >= '2020-11-01'].reset_index(drop = True)

# Plot the log series of both partitions on one figure.
plt.figure(figsize = (10, 5))
plt.plot(AMZN_Train_Data['Date'], AMZN_Train_Data['log Adj. Close'], label = 'Train Data')
plt.plot(AMZN_Test_Data['Date'], AMZN_Test_Data['log Adj. Close'], label = 'Test Data')
plt.xlabel('Time')
plt.ylabel('log Adj. Close')
plt.legend(loc = 'best')

### Modeling

In [None]:
# Let auto_arima search for a non-seasonal ARIMA model on the training log
# series (errors ignored, warnings suppressed), then print its summary.
AMZN_Auto_ARIMA_Model = auto_arima(AMZN_Train_Data['log Adj. Close'], seasonal = False,
                                   error_action = 'ignore', suppress_warnings = True)
print(AMZN_Auto_ARIMA_Model.summary())

In [None]:
# Fit an ARIMA model with the fixed order (0, 1, 0) on the training log series.
# NOTE(review): the order is hard-coded — presumably copied from the
# auto_arima summary; re-check it if the data changes.
AMZN_ARIMA_Model = ARIMA(AMZN_Train_Data['log Adj. Close'], order = (0, 1, 0))
AMZN_ARIMA_Model_Fit = AMZN_ARIMA_Model.fit()
print(AMZN_ARIMA_Model_Fit.summary())

### Predicting the closing stock price of Amazon

In [None]:
# Forecast exactly one step per test row, so the predictions always line up
# with the test dates and prices (the horizon was previously hard-coded).
# NOTE(review): indexing the result with [0] assumes forecast() returns a
# tuple whose first element holds the point forecasts (legacy
# statsmodels.tsa.arima_model API) — confirm against the installed version.
AMZN_output = AMZN_ARIMA_Model_Fit.forecast(len(AMZN_Test_Data), alpha=0.05)
# The model was fit on log prices, so exponentiate to get back to price scale.
AMZN_predictions = np.exp(AMZN_output[0])
plt.figure(figsize=(10, 5))
plt.plot(AMZN_Train_Data['Date'], AMZN_Train_Data['Adj. Close'], label = 'Training')
plt.plot(AMZN_Test_Data['Date'], AMZN_Test_Data['Adj. Close'], label = 'Testing')
plt.plot(AMZN_Test_Data['Date'], AMZN_predictions, label = 'Predictions')
plt.xlabel('Time')
plt.ylabel('Closing Price')
plt.legend()

In [None]:
# Root mean squared error of the price-scale predictions on the test set.
rmse = math.sqrt(mean_squared_error(AMZN_Test_Data['Adj. Close'], AMZN_predictions))
# Mean absolute percentage error, kept as a fraction (not multiplied by 100).
mape = np.mean(np.abs(AMZN_predictions - AMZN_Test_Data['Adj. Close'])/np.abs(AMZN_Test_Data['Adj. Close']))

print(f'RMSE: {rmse}')
print(f'MAPE: {mape}')