#### Import libraries

In [None]:
import pandas as pd
import numpy as np

import datetime

In [None]:
# Capture the run date as a YYYY-MM-DD string and echo it.
today = datetime.datetime.today().strftime('%Y-%m-%d')

print("Today is " + today)

#### Import data
Source: https://www.nasdaq.com/market-activity/stocks/msft/historical

In [None]:
# Read processed_data.csv (path is relative to the current working directory)
# into a DataFrame.
data = pd.read_csv("processed_data.csv")

In [None]:
# Give the "Unnamed: 0" column a meaningful name.
# NOTE(review): presumably this column is the index that was written out when
# the CSV was saved - confirm against the script that produced the file.
data = data.rename(columns={"Unnamed: 0": "Date"})

In [None]:
# Parse the "Date" column into timestamps and make it the index, so the
# date-string slicing used further down works on a DatetimeIndex.
data["Date"] = pd.to_datetime(data["Date"])
data = data.set_index("Date")

In [None]:
# Preview the first rows to sanity-check the load and the new Date index.
data.head()

In [None]:
# Summarise column dtypes and non-null counts.
data.info()

#### Date Range For Modeling

In [None]:
# Restrict modelling to the "Close" column between 2023-01-01 and 2024-11-30
# (label-based slicing, so both end dates are included when present).
model_data = data.loc["2023-01-01":"2024-11-30", "Close"]

#### Test & Train Split

In [None]:
# Positional 90/10 split: the first 90% of observations train the model and
# the remaining 10% are held out for testing.
# NOTE(review): assumes model_data is sorted oldest-first - confirm.
train_size = int(len(model_data) * 0.90)

# Slice explicitly by position with .iloc. The test slice is open-ended so it
# always runs to the end of model_data (the previous upper bound used the
# length of the full `data` frame, which only worked because slices clamp).
train, test = model_data.iloc[:train_size], model_data.iloc[train_size:]

#### Visualizations

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()

In [None]:
# Show the train and test splits on one axis so the split point is visible.
plt.plot(train, color="blue", label="Train Split")
plt.plot(test, color="green", label="Test Split")
plt.title("Closing price stock price", fontsize=16, pad=20)
plt.xticks(rotation=45)
# Without a legend the label= arguments above are never displayed.
plt.legend()
plt.show()

#### ADF Test for Stationarity

In [None]:
# Import Libraries
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Augmented Dickey-Fuller test on the raw (undifferenced) training series.
# Element 1 of the returned tuple is printed as the p-value.
adf_test = adfuller(x=train)
p_value = adf_test[1]
print(f"p-value: {p_value}")

In [None]:
# Plot the ACF and PACF of the undifferenced training series
plot_acf(train)
plot_pacf(train)
plt.show()

In [None]:
# First-order difference of the training series. diff() leaves a NaN in the
# first position, which dropna() removes before plotting.
train_df_diff = (
    train
    .diff(periods=1)
    .dropna()
)
train_df_diff.plot()

In [None]:
# Augmented Dickey-Fuller test on the first-differenced training series.
# Element 1 of the returned tuple is printed as the p-value.
adf_test_diff = adfuller(x=train_df_diff)
p_value_diff = adf_test_diff[1]
print(f"p-value: {p_value_diff}")

In [None]:
# Plot the ACF and PACF of the first-differenced training series
plot_acf(train_df_diff)
plot_pacf(train_df_diff)
plt.show()

#### Time Series Model

In [None]:
# ARIMA with order (p, d, q) = (0, 1, 0): no AR terms, one difference, no MA
# terms, fitted on the training split only.
# NOTE(review): freq="D" declares calendar-daily spacing - confirm that the
# Date index really has no gaps (e.g. weekends/holidays).
model = ARIMA(endog=train, order=(0, 1, 0), freq="D")
model_fit = model.fit()
model_fit.summary()

#### Residuals Plot

In [None]:
# Drop the first residual, then show the remaining ones as a line plot (left)
# and as a kernel-density estimate (right).
# NOTE(review): presumably the first residual is skipped because, with d=1,
# it is not a genuine one-step error - confirm.
residuals = model_fit.resid.iloc[1:]
fig, ax = plt.subplots(nrows=1, ncols=2)
residuals.plot(ax=ax[0], title="Residuals")
residuals.plot(ax=ax[1], kind="kde", title="Density")
plt.show()

In [None]:
# ACF and PACF plots of the residuals
plot_acf(residuals)
plot_pacf(residuals)
plt.show()

#### Forecast On Testing Data With Model

In [None]:
# Forecast one value per observation in the test split, starting right after
# the end of the training data the model was fitted on.
forecast = model_fit.forecast(steps=len(test))

In [None]:
# Side-by-side table of actual test values and forecasts, both rounded to two
# decimals. The frame takes the test split's index; the forecast column is
# aligned to it on assignment.
comparison = test.round(2).to_frame(name="Test")
comparison["Forecast"] = forecast.round(2)
comparison.head()

#### Analyze Model Performance

In [None]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

# Score the forecast against the held-out test split.
mae = mean_absolute_error(test, forecast)
# scikit-learn returns MAPE as a fraction (0.05 means 5%), not a percentage.
mape = mean_absolute_percentage_error(test, forecast)
mse = mean_squared_error(test, forecast)
# Root of the MSE, in the same units as the prices. The variable keeps its
# existing (misspelt) name `rsme` because later cells read it; it previously
# held the un-rooted MSE, which is not comparable to a mean price.
rsme = np.sqrt(mse)

print("Mean Absolute Error =", round(mae, 3))
print("Mean Absolute Percentage Error =", round(mape, 3))
print("Mean Squared Error =", round(mse, 3))
print("Root Mean Squared Error =", round(rsme, 3))

In [None]:
# Relative error: `rsme` divided by the mean of the modelled closing prices.
# A ratio of 5% or more is reported as medium-to-high error, anything else as
# low error.
ratio = rsme / np.mean(model_data)

verdict = (
    "There is likely medium to high error in this model. Ratio is"
    if ratio >= 0.05
    else "There is likely low error in this model, as the ratio of RSME to mean of input values is"
)
print(verdict, f"{ratio:.1%}")

#### Predict Future Prices

In [None]:
# Number of future observations to predict beyond the end of the test split.
predicton_steps = 30
# Positions are counted from the start of `train` (the data the model was
# fitted on), so len(train) + len(test) is the first step after the test split.
start_index = len(train) + len(test)
# predict() treats `end` as inclusive, so subtract 1 to get exactly
# `predicton_steps` values rather than one extra.
end_index = start_index + predicton_steps - 1

predictions = model_fit.predict(start=start_index, end=end_index)

In [None]:
# View the last 15 rows of the future predictions, rounded to two decimals
predictions.tail(15).round(2)

#### Plot Test, Train, and Forecast

In [None]:
# Overlay the train split, test split, test-window forecast and future
# predictions on a single chart, drawn in this order.
for series, label, color in (
    (train, "Train", "blue"),
    (test, "Test", "green"),
    (forecast, "Model Forecast", "red"),
    (predictions, "Model Future Predictions", "black"),
):
    plt.plot(series, label=label, color=color)

plt.title("Predicted closing price stock price", fontsize=18, pad=20)
plt.xticks(rotation=45)
plt.legend()
plt.show()