In [None]:
# Libraries for data manipulation, visualization, and file operations
import pandas as pd
import matplotlib.pyplot as plt
import os

# Libraries for ARIMA modeling and time series analysis
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Library for model evaluation
from sklearn.metrics import mean_squared_error


In [None]:
# Location of the NVDA price-history CSV.
# The NVDA_CSV_PATH environment variable, when set, overrides the default so the
# notebook can run on machines other than the original author's. When it is unset,
# the original hard-coded path is used unchanged.
absolute_path = os.environ.get('NVDA_CSV_PATH', '/Users/alan/Downloads/NVDA.csv')
df = pd.read_csv(absolute_path)

# Parse 'Date' into datetimes and make it the index so time-based operations
# (resampling, date slicing) are available. Reassignment is used instead of
# inplace=True to avoid mutating the frame in place.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Resample to business-day frequency so observations are evenly spaced.
# Business days absent from the file are forward-filled ('ffill') with the
# values of the most recent earlier row.
df = df.resample('B').ffill()


In [None]:
# Overview chart: closing price across the whole sample, drawn through the
# Axes object that pandas returns rather than the pyplot state machine.
ax = df['Close'].plot(figsize=(15, 7))
ax.set_title('Closing Price Over Time')
ax.set_ylabel('Price')
ax.set_xlabel('Date')
ax.grid(True)
plt.show()


In [None]:
# Augmented Dickey-Fuller (ADF) unit-root test on the closing price.
# Null hypothesis: the series has a unit root (i.e. it is non-stationary).
# A small p-value is evidence against the null, i.e. in favour of stationarity.
result = adfuller(df['Close'])
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# If the test does not reject non-stationarity (p-value > 0.05), store the first
# difference of the closing price, a standard way to remove a stochastic trend.
# The 'Close_diff' guard keeps this cell idempotent: without it, every re-run would
# recompute the difference and drop one more leading row from df.
if result[1] > 0.05 and 'Close_diff' not in df.columns:
    df['Close_diff'] = df['Close'].diff()
    # Differencing leaves the first row without a value. Drop only rows where
    # 'Close_diff' is missing, so NaNs in unrelated columns do not remove data.
    df = df.dropna(subset=['Close_diff'])


In [None]:
# ACF and PACF plots help choose the MA order (q) and AR order (p) of an ARIMA model.
# - ACF: correlation between the series and its own lagged values.
# - PACF: correlation with a lag after controlling for all shorter lags.
#
# These diagnostics are only informative on a stationary series: the ACF of a trending
# price series decays very slowly and says little about p and q. The differenced series
# is therefore used when the stationarity step created it; otherwise 'Close' is used.
acf_input = (df['Close_diff'] if 'Close_diff' in df.columns else df['Close']).dropna()

plot_acf(acf_input)
plt.show()

plot_pacf(acf_input)
plt.show()


In [None]:
# ARIMA order, chosen from the ACF/PACF plots together with domain knowledge:
#   p - number of autoregressive lags
#   d - number of times the series is differenced to reach stationarity
#   q - number of moving-average lags
p, d, q = 1, 1, 1

# Chronological hold-out split: the first 80% of observations are used for
# training and the remaining 20% for testing the forecasts.
train_size = int(0.8 * len(df))
close_prices = df['Close']
train = close_prices.iloc[:train_size]
test = close_prices.iloc[train_size:]


In [None]:
# Build the ARIMA(p, d, q) model on the training portion of the closing price.
model = ARIMA(train, order=(p, d, q))

# Fit the model. The ARIMA class in statsmodels.tsa.arima.model does not accept
# the 'disp' argument of the older, removed arima_model API; passing it raises a
# TypeError, so fit() is called without it.
model_fit = model.fit()

# Forecast one value per observation in the test set, starting right after the
# last training observation. '.values' yields a plain array aligned by position.
forecast = model_fit.forecast(steps=len(test)).values

# Root Mean Squared Error (RMSE): typical size of the forecast error, in price units.
# It is computed as the square root of the MSE rather than via 'squared=False',
# which newer scikit-learn releases have deprecated and then removed.
rmse = mean_squared_error(test, forecast) ** 0.5
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Plot actual vs. forecasted values for a visual check of the fit on the test period.
fig, ax = plt.subplots(figsize=(15, 7))
ax.plot(train.index, train.values, label='Train')
ax.plot(test.index, test.values, label='Actual (test)')
ax.plot(test.index, forecast, label='Forecast')
ax.set_title('ARIMA Forecast vs. Actual Closing Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Requested forecast window (business days); its length sets the forecast horizon.
forecast_dates = pd.date_range(start="2023-10-25", end="2023-11-07", freq='B')

# Refit on the full closing-price history before forecasting the future.
# The evaluation model was trained on the first 80% of the data only, so its
# forecast() starts right after the training set; labelling those values with
# future dates would be wrong. A model fitted on all data forecasts from the
# last observed date instead.
final_model_fit = ARIMA(df['Close'], order=(p, d, q)).fit()
future_forecast = final_model_fit.forecast(steps=len(forecast_dates))

# Tell the reader when the data does not end on the business day just before the
# requested window, in which case the forecast covers different dates.
if not future_forecast.index.equals(forecast_dates):
    print(
        f"Note: data ends on {df.index[-1].date()}, so the forecast covers "
        f"{future_forecast.index[0].date()} to {future_forecast.index[-1].date()} "
        "rather than the requested window."
    )

# Collect the forecasts in a DataFrame, indexed by the dates the model itself
# assigned so that every row is labelled with the day it actually predicts.
forecast_df = pd.DataFrame({
    'Predicted_Close': future_forecast.values
}, index=future_forecast.index)

# Display the future forecasts
print(forecast_df)

# Save the forecasts to a CSV file in the current working directory
filename = "Implementation2.csv"
current_directory = os.getcwd()
full_path = os.path.join(current_directory, filename)
forecast_df.to_csv(full_path)
print(f"Predictions saved to {filename}!")
