In [None]:
import pandas as pd

# Read the TSLA price file from its hard-coded path into the working DataFrame.
df = pd.read_csv('/content/TSLA.csv')

# Preview the raw layout before any cleaning is applied.
print("First 5 rows of the DataFrame:", df.head(), sep="\n")

# df.info() writes its own report (columns, non-null counts, dtypes) to stdout.
print("\nDataFrame Info:")
df.info()

In [None]:
import pandas as pd

# Discard the first two rows (treated here as metadata rather than observations)
# and relabel the 'Price' column as 'Date' in a single chained step.
# NOTE: this cell is not idempotent - every re-run removes two more rows from df.
df = df.iloc[2:].rename(columns={'Price': 'Date'}).copy()

print("First 5 rows after dropping metadata rows and renaming 'Price' column:", df.head(), sep="\n")

print("\nDataFrame Info after dropping metadata rows and renaming 'Price' column:")
df.info()

In [None]:
import pandas as pd

# Columns that hold prices / volume and must be numeric for modelling
numeric_cols = ['Close', 'High', 'Low', 'Open', 'Volume']

# Parse 'Date' into datetimes and every numeric column into numbers in one pass.
# pd.to_numeric raises on values it cannot parse, so bad cells surface here.
df = df.assign(
    Date=pd.to_datetime(df['Date']),
    **{col: pd.to_numeric(df[col]) for col in numeric_cols},
)

print("First 5 rows after converting 'Date' and numeric columns:", df.head(), sep="\n")

print("\nDataFrame Info after converting 'Date' and numeric columns:")
df.info()

## Prepare Data for Prophet




In [None]:
import pandas as pd

# Prophet is fitted below on columns named 'ds' (timestamp) and 'y' (target),
# so map 'Date' -> 'ds' and 'Close' -> 'y'.
df = df.rename(columns={'Date': 'ds', 'Close': 'y'})

print("First 5 rows of the DataFrame after renaming columns:", df.head(), sep="\n")

# Confirm the two modelling columns carry the expected dtypes.
print("\nData types of 'ds' and 'y' columns:")
print(df.dtypes.loc[['ds', 'y']])

## Train Prophet Model



In [None]:
import warnings
warnings.filterwarnings('ignore')

!pip install prophet

from prophet import Prophet

# Initialize the Prophet model, disabling weekly seasonality (stock markets closed on weekends)
# and enabling yearly seasonality.
model = Prophet(weekly_seasonality=False, yearly_seasonality=True)

# Fit the model to the historical data (df should have 'ds' and 'y' columns)
model.fit(df)

In [None]:
import pandas as pd

# Extend the training dates by a 7-period horizon.
# NOTE(review): no `freq` is passed, so the horizon presumably uses the library
# default (calendar days, weekends included) - confirm this is intended.
future = model.make_future_dataframe(periods=7)

# Predict over the full range: fitted values for history plus the new periods.
forecast = model.predict(future)

print("Last 10 rows of the forecast (including future predictions):")
print(forecast.tail(10)[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])

In [None]:
import pandas as pd

# Combine the historical data with Prophet's predictions.
# An outer join is required here: the future dates exist only in `forecast`, so a
# left join on `df` would silently drop every forecast row. Future rows end up
# with NaN in 'y' (and the other historical columns) and populated yhat columns.
forecast_df = pd.merge(
    df,
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
    on='ds',
    how='outer',
)

print("Last 10 rows of the combined historical and forecast DataFrame:")
print(forecast_df.tail(10))

print("\nInfo of the combined DataFrame:")
forecast_df.info()

## Calculate Accuracy Metrics



In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Keep only the rows that have an observed 'y'; rows without an actual value
# cannot contribute to an error metric.
historical_predictions = forecast_df.dropna(subset=['y']).copy()

prophet_actual = historical_predictions['y']
prophet_fitted = historical_predictions['yhat']

# In-sample error of Prophet's fitted values against the observed closes.
mae = mean_absolute_error(prophet_actual, prophet_fitted)
rmse = np.sqrt(mean_squared_error(prophet_actual, prophet_fitted))

print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

## Visualize Forecasting Dashboard


In [None]:
import matplotlib.pyplot as plt

# Let Prophet draw its standard forecast figure (observations, fitted line, and
# uncertainty band), then title the axes it created.
fig = model.plot(forecast)
fig.gca().set_title('TSLA Stock Price Forecast with Prophet')

plt.show()

## Prepare Data for ARIMA


In [None]:
import pandas as pd

# Move the 'ds' timestamps into the index so the time-series models below can
# work from a date-indexed frame.
# NOTE: not idempotent - a second run fails because 'ds' is no longer a column.
df = df.set_index('ds')

print("First 5 rows of the DataFrame with 'ds' as index:", df.head(), sep="\n")

# The info report shows the index type, confirming the dates are now the index.
print("\nDataFrame Info after setting 'ds' as index:")
df.info()

## Train ARIMA Model



In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Make sure the index is datetime-typed, then conform it to a regular daily ('D')
# frequency. Days absent from the data appear as rows of missing values.
df.index = pd.to_datetime(df.index)
df = df.asfreq('D')

# ARIMA order (p, d, q): 5 autoregressive lags, first-order differencing, and no
# moving-average terms - used here as a starting point, not a tuned choice.
arima_order = (5, 1, 0)

# Build and fit the model on the close price held in the date-indexed 'y' column.
model_arima = ARIMA(endog=df['y'], order=arima_order)
model_arima_fit = model_arima.fit()

print("ARIMA Model Summary:")
print(model_arima_fit.summary())

## Generate ARIMA Forecasts



In [None]:
import pandas as pd

# 1. Generate in-sample predictions for the historical data
df['arima_yhat'] = model_arima_fit.predict(start=0, end=len(df)-1)

# 2. Build the dates of the forecast horizon
forecast_steps = 7
last_date = df.index.max()
# Start one day after last_date so the final historical day is not repeated
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=forecast_steps, freq='D')

# 3. Generate the out-of-sample forecast for the same number of steps
future_forecast_result = model_arima_fit.forecast(steps=forecast_steps)

# Convert the forecast result to a Series with the correct future dates as index
future_forecast_series = pd.Series(future_forecast_result, index=future_dates)

# 4. Combine the historical data and the future forecast into arima_forecast_df
# Historical part: actuals and in-sample predictions
arima_forecast_df = df[['y', 'arima_yhat']].copy()

# Future part: no actuals yet, so 'y' is NaN. A float NaN (not None) keeps the
# 'y' column numeric after the concat, matching how the SARIMAX frame is built.
future_df = pd.DataFrame(
    {'y': float('nan'), 'arima_yhat': future_forecast_series},
    index=future_forecast_series.index,
)

arima_forecast_df = pd.concat([arima_forecast_df, future_df])

print("Last 10 rows of the ARIMA combined historical and forecast DataFrame:")
print(arima_forecast_df.tail(10))

## Calculate ARIMA Accuracy Metrics




In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Restrict to rows with an observed 'y' - this removes the future horizon and
# any other row without an actual value.
arima_historical_predictions = arima_forecast_df.dropna(subset=['y']).copy()

arima_actual = arima_historical_predictions['y']
arima_fitted = arima_historical_predictions['arima_yhat']

# In-sample error of the ARIMA predictions against the observed closes.
mae_arima = mean_absolute_error(arima_actual, arima_fitted)
rmse_arima = np.sqrt(mean_squared_error(arima_actual, arima_fitted))

print(f"ARIMA Mean Absolute Error (MAE): {mae_arima:.4f}")
print(f"ARIMA Root Mean Squared Error (RMSE): {rmse_arima:.4f}")

## Visualize ARIMA Forecast


In [None]:
import matplotlib.pyplot as plt

# Create a figure and an axes object for the plot
plt.figure(figsize=(14, 7))

# Plot the historical 'y' values
plt.plot(arima_forecast_df['y'].dropna(), label='Actual Prices', color='blue')

# Plot the in-sample predictions for the historical period
historical_arima_yhat = arima_forecast_df.dropna(subset=['y'])['arima_yhat']
plt.plot(historical_arima_yhat, label='ARIMA Fit (Historical)', color='green', linestyle='--')

# Plot the 7-day future forecast.
# Select by date, not by "y is null": the daily-frequency index also contains
# rows with a missing 'y' inside the history (days with no trading data), and a
# null test would draw those as part of the "forecast" line.
last_observed_date = arima_forecast_df['y'].last_valid_index()
future_arima_forecast = arima_forecast_df.loc[
    arima_forecast_df.index > last_observed_date, 'arima_yhat'
]
plt.plot(future_arima_forecast, label='ARIMA Forecast (7 days)', color='red')

plt.title('ARIMA Model: TSLA Stock Price Forecast')
plt.xlabel('Date')
plt.ylabel('Stock Price')

plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## Compare Model Accuracy



In [None]:
# Print the two models' error metrics side by side, one block per model.
for metrics_header, model_mae, model_rmse in (
    ("Prophet Model Accuracy Metrics:", mae, rmse),
    ("\nARIMA Model Accuracy Metrics:", mae_arima, rmse_arima),
):
    print(metrics_header)
    print(f"  Mean Absolute Error (MAE): {model_mae:.4f}")
    print(f"  Root Mean Squared Error (RMSE): {model_rmse:.4f}")


## Prepare Data for SARIMAX



In [None]:
import pandas as pd


# Rebuild the modelling frame from the CSV so this section starts from a clean
# state: drop the two leading metadata rows and relabel 'Price' as 'Date'.
original_df = (
    pd.read_csv('/content/TSLA.csv')
    .iloc[2:]
    .rename(columns={'Price': 'Date'})
)

# Parse the date column and coerce the price/volume columns to numbers.
numeric_cols = ['Close', 'High', 'Low', 'Open', 'Volume']
original_df = original_df.assign(
    Date=pd.to_datetime(original_df['Date']),
    **{col: pd.to_numeric(original_df[col]) for col in numeric_cols},
)

# Same layout the earlier models used: 'ds' timestamps as the index, 'y' as the
# close price, conformed to a regular daily ('D') frequency.
df = original_df.rename(columns={'Date': 'ds', 'Close': 'y'}).set_index('ds')
df.index = pd.to_datetime(df.index)
df = df.asfreq('D')

print("First 5 rows of the DataFrame (df) for SARIMAX preparation:", df.head(), sep="\n")

print("\nDataFrame (df) Info for SARIMAX preparation:")
df.info()

## Train SARIMAX Model



In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Non-seasonal order (p, d, q)
sarimax_order = (1, 1, 1)

# Seasonal order (P, D, Q, S) with S=5, chosen to represent a 5-trading-day week.
# NOTE(review): df is indexed at calendar-day frequency (asfreq('D')), where one
# week is 7 steps, so a lag of 5 does not land on the same weekday. Confirm
# whether S=7 (or a business-day index) was intended before relying on this term.
sarimax_seasonal_order = (1, 0, 0, 5)

# Stationarity and invertibility constraints are both switched off for this fit.
sarimax_options = dict(
    order=sarimax_order,
    seasonal_order=sarimax_seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)

# Build the model on the close price ('y') and fit it without optimizer output.
model_sarimax = SARIMAX(df['y'], **sarimax_options)
model_sarimax_fit = model_sarimax.fit(disp=False)

print("SARIMAX Model Summary:")
print(model_sarimax_fit.summary())

In [None]:
import pandas as pd

# In-sample predictions over the whole historical index, stored next to 'y'
sarimax_predictions = model_sarimax_fit.predict(start=0, end=len(df)-1)
df['sarimax_yhat'] = sarimax_predictions

# Seven daily timestamps beginning the day after the last historical date
last_date_in_df = df.index.max()
future_dates = pd.date_range(
    start=last_date_in_df + pd.Timedelta(1, unit='D'),
    periods=7,
    freq='D',
)

# Out-of-sample forecast for those seven steps, labelled with the future dates
sarimax_future_forecast_result = model_sarimax_fit.forecast(steps=7)
sarimax_future_forecast_series = pd.Series(sarimax_future_forecast_result, index=future_dates)

# Historical block: observed values alongside the in-sample predictions
sarimax_forecast_df = df[['y', 'sarimax_yhat']].copy()

# Future block: 'y' is unknown, so it is filled with float NaN to keep the
# column numeric once the two blocks are stacked.
future_sarimax_df = pd.DataFrame(
    {'y': float('nan'), 'sarimax_yhat': sarimax_future_forecast_series},
    index=sarimax_future_forecast_series.index,
)

sarimax_forecast_df = pd.concat([sarimax_forecast_df, future_sarimax_df])

print("Last 10 rows of the SARIMAX combined historical and forecast DataFrame:")
print(sarimax_forecast_df.tail(10))

## Calculate SARIMAX Accuracy Metrics


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Restrict to rows with an observed 'y' - this removes the future horizon and
# any other row without an actual value.
sarimax_historical_predictions = sarimax_forecast_df.dropna(subset=['y']).copy()

sarimax_actual = sarimax_historical_predictions['y']
sarimax_fitted = sarimax_historical_predictions['sarimax_yhat']

# In-sample error of the SARIMAX predictions against the observed closes.
mae_sarimax = mean_absolute_error(sarimax_actual, sarimax_fitted)
rmse_sarimax = np.sqrt(mean_squared_error(sarimax_actual, sarimax_fitted))

print(f"SARIMAX Mean Absolute Error (MAE): {mae_sarimax:.4f}")
print(f"SARIMAX Root Mean Squared Error (RMSE): {rmse_sarimax:.4f}")

## Visualize SARIMAX Forecast



In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# --- Historical period: predictions plus their confidence bounds ---
hist_pred = model_sarimax_fit.get_prediction(start=0, end=len(df)-1)
hist_pred_ci = hist_pred.conf_int()

sarimax_historical_yhat = hist_pred.predicted_mean
# conf_int() columns are read positionally: column 0 = lower, column 1 = upper
sarimax_historical_yhat_lower = hist_pred_ci.iloc[:, 0]
sarimax_historical_yhat_upper = hist_pred_ci.iloc[:, 1]

# --- Future period: 7-step forecast plus its confidence bounds ---
last_date_in_df = df.index.max()
# Seven daily timestamps beginning the day after the last historical date
future_dates = pd.date_range(start=last_date_in_df + pd.Timedelta(days=1), periods=7, freq='D')

forecast_result = model_sarimax_fit.get_forecast(steps=7)
forecast_ci = forecast_result.conf_int()

sarimax_future_yhat = forecast_result.predicted_mean
sarimax_future_yhat_lower = forecast_ci.iloc[:, 0]
sarimax_future_yhat_upper = forecast_ci.iloc[:, 1]

# --- Assemble the plotting frames ---
# Historical frame: observed 'y' with the prediction and its bounds attached
plot_df_hist = df[['y']].assign(
    yhat=sarimax_historical_yhat,
    yhat_lower=sarimax_historical_yhat_lower,
    yhat_upper=sarimax_historical_yhat_upper,
)

# Future frame: 'y' is unknown, so it is float NaN to keep the column numeric
plot_df_future = pd.DataFrame(
    {
        'y': float('nan'),
        'yhat': sarimax_future_yhat,
        'yhat_lower': sarimax_future_yhat_lower,
        'yhat_upper': sarimax_future_yhat_upper,
    },
    index=future_dates,
)

plot_df = pd.concat([plot_df_hist, plot_df_future])

# --- Draw everything on one explicit Axes ---
sarimax_fig, sarimax_ax = plt.subplots(figsize=(15, 8))

# Observed prices
sarimax_ax.plot(plot_df['y'].dropna(), label='Actual Prices', color='blue', linewidth=1)

# Historical fit and its confidence band
sarimax_ax.plot(plot_df_hist['yhat'], label='SARIMAX Fit (Historical)', color='green', linestyle='--', linewidth=1)
sarimax_ax.fill_between(plot_df_hist.index, plot_df_hist['yhat_lower'], plot_df_hist['yhat_upper'], color='green', alpha=0.1, label='Historical Confidence Interval')

# Future forecast and its confidence band
sarimax_ax.plot(plot_df_future['yhat'], label='SARIMAX Forecast (7 days)', color='red', linewidth=1)
sarimax_ax.fill_between(plot_df_future.index, plot_df_future['yhat_lower'], plot_df_future['yhat_upper'], color='red', alpha=0.1, label='Future Confidence Interval')

sarimax_ax.set_title('SARIMAX Model: TSLA Stock Price Forecast with Confidence Intervals')
sarimax_ax.set_xlabel('Date')
sarimax_ax.set_ylabel('Stock Price')

sarimax_ax.legend()
sarimax_ax.grid(True)
sarimax_fig.tight_layout()
plt.show()

## Compare All Model Accuracies

Compare the calculated MAE and RMSE for the SARIMAX model with the previously obtained MAE and RMSE from both the Prophet and ARIMA models. Discuss the differences in performance.


## Final Task Summary: Comparing Prophet, ARIMA, and SARIMAX Models

### Model Performance Comparison:

**Prophet Model Accuracy Metrics:**
*   Mean Absolute Error (MAE): 19.2286
*   Root Mean Squared Error (RMSE): 34.2270

**ARIMA Model Accuracy Metrics:**
*   Mean Absolute Error (MAE): 2.3823
*   Root Mean Squared Error (RMSE): 5.3724

**SARIMAX Model Accuracy Metrics:**
*   Mean Absolute Error (MAE): 2.3759
*   Root Mean Squared Error (RMSE): 5.3727

### Discussion and Analysis:

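Comparing the accuracy metrics across all three models, **the ARIMA and SARIMAX models significantly outperform the Prophet model** for this particular TSLA stock price dataset and forecasting task. Both MAE and RMSE are substantially lower for ARIMA and SARIMAX compared to Prophet, indicating that their predictions are much closer to the actual historical stock prices. Note, however, that all of these metrics are computed in-sample, and that the ARIMA and SARIMAX values come from one-step-ahead predictions that use the previously observed prices, whereas Prophet's values come from a single fitted trend-plus-seasonality curve. The comparison therefore measures how closely each model tracks history, not out-of-sample forecasting skill.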

*   **ARIMA vs. Prophet:** ARIMA (MAE: 2.3823, RMSE: 5.3724) shows a dramatic improvement over Prophet (MAE: 19.2286, RMSE: 34.2270). This suggests that the stock price data exhibits strong autoregressive properties and trends that are better captured by ARIMA's differencing and autoregressive components. Prophet, while versatile for various seasonal patterns, might be less suitable for the complex, non-linear, and often volatile nature of stock market movements, where weekends are non-trading days rather than part of a regular daily seasonal pattern.

*   **SARIMAX vs. ARIMA:** The SARIMAX model (MAE: 2.3759, RMSE: 5.3727) shows a very slight improvement in MAE over the ARIMA model, while RMSE is almost identical. This marginal difference suggests that the seasonal component (`(1, 0, 0, 5)`) might capture some subtle weekly patterns (assuming a 5-day trading week) but does not dramatically enhance the forecast quality beyond what ARIMA already achieves. Note that the series was resampled to calendar-day frequency (`asfreq('D')`) before fitting, so a seasonal period of 5 steps does not span exactly one week on that index, which may be part of why the seasonal term adds so little. For stock data, true 'seasonal' patterns might also be more irregular or complex than what a fixed seasonal order in SARIMAX can capture effectively.

### Observed Trends and Predictions:

*   **Prophet:** The Prophet model's fit on historical data, as seen in its visualization, likely struggled to adapt to the sharp fluctuations and long-term trends of TSLA stock prices, resulting in higher errors. Its future predictions would carry a larger band of uncertainty.
*   **ARIMA:** The ARIMA model demonstrated a much closer fit to the historical data, accurately capturing the short-term dependencies and trends. Its 7-day future forecast (around 302-303) is a continuation of these observed patterns, with a relatively stable trajectory, reflecting the model's ability to extrapolate from recent movements.
*   **SARIMAX:** Similar to ARIMA, SARIMAX also provided a strong fit to the historical data. Its 7-day future forecast also showed a stable prediction around 302-302.1, with slightly more nuanced daily variations potentially due to the seasonal component, though the overall trend is very similar to ARIMA.

### Insights into Market Volatility and Uncertainty:

*   The significantly lower MAE and RMSE for ARIMA and SARIMAX indicate that these models are better at handling the inherent volatility of TSLA stock prices within the historical data. The errors are much smaller, meaning the models' predictions generally stay closer to the actual price movements.
*   Prophet's larger error metrics suggest it might be over-smoothing or under-fitting the volatile nature of stock data, leading to a wider range of predicted values that often deviate significantly from actuals.
*   The confidence intervals visualized for the SARIMAX model appear to offer a reasonable range for future predictions. The width of these intervals can serve as a proxy for the model's perceived uncertainty, especially for the 7-day forecast. For stock market forecasting, understanding these uncertainty bands is critical, as market conditions can change rapidly.

### Conclusion:

For forecasting TSLA stock prices, **ARIMA and SARIMAX models proved to be far more accurate than the Prophet model** in capturing the underlying patterns and historical movements. Given the marginal difference between ARIMA and SARIMAX, and the additional complexity of tuning seasonal parameters, a simpler ARIMA model might be preferred unless strong, clear seasonal patterns are evident and can be explicitly modeled. The choice of model heavily depends on the specific characteristics of the time series data and the trade-off between model complexity and predictive performance.
