In [None]:
# ============================================================
# Install Dependencies (only once if needed)
# ============================================================
pip install numpy==1.26.4 pandas==2.1.4 scikit-learn==1.3.2 pmdarima==2.0.4 tensorflow==2.15.0 matplotlib seaborn statsmodels


In [None]:
# ============================================================
# Imports & Paths
# ============================================================
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pmdarima as pm
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Input file and output folders, all relative to the working directory.
DATA_PATH = "data/raw/TSLA_raw.csv"
FIG_PATH = "result/figures"
CSV_PATH = "result/csv"

# Create both output folders up front so later savefig/to_csv calls find them.
for output_dir in (FIG_PATH, CSV_PATH):
    os.makedirs(output_dir, exist_ok=True)

# Global seaborn styling for every figure drawn below.
sns.set(style="whitegrid", palette="muted")


In [None]:
# ============================================================
# Load & Inspect Data
# ============================================================
# Read the raw CSV, parse the Date column and order the rows chronologically.
df = (
    pd.read_csv(DATA_PATH)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values("Date")
    .reset_index(drop=True)
)

print(df.head())

# Closing price over the whole history; the figure is also written to FIG_PATH.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df["Date"], df["Close"], label="Close Price")
ax.set_title("Tesla Stock Price History")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
fig.savefig(f"{FIG_PATH}/tsla_price_history.png")
plt.show()


In [None]:
# ============================================================
# Train/Test Split
# ============================================================
# Chronological hold-out: the first 80% of the rows form the training set,
# everything after that is the test set (no shuffling).
train_size = int(0.8 * len(df))
train, test = df.iloc[:train_size], df.iloc[train_size:]


In [None]:
# ============================================================
# ARIMA Model
# ============================================================
# Fit a non-seasonal ARIMA on the training closes; auto_arima selects the order
# with a stepwise search.
arima_model = pm.auto_arima(train["Close"], seasonal=False, stepwise=True, suppress_warnings=True)
n_test = len(test)

# NOTE(review): this is one multi-step forecast made from the end of the training
# set; no test observations are fed back into the model while forecasting.
#
# Depending on the pmdarima version, predict() may return a pandas Series that
# carries its own index. Convert to plain arrays so the metrics, the CSV and any
# later cell that slices `arima_preds` work by position, not by index label.
arima_preds = np.asarray(arima_model.predict(n_periods=n_test), dtype=float)
test_close = test["Close"].to_numpy(dtype=float)

mae_arima = mean_absolute_error(test_close, arima_preds)
rmse_arima = math.sqrt(mean_squared_error(test_close, arima_preds))
# MAPE in percent; assumes no zero closing prices in the test set.
mape_arima = np.mean(np.abs((test_close - arima_preds) / test_close)) * 100

pd.DataFrame({
    "Date": test["Date"].values,
    "Actual": test_close,
    "Predicted": arima_preds
}).to_csv(f"{CSV_PATH}/arima_test_predictions.csv", index=False)

plt.figure(figsize=(10,5))
plt.plot(train["Date"], train["Close"], label="Train")
plt.plot(test["Date"], test["Close"], label="Test", color="orange")
plt.plot(test["Date"], arima_preds, label="ARIMA Forecast", color="red")
plt.legend()
plt.title("Tesla Price Forecast - ARIMA")
plt.savefig(f"{FIG_PATH}/arima_forecast.png")
plt.show()


In [None]:
# ============================================================
# LSTM Model
# ============================================================
# Fit the scaler on the training rows only. Fitting it on the whole series would
# leak the test period's min/max into the training data. As a consequence, scaled
# test values may fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df[["Close"]].iloc[:train_size])
scaled_data = scaler.transform(df[["Close"]])

# Number of consecutive past closes in each LSTM input window.
time_step = 60
def create_dataset(dataset, time_step=1):
    """Slice a series into supervised (window, next value) pairs.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_columns)
        Input values; only column 0 is used.
    time_step : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    X : np.ndarray of shape (n_samples, time_step)
        Row ``i`` holds ``dataset[i : i + time_step, 0]``.
    Y : np.ndarray of shape (n_samples,)
        ``Y[i]`` is the value right after window ``i``: ``dataset[i + time_step, 0]``.

    ``n_samples`` is ``max(n_rows - time_step, 0)``. When the input is too short
    to produce any sample, correctly shaped empty arrays are returned so callers
    can still read ``X.shape[1]``.
    """
    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - time_step, 0)
    if n_samples == 0:
        return (
            np.empty((0, time_step), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    # All length-`time_step` windows in one call; the final window has no
    # following value to predict, so it is dropped. Copy because the view is
    # read-only and shares memory with the input.
    windows = np.lib.stride_tricks.sliding_window_view(series, time_step)
    return windows[:n_samples].copy(), series[time_step:].copy()

# Seed Python, NumPy and TensorFlow so the stochastic parts of training
# (e.g. weight initialisation) are repeatable from one run to the next.
set_random_seed(42)

# With train_size <= time_step there would be no training window at all, and the
# negative slice start below would silently wrap around to the end of the data.
if train_size <= time_step:
    raise ValueError(
        f"Need more than {time_step} training rows to build LSTM windows, got {train_size}."
    )

# The test slice starts `time_step` rows before the split so that the very first
# test target already has a full input window.
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size - time_step:]

X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

# Add the trailing feature axis the LSTM layer expects: (samples, time_step, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Two stacked LSTM layers followed by a small dense head that outputs one value.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(time_step, 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=32, epochs=20, verbose=1)

train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Map predictions and targets from the scaled space back to price units.
train_predict_inv = scaler.inverse_transform(train_predict)
y_train_inv = scaler.inverse_transform(y_train.reshape(-1, 1))
test_predict_inv = scaler.inverse_transform(test_predict)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Dates of the test rows, trimmed to the number of predictions produced.
test_dates = df.iloc[train_size:]["Date"].values[-len(test_predict_inv):]

lstm_results = pd.DataFrame({
    "Date": test_dates,
    "Actual": y_test_inv.flatten(),
    "Predicted": test_predict_inv.flatten()
})
lstm_results.to_csv(f"{CSV_PATH}/lstm_test_predictions.csv", index=False)

plt.figure(figsize=(14,6))
plt.plot(df["Date"], df["Close"], label="Full Actual Price", color="lightgray")
plt.plot(lstm_results["Date"], lstm_results["Actual"], label="Actual (Test)", color="blue")
plt.plot(lstm_results["Date"], lstm_results["Predicted"], label="LSTM Prediction", color="red")
plt.xlabel("Date")
plt.ylabel("Price")
plt.title("Tesla Stock Price Prediction with LSTM")
plt.legend()
plt.savefig(f"{FIG_PATH}/lstm_forecast.png")
plt.show()


In [None]:
# ============================================================
# Model Comparison
# ============================================================
def calculate_metrics(actual, predicted):
    """Return ``(mae, rmse, mape)`` for two equally long sequences.

    The inputs are compared strictly by position: both are converted to flat
    float arrays first, so pandas Series with different index labels, or a
    column vector paired with a 1-D array, give the element-wise result
    instead of NaNs or a broadcast matrix.

    Parameters
    ----------
    actual : array-like
        Observed values. MAPE divides by these, so zeros produce inf/nan.
    predicted : array-like
        Predicted values, same number of elements as ``actual``.

    Returns
    -------
    tuple of float
        Mean absolute error, root mean squared error, and mean absolute
        percentage error expressed in percent.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    actual_arr = np.asarray(actual, dtype=float).ravel()
    predicted_arr = np.asarray(predicted, dtype=float).ravel()
    if actual_arr.size == 0:
        raise ValueError("calculate_metrics requires at least one sample.")
    if actual_arr.shape != predicted_arr.shape:
        raise ValueError(
            f"Length mismatch: actual has {actual_arr.size} values, "
            f"predicted has {predicted_arr.size}."
        )

    # All three metrics are derived from the same positional error vector.
    errors = actual_arr - predicted_arr
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))
    mape = np.mean(np.abs(errors / actual_arr)) * 100
    return mae, rmse, mape

# Compare strictly by position. `lstm_results` is indexed 0..n-1, whereas the
# ARIMA predictions may carry a different pandas index; element-wise arithmetic
# on mismatched labels would yield NaN, so both sides are turned into arrays.
# NOTE(review): the LSTM predictions are built from windows of actual test
# values, while the ARIMA values come from a single forecast issued at the end
# of the training set, so the two error figures measure different tasks.
n_eval = len(lstm_results)
actual_test = lstm_results["Actual"].to_numpy()
lstm_test_preds = lstm_results["Predicted"].to_numpy()
aligned_arima_preds = np.asarray(arima_preds)[-n_eval:]

lstm_mae, lstm_rmse, lstm_mape = calculate_metrics(actual_test, lstm_test_preds)
arima_mae, arima_rmse, arima_mape = calculate_metrics(actual_test, aligned_arima_preds)

metrics_df = pd.DataFrame({
    "Model": ["LSTM", "ARIMA"],
    "MAE": [lstm_mae, arima_mae],
    "RMSE": [lstm_rmse, arima_rmse],
    "MAPE": [lstm_mape, arima_mape]
})
metrics_df.to_csv(f"{CSV_PATH}/metrics_test.csv", index=False)

plt.figure(figsize=(14,6))
plt.plot(lstm_results["Date"], lstm_results["Actual"], label="Actual (Test)", color="black")
plt.plot(lstm_results["Date"], lstm_results["Predicted"], label="LSTM Prediction", color="red")
plt.plot(lstm_results["Date"], aligned_arima_preds, label="ARIMA Prediction", color="green")
plt.xlabel("Date")
plt.ylabel("Stock Price")
plt.title("Tesla Stock Price Prediction: LSTM vs ARIMA")
plt.legend()
plt.savefig(f"{FIG_PATH}/comparison_forecast.png")
plt.show()


In [None]:
# ============================================================
# Future 30-Day Forecast
# ============================================================
horizon = 30
last_date = df["Date"].iloc[-1]

# Each forecast step is one row of the input series. Assuming the rows are
# trading sessions, label the horizon with business days rather than calendar
# days. NOTE(review): exchange holidays are not excluded — confirm whether a
# trading calendar is needed.
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon, freq="B")

# ARIMA forecast
# `arima_model` was fitted on the training rows only, so forecasting from it
# would start right after the training set, not after the last observation.
# Refit the order chosen by auto_arima on the full series (a fresh estimator,
# so re-running this cell never mutates `arima_model`).
arima_full = pm.ARIMA(
    order=arima_model.order,
    seasonal_order=arima_model.seasonal_order,
    trend=arima_model.trend,
    with_intercept=arima_model.with_intercept,
    suppress_warnings=True,
).fit(df["Close"])
arima_future = np.asarray(arima_full.predict(n_periods=horizon), dtype=float)
arima_future_dates = future_dates
pd.DataFrame({"Date": arima_future_dates, "Forecast": arima_future}).to_csv(f"{CSV_PATH}/arima_future_30d.csv", index=False)

# LSTM forecast
# Start from the last `time_step` scaled closes and roll forward one step at a
# time, feeding every prediction back in as the newest value of the window.
last_data = scaled_data[-time_step:]
future_predictions = []
current_batch = last_data.reshape(1, time_step, 1)

for _ in range(horizon):
    # verbose=0 keeps the per-call progress output out of the notebook.
    pred = model.predict(current_batch, verbose=0)[0]
    future_predictions.append(pred)
    current_batch = np.append(current_batch[:,1:,:], [[pred]], axis=1)

# Back to price units; reshape to the (n_samples, 1) layout the scaler was fitted on.
future_preds_inv = scaler.inverse_transform(np.asarray(future_predictions).reshape(-1, 1))
lstm_future_dates = future_dates
pd.DataFrame({"Date": lstm_future_dates, "Forecast": future_preds_inv.flatten()}).to_csv(f"{CSV_PATH}/lstm_future_30d.csv", index=False)
