## Import Libraries


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

2025-08-19 00:29:54.030903: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-19 00:29:54.108892: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-19 00:29:56.238597: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


## Load Data

In [None]:
# Load the price table used by the rest of the notebook. The pre-extracted
# 'adj_close.csv' is tried first; if anything goes wrong, 'Adj Close' is
# re-extracted from the two-header-row 'financial_data.csv' instead.
try:
    data = pd.read_csv('../data/adj_close.csv', index_col=0, parse_dates=True)
    if isinstance(data.columns, pd.MultiIndex):
        # Collapse each column tuple to the ticker it contains (checked in
        # TSLA, BND, SPY order); otherwise build a cleaned, joined label.
        new_cols = []
        for col_tuple in data.columns:
            ticker = next(
                (name for name in ('TSLA', 'BND', 'SPY') if name in col_tuple),
                None,
            )
            if ticker is not None:
                new_cols.append(ticker)
            else:
                new_cols.append('_'.join(map(str, col_tuple)).replace('UNNAMED: ', '').replace('_LEVEL_2', ''))
        data.columns = new_cols
        data = data[['TSLA', 'BND', 'SPY']]

    print("Data loaded successfully from adj_close.csv!")
    print("Columns in loaded data:", data.columns)
except Exception as e:
    print(f"Error loading data from adj_close.csv: {e}")
    print("Please ensure 'adj_close.csv' is correctly generated from Task 1 and contains the 'Adj Close' prices.")
    try:
        raw_data = pd.read_csv('../data/financial_data.csv', header=[0, 1], index_col=0, parse_dates=True)
        data = raw_data['Adj Close']
        # NOTE(review): positional rename -- assumes the 'Adj Close' columns
        # are ordered BND, SPY, TSLA; confirm against the file.
        data.columns = ['BND', 'SPY', 'TSLA']
        print("Successfully re-extracted 'Adj Close' from 'financial_data.csv'.")
    except Exception as e_fallback:
        print(f"Fallback failed: {e_fallback}. Cannot proceed without proper data.")
        # `exit()` is a `site` convenience for interactive shells and exits
        # with status 0; raise SystemExit directly with a failure status.
        raise SystemExit(1) from e_fallback

## Implement and Train Forecasting Models

In [None]:
# Only fall back to positional naming when the ticker columns are not already
# present by name; renaming unconditionally would mislabel a frame whose
# columns are named but ordered differently (e.g. TSLA, BND, SPY).
expected_tickers = ['BND', 'SPY', 'TSLA']
if not set(expected_tickers).issubset(data.columns):
    data.columns = expected_tickers

# Single-column DataFrame (not a Series); copied so later edits do not touch `data`.
tsla_data = data[['TSLA']].copy()

print("\nTSLA Data Head:")
print(tsla_data.head())

# Inclusive date bounds for the chronological train/test split.
train_start_date = '2015-07-01'
train_end_date = '2023-12-31'
test_start_date = '2024-01-01'
test_end_date = '2025-07-31'

# Label-based row slicing on the date index (parsed as dates by the loader).
train_data = tsla_data.loc[train_start_date:train_end_date]
test_data = tsla_data.loc[test_start_date:test_end_date]

print(f"\nTraining data shape: {train_data.shape}")
print(f"Testing data shape: {test_data.shape}")

def evaluate_model(y_true, y_pred, model_name):
    """Print and return MAE, RMSE and MAPE for a set of predictions.

    Args:
        y_true: Actual values (array-like or pandas object).
        y_pred: Predicted values, in the same order and of the same length
            as ``y_true``.
        model_name: Label used in the printed report header.

    Returns:
        Tuple ``(mae, rmse, mape)``; ``mape`` is expressed in percent and is
        NaN when every actual value is zero.
    """
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    # Compare positionally on flat float arrays: pandas arithmetic would align
    # on index labels and yield NaN for inputs that carry different indexes.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    # Percentage error is undefined where the actual value is zero; skip those.
    nonzero = actual != 0
    if nonzero.any():
        relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = float('nan')

    print(f"\n--- {model_name} Model Evaluation ---")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    return mae, rmse, mape

print("\n--- Training ARIMA Model (using statsmodels) ---")

# ARIMA order tuple handed to statsmodels.
order = (5, 1, 0)

# Stays None when fitting fails, which skips the forecast/plot section below.
arima_model_fit = None
try:
    # Fit on the raw NumPy values, so the model carries no date index.
    arima_model = ARIMA(train_data['TSLA'].values, order=order)
    arima_model_fit = arima_model.fit()
    print("ARIMA Model Summary:")
    print(arima_model_fit.summary())
except Exception as e:
    print(f"Error training ARIMA model with statsmodels: {e}")

# Explicit None check: the sentinel above is None, so do not rely on the
# truthiness of the fitted-results object.
if arima_model_fit is not None:
    # One forecast step per test row, re-indexed onto the test dates so it can
    # be scored and plotted against the actual prices.
    arima_forecast_array = arima_model_fit.forecast(steps=len(test_data))
    arima_forecast_series = pd.Series(arima_forecast_array, index=test_data.index)

    evaluate_model(test_data['TSLA'], arima_forecast_series, "ARIMA (statsmodels)")

    plt.figure(figsize=(15, 7))
    plt.plot(train_data['TSLA'], label='Training Data')
    plt.plot(test_data['TSLA'], label='Actual Test Data')
    plt.plot(arima_forecast_series, label='ARIMA Forecast (statsmodels)', color='green')
    plt.title('TSLA Stock Price Forecast: ARIMA Model (statsmodels)')
    plt.xlabel('Date')
    plt.ylabel('Price (USD)')
    plt.legend()
    plt.grid(True)
    plt.show()


print("\n--- Training LSTM Model ---")

# Learn the min/max scaling from the training rows only, then apply that same
# fitted scaler to both the training and the test rows.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data)
scaled_train_data = scaler.transform(train_data)
scaled_test_data = scaler.transform(test_data)

def create_sequences(data, sequence_length):
    """Slice column 0 of a 2-D array into input windows and next-step targets.

    Args:
        data: 2-D array-like; only column 0 is read.
        sequence_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[i]`` holds column 0 of rows
        ``i`` through ``i + sequence_length - 1`` and ``y[i]`` is column 0 of
        row ``i + sequence_length``. When ``data`` has no more rows than
        ``sequence_length``, ``X`` has shape ``(0, sequence_length)`` and
        ``y`` has shape ``(0,)``.
    """
    values = np.asarray(data)
    n_windows = len(values) - sequence_length
    if n_windows <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        empty_X = np.empty((0, sequence_length), dtype=values.dtype)
        empty_y = np.empty((0,), dtype=values.dtype)
        return empty_X, empty_y

    X = [values[start:start + sequence_length, 0] for start in range(n_windows)]
    y = [values[start + sequence_length, 0] for start in range(n_windows)]
    return np.array(X), np.array(y)

# Number of past scaled prices fed to the network for each prediction.
sequence_length = 60
X_train, y_train = create_sequences(scaled_train_data, sequence_length)
X_test, y_test = create_sequences(scaled_test_data, sequence_length)

# Append a trailing axis of size 1 so the inputs are three-dimensional.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Two stacked LSTM layers, each followed by dropout, then a single-unit output.
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(units=50, return_sequences=False))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(units=1))

lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

history = lstm_model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1,
)

# Predict in scaled space, then map the predictions back to price units.
lstm_predictions_scaled = lstm_model.predict(X_test)
lstm_forecast_prices = scaler.inverse_transform(lstm_predictions_scaled)

# The first `sequence_length` test rows only serve as input history, so the
# actual values are trimmed to line up with the predictions.
test_data_actual_lstm = test_data['TSLA'].iloc[sequence_length:]

lstm_forecast_series = pd.Series(
    lstm_forecast_prices.flatten(),
    index=test_data_actual_lstm.index,
)

evaluate_model(test_data_actual_lstm, lstm_forecast_series, "LSTM")

plt.figure(figsize=(15, 7))
for series, plot_kwargs in (
    (train_data['TSLA'], {'label': 'Training Data'}),
    (test_data_actual_lstm, {'label': 'Actual Test Data (Aligned for LSTM)'}),
    (lstm_forecast_series, {'label': 'LSTM Forecast', 'color': 'red'}),
):
    plt.plot(series, **plot_kwargs)
plt.title('TSLA Stock Price Forecast: LSTM Model')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.show()