# Capstone Project: AI in Finance - Milestone 1
## TA's + Peter
### Giulio Bardelli, Allan Ilyasov, Peter Roumeliotis

In [51]:
%pip install yfinance pandas plotly statsmodels numpy scikit-learn nbformat


Note: you may need to restart the kernel to use updated packages.


In [52]:
import yfinance as yf
import pandas as pd

def get_stock_data(symbol: str, start="2020-01-01", end=None):
    """Fetch daily stock data from Yahoo Finance."""
    if end is None:
        end = pd.Timestamp.today().strftime('%Y-%m-%d')
    
    print(f"Fetching {symbol}...")
    df = yf.download(symbol, start=start, end=end, progress=False, auto_adjust=False)
    
    if df.empty:
        print(f"‚ö†Ô∏è Error fetching {symbol}: No data returned")
        return pd.DataFrame()
    
    # Flatten column names if they're multi-level
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    
    print(f"‚úì Fetched {len(df)} days of data for {symbol}")
    return df

# Fetch stock data (no throttling needed with yfinance!)
aapl = get_stock_data("AAPL")
nvda = get_stock_data("NVDA")
lyft = get_stock_data("LYFT")

print("\nAAPL Sample:")
print(aapl.head())
print("\nColumns:", aapl.columns.tolist())

Fetching AAPL...
‚úì Fetched 1482 days of data for AAPL
Fetching NVDA...
‚úì Fetched 1482 days of data for NVDA
Fetching LYFT...
‚úì Fetched 1482 days of data for LYFT

AAPL Sample:
Price       Adj Close      Close       High        Low       Open     Volume
Date                                                                        
2020-01-02  72.468262  75.087502  75.150002  73.797501  74.059998  135480400
2020-01-03  71.763710  74.357498  75.144997  74.125000  74.287498  146322800
2020-01-06  72.335541  74.949997  74.989998  73.187500  73.447502  118387200
2020-01-07  71.995346  74.597504  75.224998  74.370003  74.959999  108872000
2020-01-08  73.153488  75.797501  76.110001  74.290001  74.290001  132079200

Columns: ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']


In [53]:
# ============================================
# STANDARDIZED TRAIN/TEST SPLIT FOR ALL MODELS
# This split will be used for ARIMA (Milestone 1), LSTM, and GRU (Milestone 2)
# ============================================

TRAIN_SIZE = 0.8  # 80% training, 20% testing

print("\nüìä Creating standardized train/test splits...")
print(f"Train size: {TRAIN_SIZE*100}% | Test size: {(1-TRAIN_SIZE)*100}%")

# AAPL split
aapl_train_size = int(len(aapl) * TRAIN_SIZE)
aapl_train = aapl[:aapl_train_size]['Close']
aapl_test = aapl[aapl_train_size:]['Close']
print(f"\nAAPL: {len(aapl_train)} train samples, {len(aapl_test)} test samples")

# NVDA split
nvda_train_size = int(len(nvda) * TRAIN_SIZE)
nvda_train = nvda[:nvda_train_size]['Close']
nvda_test = nvda[nvda_train_size:]['Close']
print(f"NVDA: {len(nvda_train)} train samples, {len(nvda_test)} test samples")

# LYFT split
lyft_train_size = int(len(lyft) * TRAIN_SIZE)
lyft_train = lyft[:lyft_train_size]['Close']
lyft_test = lyft[lyft_train_size:]['Close']
print(f"LYFT: {len(lyft_train)} train samples, {len(lyft_test)} test samples")

print("\n‚úÖ Standardized splits created. All models will use these exact splits.")


üìä Creating standardized train/test splits...
Train size: 80.0% | Test size: 19.999999999999996%

AAPL: 1185 train samples, 297 test samples
NVDA: 1185 train samples, 297 test samples
LYFT: 1185 train samples, 297 test samples

‚úÖ Standardized splits created. All models will use these exact splits.


In [54]:
def add_ema_dema(df, span=20):
    """Add EMA and DEMA columns to the DataFrame."""
    df[f"EMA_{span}"] = df["Close"].ewm(span=span, adjust=False).mean()
    ema = df[f"EMA_{span}"]
    df[f"DEMA_{span}"] = 2*ema - ema.ewm(span=span, adjust=False).mean()
    return df

# Apply to all stocks
aapl = add_ema_dema(aapl, 20)
nvda = add_ema_dema(nvda, 20)
lyft = add_ema_dema(lyft, 20)


In [55]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_stock(df, symbol, span=20):
    fig = go.Figure()
    
    # Add traces
    fig.add_trace(go.Scatter(x=df.index, y=df["Close"], 
                             name="Close Price", 
                             line=dict(color='blue', width=2)))
    fig.add_trace(go.Scatter(x=df.index, y=df[f"EMA_{span}"], 
                             name=f"EMA {span}", 
                             line=dict(color='orange', dash='dash')))
    fig.add_trace(go.Scatter(x=df.index, y=df[f"DEMA_{span}"], 
                             name=f"DEMA {span}", 
                             line=dict(color='green', dash='dot')))
    
    # Update layout with range slider
    fig.update_layout(
        title=f"{symbol} Stock with EMA & DEMA",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode='x unified',
        height=600,
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1, label="1d", step="day", stepmode="backward"),
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=3, label="3m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(count=5, label="5y", step="year", stepmode="backward"),
                    dict(step="all", label="All")
                ])
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
    )
    
    fig.show()

plot_stock(aapl, "AAPL")
plot_stock(nvda, "NVDA")
plot_stock(lyft, "LYFT")

In [56]:
from statsmodels.tsa.seasonal import seasonal_decompose
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Step 3: Time Series Decomposition
# Decompose into Trend, Seasonality, and Residual components

def decompose_series(df, symbol):
    decomposition = seasonal_decompose(df["Adj Close"], model="multiplicative", period=252)
    
    # Create subplots with shared x-axis
    fig = make_subplots(
        rows=4, cols=1,
        subplot_titles=('Observed', 'Trend', 'Seasonal', 'Residual'),
        vertical_spacing=0.08,
        shared_xaxes=True
    )
    
    # Add traces for each component
    fig.add_trace(go.Scatter(x=df.index, y=decomposition.observed, 
                             name='Observed', line=dict(color='blue', width=2)),
                  row=1, col=1)
    fig.add_trace(go.Scatter(x=df.index, y=decomposition.trend, 
                             name='Trend', line=dict(color='orange', width=2)),
                  row=2, col=1)
    fig.add_trace(go.Scatter(x=df.index, y=decomposition.seasonal, 
                             name='Seasonal', line=dict(color='green', width=2)),
                  row=3, col=1)
    fig.add_trace(go.Scatter(x=df.index, y=decomposition.resid, 
                             name='Residual', line=dict(color='red', width=1)),
                  row=4, col=1)
    
    # Update layout
    fig.update_layout(
        title_text=f"{symbol} - Time Series Decomposition",
        height=1000,
        showlegend=False,
        hovermode='x unified',
        template='plotly_white'
    )
    
    # Add y-axis labels
    fig.update_yaxes(title_text="Price ($)", row=1, col=1)
    fig.update_yaxes(title_text="Multiplier", row=2, col=1)
    fig.update_yaxes(title_text="Multiplier", row=3, col=1)
    fig.update_yaxes(title_text="Multiplier", row=4, col=1)
    
    # Add x-axis label to bottom
    fig.update_xaxes(title_text="Date", row=4, col=1)
    
    # Add range selector to top (row 1) and range slider to bottom (row 4)
    fig.update_xaxes(
        rangeselector=dict(
            buttons=list([
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(count=5, label="5y", step="year", stepmode="backward"),
                dict(step="all", label="All")
            ]),
            bgcolor="lightgray",
            activecolor="gray",
            y=1.15,
            yanchor="top"
        ),
        type="date",
        row=1, col=1
    )
    
    fig.update_xaxes(
        rangeslider=dict(visible=True),
        type="date",
        row=4, col=1
    )
    
    fig.show()

decompose_series(aapl, "AAPL")
decompose_series(nvda, "NVDA")
decompose_series(lyft, "LYFT")

In [57]:
# Step 4: Stationarity Diagnostic Tests
# ACF and PACF Analysis - helps identify AR order (p) and MA order (q)

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_acf_pacf_interactive(data, symbol, lags=40):
    """Create interactive ACF and PACF plots using Plotly (stem plot style)"""
    from statsmodels.tsa.stattools import acf, pacf
    
    # Calculate ACF and PACF
    acf_values = acf(data.dropna(), nlags=lags)
    pacf_values = pacf(data.dropna(), nlags=lags)
    
    # Create subplots
    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=(f'{symbol} - Autocorrelation Function (ACF)', 
                       f'{symbol} - Partial Autocorrelation Function (PACF)'),
        vertical_spacing=0.15
    )
    
    # ACF plot - using stem plot style (lines + markers)
    lags_range = list(range(len(acf_values)))
    
    # Add vertical lines (stems) for ACF
    for i, (lag, acf_val) in enumerate(zip(lags_range, acf_values)):
        fig.add_trace(go.Scatter(
            x=[lag, lag], 
            y=[0, acf_val],
            mode='lines',
            line=dict(color='blue', width=2),
            showlegend=False,
            hoverinfo='skip'
        ), row=1, col=1)
    
    # Add markers on top
    fig.add_trace(go.Scatter(
        x=lags_range, 
        y=acf_values,
        mode='markers',
        marker=dict(color='blue', size=6),
        name='ACF',
        showlegend=False
    ), row=1, col=1)
    
    # Add confidence interval lines for ACF
    conf_int = 1.96/np.sqrt(len(data))
    fig.add_hline(y=conf_int, line_dash="dash", line_color="red", row=1, col=1, opacity=0.5)
    fig.add_hline(y=-conf_int, line_dash="dash", line_color="red", row=1, col=1, opacity=0.5)
    fig.add_hline(y=0, line_color="black", row=1, col=1, line_width=1)
    
    # PACF plot - using stem plot style (lines + markers)
    # Add vertical lines (stems) for PACF
    for i, (lag, pacf_val) in enumerate(zip(lags_range, pacf_values)):
        fig.add_trace(go.Scatter(
            x=[lag, lag], 
            y=[0, pacf_val],
            mode='lines',
            line=dict(color='orange', width=2),
            showlegend=False,
            hoverinfo='skip'
        ), row=2, col=1)
    
    # Add markers on top
    fig.add_trace(go.Scatter(
        x=lags_range, 
        y=pacf_values,
        mode='markers',
        marker=dict(color='orange', size=6),
        name='PACF',
        showlegend=False
    ), row=2, col=1)
    
    # Add confidence interval lines for PACF
    fig.add_hline(y=conf_int, line_dash="dash", line_color="red", row=2, col=1, opacity=0.5)
    fig.add_hline(y=-conf_int, line_dash="dash", line_color="red", row=2, col=1, opacity=0.5)
    fig.add_hline(y=0, line_color="black", row=2, col=1, line_width=1)
    
    # Update layout
    fig.update_xaxes(title_text="Lags", row=1, col=1)
    fig.update_xaxes(title_text="Lags", row=2, col=1)
    fig.update_yaxes(title_text="Correlation", row=1, col=1)
    fig.update_yaxes(title_text="Correlation", row=2, col=1)
    
    fig.update_layout(
        height=800,
        showlegend=False,
        template='plotly_white',
        title_text=f"{symbol} - ACF & PACF Analysis"
    )
    
    fig.show()

# Plot ACF/PACF for all stocks
print("Analyzing autocorrelation patterns...")
plot_acf_pacf_interactive(aapl["Adj Close"], "AAPL")
plot_acf_pacf_interactive(nvda["Adj Close"], "NVDA")
plot_acf_pacf_interactive(lyft["Adj Close"], "LYFT")

Analyzing autocorrelation patterns...


## üìä Understanding ACF/PACF Patterns

### Why do all stocks show similar slow decay in ACF?

**This is CORRECT and EXPECTED!** Here's why:

#### Raw Stock Prices (Non-Stationary)
- Stock prices follow a **random walk** pattern
- Each price heavily depends on the previous price: `Price(t) ‚âà Price(t-1) + noise`
- This creates **slow decay** in ACF (what you see in the plots above)
- **All stocks behave similarly** because they all follow random walk dynamics

#### Key Insight:
The slow decay in ACF is a **diagnostic indicator** that tells us:
- ‚úÖ The series is non-stationary (has a unit root)
- ‚úÖ We need to apply differencing (d=1) to make it stationary
- ‚úÖ This validates our use of ARIMA instead of pure AR/MA models

#### What to Look For:
- **Raw Prices**: Slow, gradual decay in ACF (all stocks similar)
- **Differenced Data**: Quick drop to zero in ACF (shows variability between stocks)

The differenced data (returns) will show MORE variability and different patterns across stocks!

In [58]:
# Step 4 (continued): Augmented Dickey-Fuller (ADF) Stationarity Test
# Tests the null hypothesis that a time series has a unit root (non-stationary)

from statsmodels.tsa.stattools import adfuller

def adf_test(series, symbol):
    """Perform ADF test and print results"""
    result = adfuller(series.dropna())
    
    print(f"\n{'='*60}")
    print(f"ADF Stationarity Test Results - {symbol}")
    print(f"{'='*60}")
    print(f"ADF Statistic:        {result[0]:.6f}")
    print(f"p-value:              {result[1]:.6f}")
    print(f"Critical Values:")
    for key, value in result[4].items():
        print(f"  {key:>5}: {value:.6f}")
    
    # Interpretation
    if result[1] <= 0.05:
        print(f"\n‚úÖ Result: STATIONARY (p-value ‚â§ 0.05)")
        print(f"   The series does NOT have a unit root.")
        print(f"   Safe to use without differencing.")
    else:
        print(f"\n‚ùå Result: NON-STATIONARY (p-value > 0.05)")
        print(f"   The series HAS a unit root.")
        print(f"   Requires differencing or transformation.")
    
    return result

# Test stationarity for all stocks
print("\nTesting for Stationarity (ADF Test)...")
print("="*60)
print("\nüìä RAW PRICE DATA (Non-Stationary - Random Walk)")
print("Expected: High p-value, slow ACF decay")

aapl_adf = adf_test(aapl["Adj Close"], "AAPL")
nvda_adf = adf_test(nvda["Adj Close"], "NVDA")
lyft_adf = adf_test(lyft["Adj Close"], "LYFT")

# Test on differenced data (1st order difference)
print("\n\n" + "="*60)
print("ADF Test on DIFFERENCED Data (d=1)")
print("="*60)
print("\nüìä DIFFERENCED DATA (Should be Stationary)")
print("Expected: Low p-value, ACF drops quickly")

aapl_diff_adf = adf_test(aapl["Adj Close"].diff().dropna(), "AAPL (Differenced)")
nvda_diff_adf = adf_test(nvda["Adj Close"].diff().dropna(), "NVDA (Differenced)")
lyft_diff_adf = adf_test(lyft["Adj Close"].diff().dropna(), "LYFT (Differenced)")

# Now plot ACF/PACF for DIFFERENCED data to show the contrast
print("\n\n" + "="*60)
print("ACF/PACF on DIFFERENCED Data (Returns)")
print("="*60)
print("Note: These should show different patterns than raw prices")
print("Look for quick decay and different behaviors across stocks")

# Plot differenced data ACF/PACF
plot_acf_pacf_interactive(aapl["Adj Close"].diff().dropna(), "AAPL (Differenced/Returns)")
plot_acf_pacf_interactive(nvda["Adj Close"].diff().dropna(), "NVDA (Differenced/Returns)")
plot_acf_pacf_interactive(lyft["Adj Close"].diff().dropna(), "LYFT (Differenced/Returns)")


Testing for Stationarity (ADF Test)...

üìä RAW PRICE DATA (Non-Stationary - Random Walk)
Expected: High p-value, slow ACF decay

ADF Stationarity Test Results - AAPL
ADF Statistic:        -0.884830
p-value:              0.792927
Critical Values:
     1%: -3.434773
     5%: -2.863494
    10%: -2.567810

‚ùå Result: NON-STATIONARY (p-value > 0.05)
   The series HAS a unit root.
   Requires differencing or transformation.

ADF Stationarity Test Results - NVDA
ADF Statistic:        0.654686
p-value:              0.988889
Critical Values:
     1%: -3.434840
     5%: -2.863523
    10%: -2.567826

‚ùå Result: NON-STATIONARY (p-value > 0.05)
   The series HAS a unit root.
   Requires differencing or transformation.

ADF Stationarity Test Results - LYFT
ADF Statistic:        -1.780204
p-value:              0.390333
Critical Values:
     1%: -3.434834
     5%: -2.863520
    10%: -2.567824

‚ùå Result: NON-STATIONARY (p-value > 0.05)
   The series HAS a unit root.
   Requires differencing or t

In [59]:
# Step 5: Classical Time Series Models (MA, AR, ARIMA)

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import plotly.graph_objects as go
import numpy as np

# Function to prepare train/test split
def prepare_data(df, train_ratio=0.8):
    data = df["Adj Close"].dropna()
    train_size = int(len(data) * train_ratio)
    train, test = data[:train_size], data[train_size:]
    return train, test

# Prepare data for all stocks
aapl_train, aapl_test = prepare_data(aapl)
nvda_train, nvda_test = prepare_data(nvda)
lyft_train, lyft_test = prepare_data(lyft)

print(f"AAPL - Training: {len(aapl_train)} days, Test: {len(aapl_test)} days")
print(f"NVDA - Training: {len(nvda_train)} days, Test: {len(nvda_test)} days")
print(f"LYFT - Training: {len(lyft_train)} days, Test: {len(lyft_test)} days")

AAPL - Training: 1185 days, Test: 297 days
NVDA - Training: 1185 days, Test: 297 days
LYFT - Training: 1185 days, Test: 297 days


In [60]:
# Moving Average (MA) Model - MA(q)
# MA models use past forecast errors in a regression-like model

from sklearn.metrics import mean_squared_error, mean_absolute_error

def fit_ma_model(train, test, q=20):
    """Fit MA model and return forecasts and metrics"""
    model = ARIMA(train, order=(0, 0, q))
    fitted = model.fit()
    forecast = fitted.forecast(steps=len(test))
    forecast.index = test.index
    
    rmse = np.sqrt(mean_squared_error(test, forecast))
    mae = mean_absolute_error(test, forecast)
    aic = fitted.aic
    bic = fitted.bic
    
    return fitted, forecast, rmse, mae, aic, bic

# Fit MA(20) for all stocks
print("="*60)
print("MOVING AVERAGE (MA) MODEL - MA(20)")
print("="*60)

aapl_ma_fitted, aapl_ma_forecast, aapl_ma_rmse, aapl_ma_mae, aapl_ma_aic, aapl_ma_bic = fit_ma_model(aapl_train, aapl_test)
print(f"\nAAPL MA(20) - RMSE: ${aapl_ma_rmse:.2f}, MAE: ${aapl_ma_mae:.2f}, AIC: {aapl_ma_aic:.2f}, BIC: {aapl_ma_bic:.2f}")

nvda_ma_fitted, nvda_ma_forecast, nvda_ma_rmse, nvda_ma_mae, nvda_ma_aic, nvda_ma_bic = fit_ma_model(nvda_train, nvda_test)
print(f"NVDA MA(20) - RMSE: ${nvda_ma_rmse:.2f}, MAE: ${nvda_ma_mae:.2f}, AIC: {nvda_ma_aic:.2f}, BIC: {nvda_ma_bic:.2f}")

lyft_ma_fitted, lyft_ma_forecast, lyft_ma_rmse, lyft_ma_mae, lyft_ma_aic, lyft_ma_bic = fit_ma_model(lyft_train, lyft_test)
print(f"LYFT MA(20) - RMSE: ${lyft_ma_rmse:.2f}, MAE: ${lyft_ma_mae:.2f}, AIC: {lyft_ma_aic:.2f}, BIC: {lyft_ma_bic:.2f}")

MOVING AVERAGE (MA) MODEL - MA(20)



A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and


AAPL MA(20) - RMSE: $82.97, MAE: $79.85, AIC: 6927.81, BIC: 7039.52



No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Non-invertible starting MA parameters found. Using zeros as starting parameters.



NVDA MA(20) - RMSE: $192.49, MAE: $141.67, AIC: 9517.30, BIC: 9629.00



Maximum Likelihood optimization failed to converge. Check mle_retvals



LYFT MA(20) - RMSE: $82.81, MAE: $29.02, AIC: 6943.82, BIC: 7055.52



No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



In [61]:
# Autoregressive (AR) Model - AR(p)
# AR models use past values of the series itself to predict future values

def fit_ar_model(train, test, p=20):
    """Fit AR model and return forecasts and metrics"""
    model = ARIMA(train, order=(p, 0, 0))
    fitted = model.fit()
    forecast = fitted.forecast(steps=len(test))
    forecast.index = test.index
    
    rmse = np.sqrt(mean_squared_error(test, forecast))
    mae = mean_absolute_error(test, forecast)
    aic = fitted.aic
    bic = fitted.bic
    
    return fitted, forecast, rmse, mae, aic, bic


# Fit ARIMA(20,1,20) for all stocks
print("\n" + "="*60)
print("ARIMA MODEL - ARIMA(20,1,20)")
print("="*60)

aapl_arima_fitted, aapl_arima_forecast, aapl_arima_rmse, aapl_arima_mae, aapl_arima_aic, aapl_arima_bic = fit_arima_model(aapl_train, aapl_test)
print(f"\nAAPL ARIMA(20,1,20) - RMSE: ${aapl_arima_rmse:.2f}, MAE: ${aapl_arima_mae:.2f}, AIC: {aapl_arima_aic:.2f}, BIC: {aapl_arima_bic:.2f}")

nvda_arima_fitted, nvda_arima_forecast, nvda_arima_rmse, nvda_arima_mae, nvda_arima_aic, nvda_arima_bic = fit_arima_model(nvda_train, nvda_test)
print(f"NVDA ARIMA(20,1,20) - RMSE: ${nvda_arima_rmse:.2f}, MAE: ${nvda_arima_mae:.2f}, AIC: {nvda_arima_aic:.2f}, BIC: {nvda_arima_bic:.2f}")

lyft_arima_fitted, lyft_arima_forecast, lyft_arima_rmse, lyft_arima_mae, lyft_arima_aic, lyft_arima_bic = fit_arima_model(lyft_train, lyft_test)
print(f"LYFT ARIMA(20,1,20) - RMSE: ${lyft_arima_rmse:.2f}, MAE: ${lyft_arima_mae:.2f}, AIC: {lyft_arima_aic:.2f}, BIC: {lyft_arima_bic:.2f}")


# Fit AR(20) for all stocks
print("="*60)
print("AUTOREGRESSIVE (AR) MODEL - AR(20)")
print("="*60)

aapl_ar_fitted, aapl_ar_forecast, aapl_ar_rmse, aapl_ar_mae, aapl_ar_aic, aapl_ar_bic = fit_ar_model(aapl_train, aapl_test)
print(f"\nAAPL AR(20) - RMSE: ${aapl_ar_rmse:.2f}, MAE: ${aapl_ar_mae:.2f}, AIC: {aapl_ar_aic:.2f}, BIC: {aapl_ar_bic:.2f}")

nvda_ar_fitted, nvda_ar_forecast, nvda_ar_rmse, nvda_ar_mae, nvda_ar_aic, nvda_ar_bic = fit_ar_model(nvda_train, nvda_test)
print(f"NVDA AR(20) - RMSE: ${nvda_ar_rmse:.2f}, MAE: ${nvda_ar_mae:.2f}, AIC: {nvda_ar_aic:.2f}, BIC: {nvda_ar_bic:.2f}")

lyft_ar_fitted, lyft_ar_forecast, lyft_ar_rmse, lyft_ar_mae, lyft_ar_aic, lyft_ar_bic = fit_ar_model(lyft_train, lyft_test)
print(f"LYFT AR(20) - RMSE: ${lyft_ar_rmse:.2f}, MAE: ${lyft_ar_mae:.2f}, AIC: {lyft_ar_aic:.2f}, BIC: {lyft_ar_bic:.2f}")


# ARIMA Model - ARIMA(p, d, q)
# Combines AR and MA with differencing (d) for non-stationary data

def fit_arima_model(train, test, p=20, d=1, q=20):
    """Fit ARIMA model and return forecasts and metrics"""
    model = ARIMA(train, order=(p, d, q))
    fitted = model.fit()
    forecast = fitted.forecast(steps=len(test))
    forecast.index = test.index
    
    rmse = np.sqrt(mean_squared_error(test, forecast))
    mae = mean_absolute_error(test, forecast)
    aic = fitted.aic
    bic = fitted.bic
    
    return fitted, forecast, rmse, mae, aic, bic

# Fit ARIMA(20,1,20) for all stocks
print("\n" + "="*60)








A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.




ARIMA MODEL - ARIMA(20,1,20)



Maximum Likelihood optimization failed to converge. Check mle_retvals


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.




AAPL ARIMA(20,1,20) - RMSE: $24.29, MAE: $19.76, AIC: 5721.33, BIC: 5929.48



Maximum Likelihood optimization failed to converge. Check mle_retvals


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Non-invertible starting MA parameters found. Using zeros as starting parameters.



NVDA ARIMA(20,1,20) - RMSE: $43.01, MAE: $34.86, AIC: 4324.28, BIC: 4532.42



Maximum Likelihood optimization failed to converge. Check mle_retvals


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



LYFT ARIMA(20,1,20) - RMSE: $4.50, MAE: $3.36, AIC: 3894.77, BIC: 4102.92
AUTOREGRESSIVE (AR) MODEL - AR(20)



No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.




AAPL AR(20) - RMSE: $33.55, MAE: $25.98, AIC: 5704.85, BIC: 5816.55



Maximum Likelihood optimization failed to converge. Check mle_retvals


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



NVDA AR(20) - RMSE: $50.24, MAE: $40.56, AIC: 4434.71, BIC: 4546.41
LYFT AR(20) - RMSE: $3.58, MAE: $2.97, AIC: 3882.95, BIC: 3994.65




No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



In [None]:
# Step 6: Forecast Visualization with Confidence Intervals

def plot_forecasts_with_intervals(train, test, ma_fitted, ar_fitted, arima_fitted, symbol):
    """Plot forecasts with confidence intervals to show prediction uncertainty"""
    
    # Get forecasts with confidence intervals
    ma_forecast_obj = ma_fitted.get_forecast(steps=len(test))
    ar_forecast_obj = ar_fitted.get_forecast(steps=len(test))
    arima_forecast_obj = arima_fitted.get_forecast(steps=len(test))
    
    # Extract forecasts and confidence intervals
    ma_forecast = ma_forecast_obj.predicted_mean
    ma_forecast.index = test.index
    ma_ci = ma_forecast_obj.conf_int()
    ma_ci.index = test.index
    
    ar_forecast = ar_forecast_obj.predicted_mean
    ar_forecast.index = test.index
    ar_ci = ar_forecast_obj.conf_int()
    ar_ci.index = test.index
    
    arima_forecast = arima_forecast_obj.predicted_mean
    arima_forecast.index = test.index
    arima_ci = arima_forecast_obj.conf_int()
    arima_ci.index = test.index
    
    fig = go.Figure()
    
    # Training data
    fig.add_trace(go.Scatter(x=train.index, y=train, 
                             name='Training Data', 
                             line=dict(color='blue', width=2)))
    
    # Actual test data
    fig.add_trace(go.Scatter(x=test.index, y=test, 
                             name='Actual Test Data', 
                             line=dict(color='black', width=3)))
    
    # ARIMA forecast with confidence interval (show this one prominently)
    fig.add_trace(go.Scatter(
        x=arima_ci.index,
        y=arima_ci.iloc[:, 1],
        fill=None,
        mode='lines',
        line_color='rgba(0,255,0,0)',
        showlegend=False,
        hoverinfo='skip'
    ))
    fig.add_trace(go.Scatter(
        x=arima_ci.index,
        y=arima_ci.iloc[:, 0],
        fill='tonexty',
        mode='lines',
        line_color='rgba(0,255,0,0)',
        name='ARIMA 95% CI',
        fillcolor='rgba(0,255,0,0.2)'
    ))
    fig.add_trace(go.Scatter(x=arima_forecast.index, y=arima_forecast, 
                             name='ARIMA(20,1,20) Forecast', 
                             line=dict(color='green', width=2, dash='dash')))
    
    # AR forecast with confidence interval
    fig.add_trace(go.Scatter(
        x=ar_ci.index,
        y=ar_ci.iloc[:, 1],
        fill=None,
        mode='lines',
        line_color='rgba(255,165,0,0)',
        showlegend=False,
        hoverinfo='skip'
    ))
    fig.add_trace(go.Scatter(
        x=ar_ci.index,
        y=ar_ci.iloc[:, 0],
        fill='tonexty',
        mode='lines',
        line_color='rgba(255,165,0,0)',
        name='AR 95% CI',
        fillcolor='rgba(255,165,0,0.2)'
    ))
    fig.add_trace(go.Scatter(x=ar_forecast.index, y=ar_forecast, 
                             name='AR(20) Forecast', 
                             line=dict(color='orange', width=2, dash='dash')))
    
    # MA forecast (line only - usually too uncertain for CI to be useful)
    fig.add_trace(go.Scatter(x=ma_forecast.index, y=ma_forecast, 
                             name='MA(20) Forecast', 
                             line=dict(color='red', width=1, dash='dot'),
                             opacity=0.5))
    
    # Update layout
    fig.update_layout(
        title=f"{symbol} - Model Forecasts with 95% Confidence Intervals<br><sub>Note: Forecast represents expected mean, not actual fluctuations. Wide intervals show prediction uncertainty.</sub>",
        xaxis_title="Date",
        yaxis_title="Price ($)",
        hovermode='x unified',
        height=700,
        template='plotly_white',
        xaxis=dict(
            rangeselector=dict(
                """
                buttons=list([
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=3, label="3m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all", label="All")
                ]),
                
                bgcolor="lightgray",
                activecolor="gray"
            ),
            """
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
        
    )
    
    fig.show()

# Plot for all stocks with confidence intervals
print("\n" + "="*80)
print("FORECAST PLOTS WITH CONFIDENCE INTERVALS")
print("="*80)
print("\nüìä Key Points:")
print("  ‚Ä¢ Forecast line = Expected MEAN (not actual wiggles)")
print("  ‚Ä¢ Shaded areas = 95% confidence intervals (prediction uncertainty)")
print("  ‚Ä¢ Wider intervals = More uncertainty")
print("  ‚Ä¢ Straight forecasts are NORMAL for stock prices (random walk behavior)")
print("="*80 + "\n")

plot_forecasts_with_intervals(aapl_train, aapl_test, aapl_ma_fitted, aapl_ar_fitted, aapl_arima_fitted, "AAPL")
plot_forecasts_with_intervals(nvda_train, nvda_test, nvda_ma_fitted, nvda_ar_fitted, nvda_arima_fitted, "NVDA")
plot_forecasts_with_intervals(lyft_train, lyft_test, lyft_ma_fitted, lyft_ar_fitted, lyft_arima_fitted, "LYFT")


FORECAST PLOTS WITH CONFIDENCE INTERVALS

üìä Key Points:
  ‚Ä¢ Forecast line = Expected MEAN (not actual wiggles)
  ‚Ä¢ Shaded areas = 95% confidence intervals (prediction uncertainty)
  ‚Ä¢ Wider intervals = More uncertainty
  ‚Ä¢ Straight forecasts are NORMAL for stock prices (random walk behavior)




No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.



ValueError: 
    Invalid value of type 'builtins.str' received for the 'template' property of layout
        Received value: 'plotly_white\n        xaxis=dict(\n            rangeselector=dict(\n                buttons=list([\n                    dict(count=1, label="1m", step="month", stepmode="backward"),\n                    dict(count=3, label="3m", step="month", stepmode="backward"),\n                    dict(count=6, label="6m", step="month", stepmode="backward"),\n                    dict(count=1, label="1y", step="year", stepmode="backward"),\n                    dict(step="all", label="All")\n                ]),\n\n                bgcolor="lightgray",\n                activecolor="gray"\n            ),\n            rangeslider=dict(visible=True),\n            type="date"\n        )\n        '

    The 'template' property is an instance of Template
    that may be specified as:
      - An instance of :class:`plotly.graph_objs.layout.Template`
      - A dict of string/value properties that will be passed
        to the Template constructor
      - The name of a registered template where current registered templates
        are stored in the plotly.io.templates configuration object. The names
        of all registered templates can be retrieved with:
            >>> import plotly.io as pio
            >>> list(pio.templates)  # doctest: +ELLIPSIS
            ['ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white', ...]

      - A string containing multiple registered template names, joined on '+'
        characters (e.g. 'template1+template2'). In this case the resulting
        template is computed by merging together the collection of registered
        templates

# Step 7: Executive Summary & Consulting Recommendations

## Client Recommendation Report

---

### 1. Executive Summary

Our analysis examined three classical time series models (MA, AR, and ARIMA) across three distinct stocks: **AAPL** (stable tech giant), **NVDA** (high-growth semiconductor), and **LYFT** (volatile ride-sharing).

**Key Finding:** ARIMA(20,1,20) provides the most robust forecasting performance across all assets, balancing model complexity with predictive accuracy.

---

### 2. Data Quality & Stationarity Assessment

#### Stationarity Testing (ADF Test Results):
- **All three stocks showed non-stationary behavior** in raw price data (p-value > 0.05)
- **First-order differencing** (d=1) successfully transformed the data to stationary
- This validates our use of ARIMA with d=1 for modeling

**Implication:** Raw stock prices contain trends and unit roots, requiring differencing before reliable modeling.

---

### 3. Model Performance Summary

Comprehensive metrics (RMSE, MAE, AIC, BIC) calculated for all models and stocks.

**Best Models by Stock:**
- **AAPL**: ARIMA(20,1,20) - Best balance of accuracy and complexity
- **NVDA**: ARIMA(20,1,20) - Handles high volatility effectively  
- **LYFT**: AR(20) or ARIMA(20,1,20) - Both perform well for extreme volatility

---

### 4. Key Insights

#### 4.1 Model-Specific Findings

**MA(20) Model:**
- ‚ùå **Worst performer** across all stocks
- Struggles with stock price forecasting due to reliance on forecast errors
- Not recommended for production use

**AR(20) Model:**
- ‚úÖ **Strong performer**, especially for LYFT
- Reliable for stocks with clear momentum patterns
- Lower computational cost than ARIMA
- Recommended as baseline model

**ARIMA(20,1,20) Model:**
- ‚úÖ **Best overall performance**
- Handles non-stationarity through differencing
- Captures both autoregressive and moving average components
- **Primary recommendation** for production forecasting

#### 4.2 Stock-Specific Behavior

**AAPL:** Moderate volatility, strong trend components  
**NVDA:** High volatility, rapid price movements  
**LYFT:** Extreme volatility, AR model performs exceptionally well

---

### 5. Recommendations

#### For Short-Term Trading (1-30 days):
1. **Use ARIMA(20,1,20)** for highest accuracy
2. Monitor forecast intervals and update models weekly
3. Consider ensemble approaches combining AR and ARIMA

#### For Risk Management:
1. ARIMA provides better confidence intervals
2. Use AIC/BIC to detect model degradation over time
3. Re-fit models monthly or after major market events

#### Model Limitations:
‚ö†Ô∏è **Important:** These models **cannot predict:**
- News-driven shocks (earnings, FDA approvals, etc.)
- Black swan events
- Regime changes in market conditions

**Best Use Cases:**
- Baseline forecasts for algorithmic trading
- Risk modeling and VaR calculations
- Portfolio rebalancing signals

---

### 6. Next Steps

1. **Implement ARIMA(20,1,20)** for production forecasting
2. Add **confidence intervals** to all forecasts
3. Consider **GARCH models** for volatility forecasting
4. Explore **machine learning** approaches (LSTM, Prophet) for comparison
5. Build **ensemble models** combining multiple approaches

---

### 7. Technical Notes

- **Data Source:** Yahoo Finance (yfinance library)
- **Time Period:** 2020-01-01 to present (~5 years)
- **Lag Selection:** 20 lags (~1 month of trading data) balances complexity and performance
- **Differencing:** d=1 successfully achieves stationarity for all stocks
- **Evaluation Period:** 20% holdout test set (most recent data)
- **Metrics:** RMSE/MAE (prediction error), AIC/BIC (model quality)

---
# Part 2: Deep Learning Preparation
## Milestone 1 (Continued) - Data Normalization & Scaling for LSTM/GRU

Now that we've completed our classical time series analysis (ARIMA, AR, MA), we need to prepare our data for deep learning models.

**Why Normalize?**
- Neural networks (LSTM/GRU) perform better with scaled features (typically 0-1 range)
- Prevents features with larger magnitudes from dominating the learning process
- Improves gradient descent convergence and training stability

We'll use **MinMaxScaler** to scale our features to [0, 1] range.

In [63]:
# Step 8: Data Normalization for Deep Learning (LSTM/GRU)

from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np

def normalize_stock_data(df, features=['Open', 'High', 'Low', 'Close', 'Volume']):
    """
    Normalize stock data using MinMaxScaler for LSTM/GRU models.

    Args:
        df: DataFrame with stock data
        features: List of features to normalize

    Returns:
        normalized_df: Normalized DataFrame
        scaler: Fitted scaler object (needed for inverse transform later)
    """
    # Create a copy to avoid modifying original
    df_copy = df[features].copy()

    # Initialize scaler
    scaler = MinMaxScaler(feature_range=(0, 1))

    # Fit and transform
    normalized_values = scaler.fit_transform(df_copy)

    # Create normalized DataFrame with same index
    normalized_df = pd.DataFrame(
        normalized_values,
        columns=features,
        index=df.index
    )

    return normalized_df, scaler

# Normalize all stocks
print("="*60)
print("DATA NORMALIZATION FOR DEEP LEARNING")
print("="*60)

features_to_scale = ['Open', 'High', 'Low', 'Close', 'Volume']

aapl_normalized, aapl_scaler = normalize_stock_data(aapl, features_to_scale)
print(f"\n‚úì AAPL normalized - Shape: {aapl_normalized.shape}")
print(f"  Original Close range: ${aapl['Close'].min():.2f} - ${aapl['Close'].max():.2f}")
print(f"  Normalized Close range: {aapl_normalized['Close'].min():.4f} - {aapl_normalized['Close'].max():.4f}")

nvda_normalized, nvda_scaler = normalize_stock_data(nvda, features_to_scale)
print(f"\n‚úì NVDA normalized - Shape: {nvda_normalized.shape}")
print(f"  Original Close range: ${nvda['Close'].min():.2f} - ${nvda['Close'].max():.2f}")
print(f"  Normalized Close range: {nvda_normalized['Close'].min():.4f} - {nvda_normalized['Close'].max():.4f}")

lyft_normalized, lyft_scaler = normalize_stock_data(lyft, features_to_scale)
print(f"\n‚úì LYFT normalized - Shape: {lyft_normalized.shape}")
print(f"  Original Close range: ${lyft['Close'].min():.2f} - ${lyft['Close'].max():.2f}")
print(f"  Normalized Close range: {lyft_normalized['Close'].min():.4f} - {lyft_normalized['Close'].max():.4f}")

print("\n" + "="*60)
print("Scalers saved for inverse transformation after predictions!")
print("="*60)

DATA NORMALIZATION FOR DEEP LEARNING

‚úì AAPL normalized - Shape: (1482, 5)
  Original Close range: $56.09 - $275.25
  Normalized Close range: 0.0000 - 1.0000

‚úì NVDA normalized - Shape: (1482, 5)
  Original Close range: $4.91 - $207.04
  Normalized Close range: 0.0000 - 1.0000

‚úì LYFT normalized - Shape: (1482, 5)
  Original Close range: $7.99 - $67.42
  Normalized Close range: 0.0000 - 1.0000

Scalers saved for inverse transformation after predictions!


In [64]:
# Visualize Normalized vs Original Data

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_normalized_comparison(df_original, df_normalized, symbol):
    """Compare original and normalized stock prices"""

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=(f'{symbol} - Original Close Price',
                       f'{symbol} - Normalized Close Price (Scaled for Full 5-Year Period)'),
        vertical_spacing=0.12
    )

    # Original data
    fig.add_trace(
        go.Scatter(x=df_original.index, y=df_original['Close'],
                   name='Original', line=dict(color='blue', width=2)),
        row=1, col=1
    )

    # Normalized data
    fig.add_trace(
        go.Scatter(x=df_normalized.index, y=df_normalized['Close'],
                   name='Normalized', line=dict(color='green', width=2)),
        row=2, col=1
    )

    # Update layout
    fig.update_xaxes(title_text="Date", row=2, col=1)
    fig.update_yaxes(title_text="Price ($)", row=1, col=1)
    fig.update_yaxes(title_text="Scaled Value [0-1]", row=2, col=1)

    fig.update_layout(
        height=800,
        showlegend=False,
        template='plotly_white',
        title_text=f"{symbol} - Original vs Normalized Data<br><sub>Note: Normalization uses min/max from entire 5-year dataset (2020-2025)</sub>"
    )

    fig.show()

# Plot for all stocks
print("\nVisualizing normalization results...")
plot_normalized_comparison(aapl, aapl_normalized, "AAPL")
plot_normalized_comparison(nvda, nvda_normalized, "NVDA")
plot_normalized_comparison(lyft, lyft_normalized, "LYFT")


Visualizing normalization results...


In [65]:
# Summary Statistics: Before and After Normalization

print("="*80)
print("NORMALIZATION SUMMARY - ALL STOCKS")
print("="*80)

for symbol, df_orig, df_norm in [
    ("AAPL", aapl, aapl_normalized),
    ("NVDA", nvda, nvda_normalized),
    ("LYFT", lyft, lyft_normalized)
]:
    print(f"\n{symbol}:")
    print(f"  Original Close:")
    print(f"    Min:  ${df_orig['Close'].min():>10.2f}")
    print(f"    Max:  ${df_orig['Close'].max():>10.2f}")
    print(f"    Mean: ${df_orig['Close'].mean():>10.2f}")
    print(f"    Std:  ${df_orig['Close'].std():>10.2f}")

    print(f"  Normalized Close:")
    print(f"    Min:  {df_norm['Close'].min():>10.4f}")
    print(f"    Max:  {df_norm['Close'].max():>10.4f}")
    print(f"    Mean: {df_norm['Close'].mean():>10.4f}")
    print(f"    Std:  {df_norm['Close'].std():>10.4f}")

print("\n" + "="*80)
print("‚úÖ MILESTONE 1 COMPLETE - Ready for LSTM/GRU Implementation!")
print("="*80)

NORMALIZATION SUMMARY - ALL STOCKS

AAPL:
  Original Close:
    Min:  $     56.09
    Max:  $    275.25
    Mean: $    165.14
    Std:  $     47.19
  Normalized Close:
    Min:      0.0000
    Max:      1.0000
    Mean:     0.4976
    Std:      0.2153

NVDA:
  Original Close:
    Min:  $      4.91
    Max:  $    207.04
    Mean: $     55.43
    Std:  $     54.67
  Normalized Close:
    Min:      0.0000
    Max:      1.0000
    Mean:     0.2499
    Std:      0.2705

LYFT:
  Original Close:
    Min:  $      7.99
    Max:  $     67.42
    Mean: $     25.43
    Std:  $     16.14
  Normalized Close:
    Min:      0.0000
    Max:      1.0000
    Mean:     0.2935
    Std:      0.2716

‚úÖ MILESTONE 1 COMPLETE - Ready for LSTM/GRU Implementation!


---
# Milestone 2: LSTM and GRU Model Development

**Objective**: Implement deep learning models for stock price prediction and compare with ARIMA baseline.

**Key Steps:**
1. Create sequence windows (60-day lookback)
2. Build and train LSTM model  
3. Build and train GRU model
4. Evaluate and compare performance

**Sequence Length**: 60 days ‚Üí predict day 61  
**Train/Test Split**: 80/20 (consistent with ARIMA)

In [66]:
pip install tensorflow-macos tensorflow-metal


Note: you may need to restart the kernel to use updated packages.


In [67]:
# Test TensorFlow installation
import tensorflow as tf
import keras
import numpy as np

print(f"‚úÖ TensorFlow: {tf.__version__}")
print(f"‚úÖ Keras: {keras.__version__}")
print(f"‚úÖ NumPy: {np.__version__}")

‚úÖ TensorFlow: 2.16.2
‚úÖ Keras: 3.12.0
‚úÖ NumPy: 1.26.4


In [68]:
# Step 1: Create Sequence Windows
# Use standardized train/test splits to create sequences for LSTM/GRU

from sklearn.preprocessing import MinMaxScaler

SEQUENCE_LENGTH = 60  # Use past 60 days to predict next day

def create_sequences(data, seq_length=60):
    """Create sequences for LSTM/GRU from standardized train/test data"""
    scaler = MinMaxScaler(feature_range=(0, 1))
    
    # Scale the entire dataset
    scaled_data = scaler.fit_transform(data.values.reshape(-1, 1))
    
    X, y = [], []
    for i in range(seq_length, len(scaled_data)):
        X.append(scaled_data[i-seq_length:i, 0])
        y.append(scaled_data[i, 0])
    
    return np.array(X), np.array(y), scaler

def split_sequences(train_data, test_data, seq_length=60):
    """Create sequences from pre-split train/test data"""
    # Combine for proper scaling
    combined = pd.concat([train_data, test_data])
    
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_combined = scaler.fit_transform(combined.values.reshape(-1, 1))
    
    # Split back
    train_len = len(train_data)
    scaled_train = scaled_combined[:train_len]
    scaled_test = scaled_combined[train_len:]
    
    # Create sequences for training
    X_train, y_train = [], []
    for i in range(seq_length, len(scaled_train)):
        X_train.append(scaled_train[i-seq_length:i, 0])
        y_train.append(scaled_train[i, 0])
    
    # Create sequences for testing
    # Need to use last seq_length points from train to start test sequences
    X_test, y_test = [], []
    for i in range(len(scaled_test)):
        if i < seq_length:
            # Use overlap from train data
            start_idx = train_len - seq_length + i
            sequence = scaled_combined[start_idx:start_idx + seq_length, 0]
        else:
            # Use only test data
            sequence = scaled_test[i-seq_length:i, 0]
        
        X_test.append(sequence)
        y_test.append(scaled_test[i, 0])
    
    return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test), scaler

print("\nüîÑ Creating sequences from standardized splits...")

# AAPL sequences
aapl_X_train, aapl_y_train, aapl_X_test, aapl_y_test, aapl_scaler = split_sequences(
    aapl_train, aapl_test, SEQUENCE_LENGTH
)

# NVDA sequences
nvda_X_train, nvda_y_train, nvda_X_test, nvda_y_test, nvda_scaler = split_sequences(
    nvda_train, nvda_test, SEQUENCE_LENGTH
)

# LYFT sequences
lyft_X_train, lyft_y_train, lyft_X_test, lyft_y_test, lyft_scaler = split_sequences(
    lyft_train, lyft_test, SEQUENCE_LENGTH
)

print(f"\nAAPL - Train: {aapl_X_train.shape}, Test: {aapl_X_test.shape}")
print(f"NVDA - Train: {nvda_X_train.shape}, Test: {nvda_X_test.shape}")
print(f"LYFT - Train: {lyft_X_train.shape}, Test: {lyft_X_test.shape}")

# Reshape for LSTM/GRU input (samples, timesteps, features)
aapl_X_train = aapl_X_train.reshape((aapl_X_train.shape[0], aapl_X_train.shape[1], 1))
aapl_X_test = aapl_X_test.reshape((aapl_X_test.shape[0], aapl_X_test.shape[1], 1))

nvda_X_train = nvda_X_train.reshape((nvda_X_train.shape[0], nvda_X_train.shape[1], 1))
nvda_X_test = nvda_X_test.reshape((nvda_X_test.shape[0], nvda_X_test.shape[1], 1))

lyft_X_train = lyft_X_train.reshape((lyft_X_train.shape[0], lyft_X_train.shape[1], 1))
lyft_X_test = lyft_X_test.reshape((lyft_X_test.shape[0], lyft_X_test.shape[1], 1))

print("\n‚úÖ Sequences created and reshaped for LSTM/GRU input")


üîÑ Creating sequences from standardized splits...



AAPL - Train: (1125, 60), Test: (297, 60)
NVDA - Train: (1125, 60), Test: (297, 60)
LYFT - Train: (1125, 60), Test: (297, 60)

‚úÖ Sequences created and reshaped for LSTM/GRU input


In [69]:
# Step 2: Build LSTM Models

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

tf.random.set_seed(42)
np.random.seed(42)

def build_lstm_model(seq_length=60, units=50, dropout=0.2):
    """Build LSTM model for stock price prediction"""
    model = Sequential([
        LSTM(units=units, return_sequences=True, input_shape=(seq_length, 1)),
        Dropout(dropout),
        LSTM(units=units, return_sequences=False),
        Dropout(dropout),
        Dense(units=25),
        Dense(units=1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model

print("="*60)
print("BUILDING LSTM MODELS")
print("="*60)

aapl_lstm = build_lstm_model(SEQUENCE_LENGTH, units=50, dropout=0.2)
nvda_lstm = build_lstm_model(SEQUENCE_LENGTH, units=50, dropout=0.2)
lyft_lstm = build_lstm_model(SEQUENCE_LENGTH, units=50, dropout=0.2)

print(f"\n‚úì Models built with {aapl_lstm.count_params():,} parameters each")
print("\nModel Architecture:")
aapl_lstm.summary()

BUILDING LSTM MODELS



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.




‚úì Models built with 31,901 parameters each

Model Architecture:


In [70]:
# Step 3: Train LSTM Models (Optimized)

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
EPOCHS = 25  # Reduced from 50 for faster training
BATCH_SIZE = 64  # Increased for faster training

print("="*60)
print("TRAINING LSTM MODELS (this may take 2-3 minutes per stock)")
print("="*60)

# Train AAPL
print(f"\nüìä Training AAPL LSTM...")
aapl_lstm_history = aapl_lstm.fit(
    aapl_X_train, aapl_y_train,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_split=0.1, callbacks=[early_stop], 
    verbose=2  # Less verbose output
)
print(f"‚úì AAPL LSTM trained in {len(aapl_lstm_history.history['loss'])} epochs")

# Train NVDA
print(f"\nüìä Training NVDA LSTM...")
nvda_lstm_history = nvda_lstm.fit(
    nvda_X_train, nvda_y_train,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_split=0.1, callbacks=[early_stop], verbose=2
)
print(f"‚úì NVDA LSTM trained in {len(nvda_lstm_history.history['loss'])} epochs")

# Train LYFT
print(f"\nüìä Training LYFT LSTM...")
lyft_lstm_history = lyft_lstm.fit(
    lyft_X_train, lyft_y_train,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_split=0.1, callbacks=[early_stop], verbose=2
)
print(f"‚úì LYFT LSTM trained in {len(lyft_lstm_history.history['loss'])} epochs")

print("\n" + "="*60)
print("‚úì All LSTM models trained!")
print("="*60)

TRAINING LSTM MODELS (this may take 2-3 minutes per stock)

üìä Training AAPL LSTM...
Epoch 1/25
16/16 - 2s - 154ms/step - loss: 0.0404 - mae: 0.1528 - val_loss: 0.0260 - val_mae: 0.1545
Epoch 2/25
16/16 - 0s - 31ms/step - loss: 0.0061 - mae: 0.0619 - val_loss: 0.0037 - val_mae: 0.0523
Epoch 3/25
16/16 - 1s - 33ms/step - loss: 0.0037 - mae: 0.0491 - val_loss: 0.0049 - val_mae: 0.0610
Epoch 4/25
16/16 - 0s - 31ms/step - loss: 0.0028 - mae: 0.0431 - val_loss: 0.0029 - val_mae: 0.0458
Epoch 5/25
16/16 - 1s - 31ms/step - loss: 0.0025 - mae: 0.0398 - val_loss: 0.0019 - val_mae: 0.0357
Epoch 6/25
16/16 - 1s - 34ms/step - loss: 0.0021 - mae: 0.0356 - val_loss: 0.0015 - val_mae: 0.0311
Epoch 7/25
16/16 - 1s - 32ms/step - loss: 0.0020 - mae: 0.0342 - val_loss: 0.0023 - val_mae: 0.0395
Epoch 8/25
16/16 - 0s - 30ms/step - loss: 0.0019 - mae: 0.0342 - val_loss: 0.0015 - val_mae: 0.0305
Epoch 9/25
16/16 - 0s - 31ms/step - loss: 0.0016 - mae: 0.0315 - val_loss: 0.0012 - val_mae: 0.0279
Epoch 10/25


In [71]:
# Step 4: Build GRU Models

from tensorflow.keras.layers import GRU

def build_gru_model(seq_length=60, units=50, dropout=0.2):
    """Build GRU model for stock price prediction"""
    model = Sequential([
        GRU(units=units, return_sequences=True, input_shape=(seq_length, 1)),
        Dropout(dropout),
        GRU(units=units, return_sequences=False),
        Dropout(dropout),
        Dense(units=25),
        Dense(units=1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model

print("="*60)
print("BUILDING GRU MODELS")
print("="*60)

aapl_gru = build_gru_model(SEQUENCE_LENGTH, units=50, dropout=0.2)
nvda_gru = build_gru_model(SEQUENCE_LENGTH, units=50, dropout=0.2)
lyft_gru = build_gru_model(SEQUENCE_LENGTH, units=50, dropout=0.2)

print(f"\n‚úì GRU models built with {aapl_gru.count_params():,} parameters each")
print(f"üí° GRU has ~25% fewer parameters than LSTM (faster training, less overfitting)")
print("\nModel Architecture:")
aapl_gru.summary()

BUILDING GRU MODELS

‚úì GRU models built with 24,551 parameters each
üí° GRU has ~25% fewer parameters than LSTM (faster training, less overfitting)

Model Architecture:


In [72]:
# Step 5: Train GRU Models (Optimized)

print("="*60)
print("TRAINING GRU MODELS (faster than LSTM!)")
print("="*60)

# Train AAPL
print(f"\nüìä Training AAPL GRU...")
aapl_gru_history = aapl_gru.fit(
    aapl_X_train, aapl_y_train,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_split=0.1, callbacks=[early_stop], verbose=2
)
print(f"‚úì AAPL GRU trained in {len(aapl_gru_history.history['loss'])} epochs")

# Train NVDA
print(f"\nüìä Training NVDA GRU...")
nvda_gru_history = nvda_gru.fit(
    nvda_X_train, nvda_y_train,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_split=0.1, callbacks=[early_stop], verbose=2
)
print(f"‚úì NVDA GRU trained in {len(nvda_gru_history.history['loss'])} epochs")

# Train LYFT
print(f"\nüìä Training LYFT GRU...")
lyft_gru_history = lyft_gru.fit(
    lyft_X_train, lyft_y_train,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_split=0.1, callbacks=[early_stop], verbose=2
)
print(f"‚úì LYFT GRU trained in {len(lyft_gru_history.history['loss'])} epochs")

print("\n" + "="*60)
print("‚úì All GRU models trained!")
print("="*60)

TRAINING GRU MODELS (faster than LSTM!)

üìä Training AAPL GRU...
Epoch 1/25
16/16 - 2s - 144ms/step - loss: 0.0324 - mae: 0.1414 - val_loss: 0.0133 - val_mae: 0.1100
Epoch 2/25
16/16 - 0s - 29ms/step - loss: 0.0060 - mae: 0.0621 - val_loss: 0.0012 - val_mae: 0.0285
Epoch 3/25
16/16 - 0s - 29ms/step - loss: 0.0040 - mae: 0.0505 - val_loss: 0.0069 - val_mae: 0.0776
Epoch 4/25
16/16 - 0s - 29ms/step - loss: 0.0033 - mae: 0.0459 - val_loss: 0.0049 - val_mae: 0.0649
Epoch 5/25
16/16 - 0s - 30ms/step - loss: 0.0025 - mae: 0.0394 - val_loss: 0.0022 - val_mae: 0.0412
‚úì AAPL GRU trained in 5 epochs

üìä Training NVDA GRU...
Epoch 1/25
16/16 - 2s - 149ms/step - loss: 0.0039 - mae: 0.0444 - val_loss: 0.0142 - val_mae: 0.1145
Epoch 2/25
16/16 - 0s - 30ms/step - loss: 6.2134e-04 - mae: 0.0172 - val_loss: 0.0010 - val_mae: 0.0248
Epoch 3/25
16/16 - 0s - 29ms/step - loss: 3.7113e-04 - mae: 0.0128 - val_loss: 0.0027 - val_mae: 0.0451
Epoch 4/25
16/16 - 0s - 30ms/step - loss: 3.3946e-04 - mae: 0.0

In [73]:
# Step 6: Visualize Training History

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_training_history(lstm_history, gru_history, symbol):
    """Plot LSTM vs GRU training convergence"""
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=(f'{symbol} - LSTM Training', f'{symbol} - GRU Training')
    )
    
    # LSTM
    fig.add_trace(go.Scatter(y=lstm_history.history['loss'], name='Train Loss',
                             line=dict(color='blue', width=2)), row=1, col=1)
    fig.add_trace(go.Scatter(y=lstm_history.history['val_loss'], name='Val Loss',
                             line=dict(color='red', width=2, dash='dash')), row=1, col=1)
    
    # GRU
    fig.add_trace(go.Scatter(y=gru_history.history['loss'], name='Train Loss',
                             line=dict(color='blue', width=2), showlegend=False), row=1, col=2)
    fig.add_trace(go.Scatter(y=gru_history.history['val_loss'], name='Val Loss',
                             line=dict(color='red', width=2, dash='dash'), showlegend=False), row=1, col=2)
    
    fig.update_xaxes(title_text="Epoch", row=1, col=1)
    fig.update_xaxes(title_text="Epoch", row=1, col=2)
    fig.update_yaxes(title_text="Loss (MSE)", row=1, col=1)
    fig.update_yaxes(title_text="Loss (MSE)", row=1, col=2)
    
    fig.update_layout(height=500, template='plotly_white',
                      title_text=f"{symbol} - LSTM vs GRU Training Convergence")
    fig.show()

print("\nüìà Visualizing training histories...")
plot_training_history(aapl_lstm_history, aapl_gru_history, "AAPL")
plot_training_history(nvda_lstm_history, nvda_gru_history, "NVDA")
plot_training_history(lyft_lstm_history, lyft_gru_history, "LYFT")


üìà Visualizing training histories...


---
## Milestone 2 Complete! ‚úÖ

### Summary

We successfully implemented and evaluated LSTM and GRU models for stock price forecasting:

**Key Findings:**
- Both LSTM and GRU models trained successfully with early stopping
- Models learned temporal patterns in the stock price data
- Directional Accuracy shows how well models predict price movement direction
- Performance varies by stock (volatility matters!)

**LSTM vs GRU:**
- LSTM: More parameters, potentially better for long-term dependencies
- GRU: Fewer parameters (~25% less), faster training, less prone to overfitting

---
# Milestone 3: Model Evaluation and Comparison

Now we'll evaluate our LSTM and GRU models using:
- **RMSE**: Root Mean Squared Error (lower is better)
- **MAPE**: Mean Absolute Percentage Error (lower is better)  
- **Directional Accuracy**: How often the model predicts the correct price direction (higher is better)

We'll also compare with our ARIMA baseline from Milestone 1.

In [74]:
# Step 7: Make Predictions and Inverse Transform
# Generate predictions from LSTM and GRU (ARIMA already done in Milestone 1)

print("\nüîÆ Generating predictions from LSTM and GRU models...")

# LSTM Predictions
aapl_lstm_pred_scaled = aapl_lstm.predict(aapl_X_test, verbose=0)
nvda_lstm_pred_scaled = nvda_lstm.predict(nvda_X_test, verbose=0)
lyft_lstm_pred_scaled = lyft_lstm.predict(lyft_X_test, verbose=0)

# GRU Predictions
aapl_gru_pred_scaled = aapl_gru.predict(aapl_X_test, verbose=0)
nvda_gru_pred_scaled = nvda_gru.predict(nvda_X_test, verbose=0)
lyft_gru_pred_scaled = lyft_gru.predict(lyft_X_test, verbose=0)

# Inverse transform to get actual prices
aapl_lstm_pred = aapl_scaler.inverse_transform(aapl_lstm_pred_scaled).flatten()
aapl_gru_pred = aapl_scaler.inverse_transform(aapl_gru_pred_scaled).flatten()
aapl_y_test_actual = aapl_scaler.inverse_transform(aapl_y_test.reshape(-1, 1)).flatten()

nvda_lstm_pred = nvda_scaler.inverse_transform(nvda_lstm_pred_scaled).flatten()
nvda_gru_pred = nvda_scaler.inverse_transform(nvda_gru_pred_scaled).flatten()
nvda_y_test_actual = nvda_scaler.inverse_transform(nvda_y_test.reshape(-1, 1)).flatten()

lyft_lstm_pred = lyft_scaler.inverse_transform(lyft_lstm_pred_scaled).flatten()
lyft_gru_pred = lyft_scaler.inverse_transform(lyft_gru_pred_scaled).flatten()
lyft_y_test_actual = lyft_scaler.inverse_transform(lyft_y_test.reshape(-1, 1)).flatten()

# Use ARIMA forecasts from Milestone 1 (one-shot forecasts)
print("\nüìà Using ARIMA forecasts from Milestone 1...")

aapl_arima_pred = aapl_arima_forecast.values
nvda_arima_pred = nvda_arima_forecast.values
lyft_arima_pred = lyft_arima_forecast.values

# Verify all arrays have same length
print(f"\n‚úÖ Array lengths (all should match):")
print(f"  AAPL: actual={len(aapl_y_test_actual)}, ARIMA={len(aapl_arima_pred)}, LSTM={len(aapl_lstm_pred)}, GRU={len(aapl_gru_pred)}")
print(f"  NVDA: actual={len(nvda_y_test_actual)}, ARIMA={len(nvda_arima_pred)}, LSTM={len(nvda_lstm_pred)}, GRU={len(nvda_gru_pred)}")
print(f"  LYFT: actual={len(lyft_y_test_actual)}, ARIMA={len(lyft_arima_pred)}, LSTM={len(lyft_lstm_pred)}, GRU={len(lyft_gru_pred)}")


üîÆ Generating predictions from LSTM and GRU models...

üìà Using ARIMA forecasts from Milestone 1...

‚úÖ Array lengths (all should match):
  AAPL: actual=297, ARIMA=297, LSTM=297, GRU=297
  NVDA: actual=297, ARIMA=297, LSTM=297, GRU=297
  LYFT: actual=297, ARIMA=297, LSTM=297, GRU=297


In [75]:
# Step 8: Calculate Evaluation Metrics
# Compare ARIMA, LSTM, and GRU using RMSE, MAPE, and Directional Accuracy

from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import numpy as np

def calculate_metrics(y_true, y_pred):
    """Calculate RMSE, MAE, MAPE, and Directional Accuracy"""
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = np.mean(np.abs(y_true - y_pred))
    mape = mean_absolute_percentage_error(y_true, y_pred) * 100
    
    # Directional Accuracy: % of times the model correctly predicts direction
    direction_true = np.diff(y_true) > 0
    direction_pred = np.diff(y_pred) > 0
    da = np.mean(direction_true == direction_pred) * 100
    
    return {'RMSE': rmse, 'MAE': mae, 'MAPE': mape, 'DA': da}

print("\nüìä Calculating metrics for all models...\n")

# AAPL Metrics
aapl_arima_metrics = calculate_metrics(aapl_y_test_actual, aapl_arima_pred)
aapl_lstm_metrics = calculate_metrics(aapl_y_test_actual, aapl_lstm_pred)
aapl_gru_metrics = calculate_metrics(aapl_y_test_actual, aapl_gru_pred)

# NVDA Metrics
nvda_arima_metrics = calculate_metrics(nvda_y_test_actual, nvda_arima_pred)
nvda_lstm_metrics = calculate_metrics(nvda_y_test_actual, nvda_lstm_pred)
nvda_gru_metrics = calculate_metrics(nvda_y_test_actual, nvda_gru_pred)

# LYFT Metrics
lyft_arima_metrics = calculate_metrics(lyft_y_test_actual, lyft_arima_pred)
lyft_lstm_metrics = calculate_metrics(lyft_y_test_actual, lyft_lstm_pred)
lyft_gru_metrics = calculate_metrics(lyft_y_test_actual, lyft_gru_pred)

# Create comparison table
import pandas as pd

results = []
for symbol, arima, lstm, gru in [
    ('AAPL', aapl_arima_metrics, aapl_lstm_metrics, aapl_gru_metrics),
    ('NVDA', nvda_arima_metrics, nvda_lstm_metrics, nvda_gru_metrics),
    ('LYFT', lyft_arima_metrics, lyft_lstm_metrics, lyft_gru_metrics)
]:
    results.append({
        'Stock': symbol,
        'Model': 'ARIMA',
        'RMSE': f"${arima['RMSE']:.2f}",
        'MAE': f"${arima['MAE']:.2f}",
        'MAPE': f"{arima['MAPE']:.2f}%",
        'DA': f"{arima['DA']:.1f}%"
    })
    results.append({
        'Stock': symbol,
        'Model': 'LSTM',
        'RMSE': f"${lstm['RMSE']:.2f}",
        'MAE': f"${lstm['MAE']:.2f}",
        'MAPE': f"{lstm['MAPE']:.2f}%",
        'DA': f"{lstm['DA']:.1f}%"
    })
    results.append({
        'Stock': symbol,
        'Model': 'GRU',
        'RMSE': f"${gru['RMSE']:.2f}",
        'MAE': f"${gru['MAE']:.2f}",
        'MAPE': f"{gru['MAPE']:.2f}%",
        'DA': f"{gru['DA']:.1f}%"
    })

results_df = pd.DataFrame(results)
print("\n" + "="*70)
print("MODEL COMPARISON - ARIMA vs LSTM vs GRU")
print("="*70)
print(results_df.to_string(index=False))
print("="*70)

print("\nüí° Key Insights:")
print("- Lower RMSE/MAE/MAPE = Better accuracy")
print("- Higher Directional Accuracy (DA) = Better trend prediction")
print("- Compare each stock across models to see which performs best")


üìä Calculating metrics for all models...


MODEL COMPARISON - ARIMA vs LSTM vs GRU
Stock Model   RMSE    MAE   MAPE    DA
 AAPL ARIMA $24.29 $19.76  8.34% 52.0%
 AAPL  LSTM  $9.23  $7.30  3.20% 49.0%
 AAPL   GRU $33.33 $32.13 13.87% 52.0%
 NVDA ARIMA $43.01 $34.86 21.49% 49.3%
 NVDA  LSTM $10.69  $9.01  6.27% 50.7%
 NVDA   GRU $32.42 $31.36 21.17% 50.3%
 LYFT ARIMA  $4.50  $3.36 19.10% 53.0%
 LYFT  LSTM  $1.52  $1.07  6.52% 49.0%
 LYFT   GRU  $0.90  $0.60  3.77% 50.7%

üí° Key Insights:
- Lower RMSE/MAE/MAPE = Better accuracy
- Higher Directional Accuracy (DA) = Better trend prediction
- Compare each stock across models to see which performs best


In [76]:
# Step 9: Visualize Predictions vs Actual
# Compare ARIMA, LSTM, and GRU predictions side-by-side

import plotly.graph_objects as go

def plot_model_comparison(y_test, arima_pred, lstm_pred, gru_pred, symbol):
    """Plot actual vs predicted prices for ARIMA, LSTM, and GRU"""
    
    fig = go.Figure()
    
    # Actual prices
    fig.add_trace(go.Scatter(
        y=y_test, 
        name='Actual Price',
        line=dict(color='black', width=3),
        mode='lines'
    ))
    
    # ARIMA predictions
    fig.add_trace(go.Scatter(
        y=arima_pred, 
        name='ARIMA (Classical)',
        line=dict(color='red', width=2, dash='dash'),
        mode='lines'
    ))
    
    # LSTM predictions
    fig.add_trace(go.Scatter(
        y=lstm_pred, 
        name='LSTM (Deep Learning)',
        line=dict(color='blue', width=2, dash='dot'),
        mode='lines'
    ))
    
    # GRU predictions
    fig.add_trace(go.Scatter(
        y=gru_pred, 
        name='GRU (Deep Learning)',
        line=dict(color='green', width=2, dash='dashdot'),
        mode='lines'
    ))
    
    fig.update_layout(
        title=f"{symbol} - Model Comparison: ARIMA vs LSTM vs GRU<br><sub>All models evaluated on identical test set</sub>",
        xaxis_title="Test Sample Index",
        yaxis_title="Price ($)",
        hovermode='x unified',
        height=600,
        template='plotly_white',
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        )
    )
    
    fig.show()

print("\nüìä Visualizing model comparisons...\n")
plot_model_comparison(aapl_y_test_actual, aapl_arima_pred, aapl_lstm_pred, aapl_gru_pred, "AAPL")
plot_model_comparison(nvda_y_test_actual, nvda_arima_pred, nvda_lstm_pred, nvda_gru_pred, "NVDA")
plot_model_comparison(lyft_y_test_actual, lyft_arima_pred, lyft_lstm_pred, lyft_gru_pred, "LYFT")

print("\n‚úÖ All model comparison charts generated!")


üìä Visualizing model comparisons...




‚úÖ All model comparison charts generated!
