# News Sentiment Analysis Enhancement

This notebook now includes news sentiment analysis to enhance ARIMA forecasting. The sentiment module provides:

- **Sentiment Proxy**: Based on price movements and volume patterns
- **Technical Sentiment**: Derived from market behavior indicators  
- **Trend Analysis**: Bullish/bearish signal detection
- **Enhanced Features**: Multiple sentiment metrics for model improvement

## Key Improvements for OPEN Stock Analysis:
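- **Shortened Lookback**: 6 months (180 days) of sentiment history for more recent patterns; the ARIMA price history itself is loaded with a 1-year period
- **Optimized Forecast**: 60-day horizon in the LSTM, GARCH, Monte Carlo and multi-model sections for better accuracy; the ARIMA section forecasts 90 steps ahead
- **Sentiment Integration**: News sentiment features are computed and visualized alongside the ARIMA analysis (they are not passed into the ARIMA fit in this notebook)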
- **Market Context**: Real-time sentiment assessment

# ARIMA Stock Price Forecasting Analysis

This notebook demonstrates how to build an ARIMA (AutoRegressive Integrated Moving Average) model to forecast stock prices for the OPEN ticker.

## Table of Contents
1. [Setup and Imports](#setup)
2. [Data Loading and Exploration](#data-loading)
3. [Data Preprocessing](#preprocessing)
4. [Stationarity Testing](#stationarity)
5. [ARIMA Model Selection](#model-selection)
6. [Model Fitting and Evaluation](#fitting)
7. [Forecasting](#forecasting)
8. [Model Validation and Results](#results)

## 1. Setup and Imports <a id="setup"></a>

In [None]:
# Install required packages (run this cell first if packages are not installed)
import subprocess
import sys

def install_package(package):
    """Install ``package`` with pip, using the interpreter that runs this notebook.

    ``subprocess.check_call`` raises ``CalledProcessError`` when pip exits
    with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

# Uncomment and run if packages need to be installed
# packages = ['yfinance', 'statsmodels', 'plotly', 'seaborn']
# for package in packages:
#     install_package(package)

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import sys
import os

# Add src directory to path
# NOTE: '../src' is a relative entry, so Python resolves it against the current
# working directory; the project modules imported in later cells are only
# found when the kernel runs from a directory that sits next to `src`.
sys.path.append('../src')

# Suppress warnings for cleaner output
# NOTE: this filter has no category restriction, so it hides every warning for
# the rest of the session, including any raised by the modelling cells below.
warnings.filterwarnings('ignore')

# Set up plotting style
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = (12, 8)

print("Libraries imported successfully!")

## 2. Data Loading and Exploration <a id="data-loading"></a>

In [None]:
# Import our custom modules
# (project-local modules; presumably resolved through the '../src' entry
# appended to sys.path in the setup cell — confirm the directory layout)
from data_loader import StockDataLoader
from preprocessor import TimeSeriesPreprocessor
from visualizer import TimeSeriesVisualizer

# Initialize components
# These three objects are reused by every later cell: `loader` fetches data,
# `preprocessor` runs the stationarity helpers, `visualizer` draws the plots.
loader = StockDataLoader()
preprocessor = TimeSeriesPreprocessor()
visualizer = TimeSeriesVisualizer()

print("Components initialized successfully!")

In [None]:
# Import technical indicators module for enhanced analysis
# (project-local module, imported the same way as the other custom modules)
from technical_indicators import TechnicalIndicators

# Initialize technical analysis calculator
# `tech_calc` is used by the indicator-calculation cell further down.
tech_calc = TechnicalIndicators()
print("Technical indicators module loaded!")

In [None]:
    "# Load OPEN stock data
",
    "ticker = 'OPEN'
",
    "period = '1y'  # 1 year of data (research-optimized)
",
    "
",
    "print(f"Loading {ticker} stock data for {period} (research-based optimal period)...")"
data = loader.fetch_stock_data(ticker, period=period)

if data is not None:
    print(f"Data loaded successfully!")
    print(f"Shape: {data.shape}")
    print(f"Date range: {data.index.min()} to {data.index.max()}")
    print(f"Columns: {list(data.columns)}")
else:
    print("Failed to load data. Please check the ticker symbol and try again.")

In [None]:
# Summarise the OHLCV columns and show both ends of the frame
if data is not None:
    ohlcv_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    print("Basic Statistics:")
    print(data[ohlcv_columns].describe())

    # Head of the data set
    print("\nFirst 5 rows:")
    print(data.head())

    # Tail of the data set
    print("\nLast 5 rows:")
    print(data.tail())

In [None]:
# Fetch the descriptive metadata for the ticker and print it one field per line
stock_info = loader.get_stock_info(ticker)
if stock_info:
    print(f"Stock Information for {ticker}:")
    for key, value in stock_info.items():
        # Turn keys such as "market_cap" into "Market Cap" for display
        label = key.replace('_', ' ').title()
        print(f"{label}: {value}")

In [None]:
# Visualize the stock price data
# Plots the 'Close' column; save=False is passed so the figure is only shown.
if data is not None:
    visualizer.plot_price_series(data, price_column='Close', ticker=ticker, save=False)

### Technical Analysis Enhancement

While ARIMA is traditionally a univariate model (uses only price history), we can enhance our analysis using technical indicators for better data understanding and preprocessing.

In [None]:
# Derive the indicator series from closing prices and report the latest readings
if data is not None:
    print("Calculating technical indicators...")

    # Every indicator below is computed from the closing price series
    close_prices = data['Close']

    # Indicator series (all reused by the plotting cell that follows)
    ema_20 = tech_calc.calculate_ema(close_prices, 20)
    ema_50 = tech_calc.calculate_ema(close_prices, 50)
    rsi = tech_calc.calculate_rsi(close_prices)
    stoch_rsi, stoch_k, stoch_d = tech_calc.calculate_stochastic_rsi(close_prices)
    bb_middle, bb_upper, bb_lower = tech_calc.calculate_bollinger_bands(close_prices)
    regime = tech_calc.detect_regime_changes(close_prices)

    # Snapshot of the most recent observation
    as_of = close_prices.index[-1].strftime('%Y-%m-%d')
    print(f"\nCurrent Market Analysis (as of {as_of}):")
    print(f"Price: ${close_prices.iloc[-1]:.2f}")
    print(f"EMA 20: ${ema_20.iloc[-1]:.2f}")
    print(f"EMA 50: ${ema_50.iloc[-1]:.2f}")
    print(f"RSI: {rsi.iloc[-1]:.1f}")
    print(f"Stochastic RSI: {stoch_rsi.iloc[-1]:.1f}")

    # Sign of the latest regime value decides the label
    latest_regime = regime.iloc[-1]
    if latest_regime > 0:
        regime_label = 'Bullish'
    elif latest_regime < 0:
        regime_label = 'Bearish'
    else:
        regime_label = 'Neutral'
    print(f"Market Regime: {regime_label}")

    # Classic 30/70 RSI bands
    latest_rsi = rsi.iloc[-1]
    rsi_condition = "Normal"
    if latest_rsi < 30:
        rsi_condition = "Oversold"
    elif latest_rsi > 70:
        rsi_condition = "Overbought"

    print(f"RSI Condition: {rsi_condition}")

    # Where the last close sits between the lower (0%) and upper (100%) band
    band_width = bb_upper.iloc[-1] - bb_lower.iloc[-1]
    bb_position = (close_prices.iloc[-1] - bb_lower.iloc[-1]) / band_width * 100
    print(f"Bollinger Bands Position: {bb_position:.1f}% (0%=Lower Band, 100%=Upper Band)")

In [None]:
# Visualize technical indicators
# Runs only when the indicator cell above has defined `close_prices`
# (at notebook top level locals() is the global namespace).
if 'close_prices' in locals():
    fig, axes = plt.subplots(4, 1, figsize=(14, 12))

    # Plot 1: Price with EMAs and Bollinger Bands
    axes[0].plot(close_prices.index, close_prices.values, label='Close Price', linewidth=2, color='blue')
    axes[0].plot(ema_20.index, ema_20.values, label='EMA 20', alpha=0.7, color='orange')
    axes[0].plot(ema_50.index, ema_50.values, label='EMA 50', alpha=0.7, color='red')
    axes[0].fill_between(bb_upper.index, bb_lower.values, bb_upper.values, alpha=0.2, color='gray', label='Bollinger Bands')
    axes[0].set_title(f'{ticker} - Price with EMAs and Bollinger Bands', fontweight='bold')
    axes[0].set_ylabel('Price ($)')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Plot 2: RSI with the 30/70 oversold/overbought guides
    axes[1].plot(rsi.index, rsi.values, label='RSI', color='purple', linewidth=2)
    axes[1].axhline(y=70, color='red', linestyle='--', alpha=0.7, label='Overbought (70)')
    axes[1].axhline(y=30, color='green', linestyle='--', alpha=0.7, label='Oversold (30)')
    axes[1].fill_between(rsi.index, 30, 70, alpha=0.1, color='gray')
    axes[1].set_title('Relative Strength Index (RSI)', fontweight='bold')
    axes[1].set_ylabel('RSI')
    axes[1].set_ylim(0, 100)
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    # Plot 3: Stochastic RSI with the 20/80 guides
    axes[2].plot(stoch_rsi.index, stoch_rsi.values, label='Stochastic RSI', color='brown', linewidth=2)
    axes[2].plot(stoch_k.index, stoch_k.values, label='%K', color='blue', alpha=0.7)
    axes[2].plot(stoch_d.index, stoch_d.values, label='%D', color='red', alpha=0.7)
    axes[2].axhline(y=80, color='red', linestyle='--', alpha=0.7)
    axes[2].axhline(y=20, color='green', linestyle='--', alpha=0.7)
    axes[2].fill_between(stoch_rsi.index, 20, 80, alpha=0.1, color='gray')
    axes[2].set_title('Stochastic RSI', fontweight='bold')
    axes[2].set_ylabel('Stoch RSI')
    axes[2].set_ylim(0, 100)
    axes[2].legend()
    axes[2].grid(True, alpha=0.3)

    # Plot 4: Market Regime, coloured by the sign of each regime value
    regime_colors = ['red' if x < 0 else 'green' if x > 0 else 'gray' for x in regime]
    axes[3].scatter(regime.index, regime.values, c=regime_colors, alpha=0.6, s=20)
    axes[3].axhline(y=0, color='black', linestyle='-', alpha=0.5)
    axes[3].set_title('Market Regime (Green=Bullish, Red=Bearish)', fontweight='bold')
    axes[3].set_ylabel('Regime')
    axes[3].set_xlabel('Date')
    axes[3].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # A single backslash is required here: "\\n" printed a literal "\n"
    # in front of the heading instead of a blank line.
    print("\nTechnical Analysis Summary:")
    print("- These indicators help understand market conditions")
    print("- They are used for data enhancement, not directly in ARIMA")
    print("- Can help identify outliers and regime changes")
    print("- Useful for interpreting ARIMA forecasts in market context")

## 3. Data Preprocessing <a id="preprocessing"></a>

In [None]:
# Extract the closing price series
# NOTE(review): unlike the earlier cells, `data` is not checked against None
# here — presumably this call fails when the download did not succeed; confirm.
price_series = preprocessor.extract_price_series(data, 'Close')

# `price_series` is the series used by the stationarity, forecasting and
# validation cells below.
print(f"Price series extracted:")
print(f"Length: {len(price_series)}")
print(f"Date range: {price_series.index.min()} to {price_series.index.max()}")
print(f"Price range: ${price_series.min():.2f} to ${price_series.max():.2f}")

In [None]:
# Check for missing values and basic properties
# Reports the null count, the dtype and the describe() summary of the series.
print(f"Missing values: {price_series.isnull().sum()}")
print(f"Data type: {price_series.dtype}")
print(f"\nBasic statistics:")
print(price_series.describe())

## 4. Stationarity Testing <a id="stationarity"></a>

In [None]:
# Test stationarity of the original series
print("Testing stationarity of original price series:")
# The result is used as a mapping; its 'is_stationary' entry drives the
# differencing decision in the next cell.
adf_result = preprocessor.check_stationarity(price_series)

print(f"\nIs the series stationary? {adf_result['is_stationary']}")

In [None]:
# Apply differencing to make the series stationary
# After this cell `diff_series` always holds the most-differenced series that
# was produced, because the plotting and ACF/PACF cells below treat
# `diff_series` as "the stationary series".
if not adf_result['is_stationary']:
    print("Applying first-order differencing...")
    diff_series = preprocessor.difference_series(price_series, order=1)

    print("\nTesting stationarity of differenced series:")
    diff_adf_result = preprocessor.check_stationarity(diff_series)

    if not diff_adf_result['is_stationary']:
        print("\nApplying second-order differencing...")
        diff2_series = preprocessor.difference_series(price_series, order=2)

        print("\nTesting stationarity of second-order differenced series:")
        diff2_adf_result = preprocessor.check_stationarity(diff2_series)

        # Previously `diff_series` stayed bound to the first-order series that
        # had just failed the test, so downstream cells analysed a series
        # already known to be non-stationary.
        diff_series = diff2_series

        if not diff2_adf_result['is_stationary']:
            print("\nWarning: series is still non-stationary after second-order differencing.")
else:
    print("Series is already stationary, no differencing needed.")
    diff_series = price_series

In [None]:
# Visualize original vs transformed series
# `diff_series` is assigned on every path of the differencing cell, so this
# guard only skips the plot when that cell has not been run.
if 'diff_series' in locals():
    visualizer.plot_stationarity_test(price_series, diff_series, ticker=ticker, save=False)

## 5. ARIMA Model Selection <a id="model-selection"></a>

In [None]:
# Draw the ACF and PACF correlograms that guide the choice of ARIMA orders.
# The differenced series is used when the differencing cell has produced one;
# otherwise the raw price series is plotted.
stationary_series = diff_series if 'diff_series' in locals() else price_series

visualizer.plot_acf_pacf(stationary_series, lags=40, ticker=ticker, save=False)

In [None]:
# Prepare data for ARIMA using our preprocessor
# The returned mapping is read below through the keys 'log_transformed',
# 'diff_order', 'is_stationary' and 'final_series'; the grid-search cell also
# reads 'original_series' from it.
preparation_info = preprocessor.prepare_for_arima(data, 'Close')

print("Data preparation summary:")
print(f"Log transformed: {preparation_info['log_transformed']}")
print(f"Differencing order: {preparation_info['diff_order']}")
print(f"Final series is stationary: {preparation_info['is_stationary']}")
print(f"Final series length: {len(preparation_info['final_series'])}")

In [None]:
# Grid search for best ARIMA parameters
from statsmodels.tsa.arima.model import ARIMA
import itertools

def find_best_arima_params(series, max_p=3, max_d=2, max_q=3):
    """
    Find best ARIMA parameters using grid search with AIC criterion.

    Every (p, d, q) combination with 0 <= p <= max_p, 0 <= d <= max_d and
    0 <= q <= max_q is fitted, and the fit with the lowest finite AIC wins.

    Args:
        series: Time series handed unchanged to ``ARIMA``.
        max_p: Largest autoregressive order to try (inclusive).
        max_d: Largest differencing order to try (inclusive).
        max_q: Largest moving-average order to try (inclusive).

    Returns:
        Tuple ``(best_params, best_model, results)``:
        ``best_params`` is the winning ``(p, d, q)`` tuple, ``best_model`` the
        corresponding fitted results object, and ``results`` a list of
        ``(p, d, q, aic)`` tuples for every fit that produced a finite AIC.
        ``best_params`` and ``best_model`` are ``None`` when no combination
        could be fitted (or when a ``max_*`` bound is negative).

    NOTE(review): AIC values are generally only comparable between models
    with the same differencing order ``d`` — treat a winner that differs in
    ``d`` from the runner-up with care.
    """
    best_aic = float('inf')
    best_params = None
    best_model = None
    results = []

    print("Searching for best ARIMA parameters...")
    print("Testing combinations of p, d, q parameters...")

    # Cartesian product of the three inclusive order ranges
    candidate_orders = itertools.product(
        range(max_p + 1), range(max_d + 1), range(max_q + 1)
    )

    for p, d, q in candidate_orders:
        # Keep the try body limited to the calls that can fail while fitting.
        try:
            fitted_model = ARIMA(series, order=(p, d, q)).fit()
            aic = fitted_model.aic
        except Exception as e:
            # Deliberately broad: one failing order must not abort the whole
            # search, so the failure is reported and the next order is tried.
            print(f"ARIMA({p},{d},{q}) - Failed: {str(e)[:50]}")
            continue

        # A NaN/inf AIC cannot be ranked; keeping it in `results` would make
        # any later sort by AIC unreliable.
        if not np.isfinite(aic):
            print(f"ARIMA({p},{d},{q}) - Skipped: non-finite AIC")
            continue

        results.append((p, d, q, aic))

        if aic < best_aic:
            best_aic = aic
            best_params = (p, d, q)
            best_model = fitted_model

        print(f"ARIMA({p},{d},{q}) - AIC: {aic:.2f}")

    if best_params is None:
        # Previously this printed "ARIMANone" and "inf", which looked like a result.
        print("\nNo ARIMA model could be fitted; check the input series.")
    else:
        print(f"\nBest parameters: ARIMA{best_params}")
        print(f"Best AIC: {best_aic:.2f}")

    return best_params, best_model, results

# Find best parameters
# The search runs on the 'original_series' entry of the preparation result and
# uses the function's default bounds (p <= 3, d <= 2, q <= 3).
# presumably 'original_series' is the un-differenced price series, so the
# differencing is left to the ARIMA `d` order — confirm in the preprocessor.
best_params, best_model, search_results = find_best_arima_params(preparation_info['original_series'])

In [None]:
# Rank the fitted models by AIC (lowest first) and list the ten best
if search_results:
    print("Top 10 best ARIMA models:")
    ranked_models = sorted(search_results, key=lambda entry: entry[3])
    for rank, (p, d, q, aic) in enumerate(ranked_models[:10], start=1):
        print(f"{rank:2d}. ARIMA({p},{d},{q}) - AIC: {aic:.2f}")

## 6. Model Fitting and Evaluation <a id="fitting"></a>

## News Sentiment Analysis

Let's incorporate news sentiment analysis to enhance our ARIMA forecasting:

In [None]:
# Import sentiment analyzer
from news_sentiment import NewsSentimentAnalyzer

# Initialize sentiment analyzer
# NOTE(review): the symbol is hard-coded as 'OPEN' here instead of reusing the
# `ticker` variable from the data-loading cell; the two must be kept in sync.
sentiment_analyzer = NewsSentimentAnalyzer('OPEN')

# Get sentiment features for the last 6 months
# (days_back=180; the result is used as a DataFrame below)
sentiment_data = sentiment_analyzer.get_sentiment_features(days_back=180)
print(f"Sentiment data shape: {sentiment_data.shape}")
print(f"Available sentiment features: {sentiment_data.columns.tolist()}")

# Display recent sentiment
# Shows the last 10 rows of the four columns this notebook relies on.
print("\n📊 Recent Sentiment Data:")
print(sentiment_data[['sentiment_score', 'sentiment_ma_7', 'bullish_signal', 'bearish_signal']].tail(10))

In [None]:
# Visualize sentiment trends
# Three stacked panels: score with its 7-day average, momentum, and signal days.
# NOTE: this rebinds `fig` and `axes` from the technical-indicator plot.
fig, axes = plt.subplots(3, 1, figsize=(15, 12))

# Plot 1: Sentiment Score Over Time
axes[0].plot(sentiment_data.index, sentiment_data['sentiment_score'], 
             color='purple', alpha=0.7, label='Daily Sentiment')
axes[0].plot(sentiment_data.index, sentiment_data['sentiment_ma_7'], 
             color='red', linewidth=2, label='7-Day MA')
axes[0].axhline(y=0, color='black', linestyle='--', alpha=0.5)
# Reference lines at +/-0.2.
# NOTE(review): presumably these match the thresholds the analyzer uses for
# bullish_signal / bearish_signal — confirm in news_sentiment.
axes[0].axhline(y=0.2, color='green', linestyle=':', alpha=0.5, label='Bullish Threshold')
axes[0].axhline(y=-0.2, color='red', linestyle=':', alpha=0.5, label='Bearish Threshold')
# NOTE(review): the title hard-codes 'OPEN' rather than using `ticker`.
axes[0].set_title('OPEN - News Sentiment Analysis', fontsize=14, fontweight='bold')
axes[0].set_ylabel('Sentiment Score')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Plot 2: Sentiment Momentum 
axes[1].plot(sentiment_data.index, sentiment_data['sentiment_momentum'], 
             color='orange', alpha=0.7, label='Sentiment Momentum')
axes[1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
axes[1].set_title('Sentiment Momentum (Change in Sentiment)', fontsize=12)
axes[1].set_ylabel('Momentum')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

# Plot 3: Bullish vs Bearish Signals
# Rows flagged with 1 in the respective signal column
bullish_dates = sentiment_data[sentiment_data['bullish_signal'] == 1].index
bearish_dates = sentiment_data[sentiment_data['bearish_signal'] == 1].index

# Bullish days are drawn at y=1 and bearish days at y=0; the legend labels
# carry the number of days in each group.
axes[2].scatter(bullish_dates, [1]*len(bullish_dates), 
                color='green', alpha=0.7, s=30, label=f'Bullish Days ({len(bullish_dates)})')
axes[2].scatter(bearish_dates, [0]*len(bearish_dates), 
                color='red', alpha=0.7, s=30, label=f'Bearish Days ({len(bearish_dates)})')
axes[2].set_title('Bullish vs Bearish Signal Days', fontsize=12)
axes[2].set_ylabel('Signal Type')
axes[2].set_xlabel('Date')
axes[2].set_yticks([0, 1])
axes[2].set_yticklabels(['Bearish', 'Bullish'])
axes[2].legend()
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Get current sentiment summary
# The summary is iterated as a mapping; float values are printed with three
# decimals, everything else is printed as-is.
sentiment_summary = sentiment_analyzer.get_current_sentiment_summary()
print("\n📈 Current Market Sentiment Summary:")
for key, value in sentiment_summary.items():
    if isinstance(value, float):
        print(f"  {key.replace('_', ' ').title()}: {value:.3f}")
    else:
        print(f"  {key.replace('_', ' ').title()}: {value}")

In [None]:
# Fit the best ARIMA model
# No new fit happens here: `best_model` is the already-fitted winner returned
# by the grid search, and this cell only reports its statistics.
if best_model is not None:
    print(f"\nFitted ARIMA{best_params} Model Summary:")
    print(f"AIC: {best_model.aic:.2f}")
    print(f"BIC: {best_model.bic:.2f}")
    print(f"Log Likelihood: {best_model.llf:.2f}")
    
    # Display model parameters
    print(f"\nModel Parameters:")
    print(best_model.summary())

In [None]:
# Analyze residuals
if best_model is not None:
    # `residuals` is reused by the Ljung-Box cell below
    residuals = best_model.resid
    
    print("Residual Analysis:")
    print(f"Mean of residuals: {residuals.mean():.6f}")
    print(f"Standard deviation of residuals: {residuals.std():.4f}")
    
    # Plot residuals
    visualizer.plot_residuals(residuals, ticker=ticker, save=False)

In [None]:
# Model diagnostics - Ljung-Box test for residual autocorrelation
from statsmodels.stats.diagnostic import acorr_ljungbox

if best_model is not None:
    # Ljung-Box statistics for lags 1..10, returned as a DataFrame
    lb_test = acorr_ljungbox(residuals, lags=10, return_df=True)

    # Header: state the hypotheses before showing the table
    for header_line in (
        "Ljung-Box Test for Residual Autocorrelation:",
        "H0: Residuals are independently distributed",
        "H1: Residuals are not independently distributed",
        "\nTest Statistics and p-values:",
    ):
        print(header_line)
    print(lb_test)

    # H0 is rejected at the 5% level as soon as one lag has p < 0.05
    significant_autocorr = (lb_test['lb_pvalue'] < 0.05).any()

    if not significant_autocorr:
        print("\n✅ Good: No significant autocorrelation in residuals")
    else:
        print("\n⚠️  Warning: Significant autocorrelation detected in residuals")

## 7. Forecasting <a id="forecasting"></a>

In [None]:
# Generate forecasts
if best_model is not None:
    forecast_steps = 90  # Forecast 90 steps ahead (one step per observation, i.e. per trading day)
    alpha = 0.05  # 95% confidence interval

    print(f"Generating {forecast_steps}-day forecast...")

    # Generate forecast.
    # The significance level belongs to conf_int(); get_forecast() has no
    # `alpha` parameter of its own, so passing it there did not control the band.
    forecast_result = best_model.get_forecast(steps=forecast_steps)
    forecast_values = forecast_result.predicted_mean
    conf_int = forecast_result.conf_int(alpha=alpha)

    # Create forecast dates.
    # Each forecast step corresponds to one observation of the price series,
    # so business-day frequency is used instead of calendar days (weekends are
    # skipped; exchange holidays are not accounted for).
    last_date = price_series.index[-1]
    forecast_dates = pd.date_range(start=last_date + pd.Timedelta(days=1),
                                   periods=forecast_steps, freq='B')

    # Create forecast DataFrame.
    # Plain arrays are passed on purpose: when Series are combined with an
    # explicit `index`, pandas re-aligns them by label, which turns every value
    # into NaN whenever the model's own forecast index differs from
    # `forecast_dates`.
    ci_bounds = np.asarray(conf_int)
    forecast_df = pd.DataFrame({
        'forecast': np.asarray(forecast_values),
        'lower_ci': ci_bounds[:, 0],
        'upper_ci': ci_bounds[:, 1]
    }, index=forecast_dates)

    print("\nForecast Summary:")
    print(f"Next day predicted price: ${forecast_df['forecast'].iloc[0]:.2f}")
    # Label derived from the horizon so it cannot drift from `forecast_steps`
    print(f"{forecast_steps}-day average predicted price: ${forecast_df['forecast'].mean():.2f}")
    print(f"Confidence level: {(1-alpha)*100}%")

    print("\nFirst 10 days forecast:")
    print(forecast_df.head(10))

In [None]:
# Static chart: price history followed by the forecast and its confidence band
if 'forecast_df' in locals():
    ci_bounds = (forecast_df['lower_ci'], forecast_df['upper_ci'])
    visualizer.plot_forecast(
        price_series,
        forecast_df['forecast'],
        confidence_intervals=ci_bounds,
        ticker=ticker,
        save=False,
    )

In [None]:
# Interactive version of the forecast chart, fed with the same three series
if 'forecast_df' in locals():
    ci_bounds = (forecast_df['lower_ci'], forecast_df['upper_ci'])
    visualizer.create_interactive_forecast(
        price_series,
        forecast_df['forecast'],
        confidence_intervals=ci_bounds,
        ticker=ticker,
        save=False,
    )

## 8. Model Validation and Results <a id="results"></a>

In [None]:
# Perform out-of-sample validation
from sklearn.metrics import mean_absolute_error, mean_squared_error

def validate_model(series, model_params, test_size=0.2):
    """
    Validate ARIMA model using train-test split.

    The first ``1 - test_size`` share of ``series`` is used to fit an ARIMA
    model with order ``model_params``; the remainder is forecast in one
    multi-step call and compared with the held-out values.

    Args:
        series: pandas Series of prices in chronological order.
        model_params: ``(p, d, q)`` order passed to ``ARIMA``.
        test_size: Fraction of observations held out for testing.

    Returns:
        Dict with the keys ``mae``, ``mse``, ``rmse``, ``mape`` (percent),
        ``directional_accuracy`` (percent), ``train_data``, ``test_data`` and
        ``forecast_values``. ``mape`` is NaN when every actual value is zero
        and ``directional_accuracy`` is NaN when the test set has fewer than
        two observations.

    Raises:
        ValueError: If the split leaves the training or the test set empty.
    """
    # Split data (positional, chronological split)
    split_point = int(len(series) * (1 - test_size))
    if split_point <= 0 or split_point >= len(series):
        raise ValueError(
            f"test_size={test_size} leaves an empty train or test set "
            f"for a series of length {len(series)}"
        )
    # .iloc makes the slice positional regardless of the index type
    train_data = series.iloc[:split_point]
    test_data = series.iloc[split_point:]

    print(f"Training data: {len(train_data)} observations")
    print(f"Test data: {len(test_data)} observations")

    # Fit model on training data
    train_model = ARIMA(train_data, order=model_params)
    fitted_train_model = train_model.fit()

    # Generate forecasts for test period
    forecast_steps = len(test_data)
    forecast_result = fitted_train_model.get_forecast(steps=forecast_steps)
    forecast_values = forecast_result.predicted_mean

    # Work on plain arrays so the metrics compare values position by position
    # and never depend on index labels.
    actual = test_data.to_numpy(dtype=float)
    predicted = np.asarray(forecast_values, dtype=float)

    # Calculate metrics
    mae = mean_absolute_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)

    # MAPE is undefined where the actual value is zero; skip those points
    # instead of dividing by zero.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
    else:
        mape = float('nan')

    # Calculate directional accuracy (needs at least two points for a change)
    if len(actual) > 1:
        actual_direction = np.diff(actual) > 0
        predicted_direction = np.diff(predicted) > 0
        directional_accuracy = np.mean(actual_direction == predicted_direction) * 100
    else:
        directional_accuracy = float('nan')

    return {
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'mape': mape,
        'directional_accuracy': directional_accuracy,
        'train_data': train_data,
        'test_data': test_data,
        'forecast_values': forecast_values
    }

# Perform validation
# Uses the default test_size of validate_model (0.2) on the raw price series
# with the order selected by the grid search.
if best_params is not None:
    validation_results = validate_model(price_series, best_params)
    
    print(f"\nModel Validation Results:")
    print(f"Mean Absolute Error (MAE): ${validation_results['mae']:.2f}")
    print(f"Root Mean Square Error (RMSE): ${validation_results['rmse']:.2f}")
    print(f"Mean Absolute Percentage Error (MAPE): {validation_results['mape']:.2f}%")
    print(f"Directional Accuracy: {validation_results['directional_accuracy']:.2f}%")

In [None]:
# Plot validation results
if 'validation_results' in locals():
    fig, ax = plt.subplots(figsize=(12, 6))
    
    # Plot actual vs predicted for test period
    # NOTE: this rebinds the notebook-level name `forecast_values`, which the
    # forecasting cell had set to the future forecast; from here on it holds
    # the validation-period predictions.
    test_data = validation_results['test_data']
    forecast_values = validation_results['forecast_values']
    
    # Both lines are drawn against the test index so they share the x positions
    ax.plot(test_data.index, test_data.values, label='Actual Prices', linewidth=2)
    ax.plot(test_data.index, forecast_values, label='Predicted Prices', linewidth=2, linestyle='--')
    
    ax.set_title(f'{ticker} - Model Validation: Actual vs Predicted Prices', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price ($)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

In [None]:
# Summary of results
# Each section is printed only when the cell that produces its inputs has run.
print("="*60)
print("ARIMA FORECASTING ANALYSIS SUMMARY")
print(f"Stock: {ticker}")
print("="*60)

if 'stock_info' in locals() and stock_info:
    print(f"Company: {stock_info.get('name', 'N/A')}")
    print(f"Sector: {stock_info.get('sector', 'N/A')}")

print("\nData Information:")
print(f"Total observations: {len(price_series)}")
print(f"Date range: {price_series.index.min().strftime('%Y-%m-%d')} to {price_series.index.max().strftime('%Y-%m-%d')}")
print(f"Price range: ${price_series.min():.2f} to ${price_series.max():.2f}")

if best_params is not None:
    print(f"\nBest ARIMA Model: {best_params}")
    print(f"Model AIC: {best_model.aic:.2f}")

if 'validation_results' in locals():
    print("\nModel Performance:")
    print(f"RMSE: ${validation_results['rmse']:.2f}")
    print(f"MAPE: {validation_results['mape']:.2f}%")
    print(f"Directional Accuracy: {validation_results['directional_accuracy']:.2f}%")

if 'forecast_df' in locals():
    # Take the horizon from the forecast itself so the label stays correct
    # when the number of forecast steps is changed.
    forecast_horizon = len(forecast_df)
    print("\nForecast Results:")
    print(f"Next day prediction: ${forecast_df['forecast'].iloc[0]:.2f}")
    print(f"{forecast_horizon}-day average prediction: ${forecast_df['forecast'].mean():.2f}")
    print(f"Forecast range: ${forecast_df['forecast'].min():.2f} to ${forecast_df['forecast'].max():.2f}")

print("\n" + "="*60)
print("Analysis Complete!")
print("="*60)

# 🔬 Multi-Model Forecasting Analysis

This section demonstrates advanced forecasting models including LSTM neural networks, GARCH volatility models, and Monte Carlo simulations.

## 🧠 LSTM Neural Network Model

Long Short-Term Memory (LSTM) networks are a type of recurrent neural network capable of learning long-term dependencies in time series data.

In [None]:
# Import LSTM model
# NOTE(review): this import uses the `src.` package prefix, whereas the earlier
# cells import project modules directly after appending '../src' to sys.path —
# presumably the project root must be importable as well; confirm.
from src.lstm_model import LSTMForecaster

# Initialize LSTM model
print("🧠 Setting up LSTM Neural Network Model")
print("=" * 50)

lstm_model = LSTMForecaster(
    sequence_length=60,  # Use 60 days of history
    epochs=50,           # Training epochs (reduced for demo)
    batch_size=32        # Batch size for training
)

# Prepare data for LSTM
print("\n📊 Preparing data for LSTM training...")
prices = data['Close'].values
# NOTE: rebinds the notebook-level `test_data` set by the validation plot cell.
X_train, y_train, X_test, y_test, train_data, test_data = lstm_model.prepare_data(
    prices, train_size=0.8
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# Train the LSTM model
print("\n🏋️ Training LSTM model...")
# Training is skipped when either split produced no sequences
if len(X_train) > 0 and len(X_test) > 0:
    # The test split is handed to train() as well — presumably as validation
    # data; confirm in LSTMForecaster.train.
    lstm_model.train(X_train, y_train, X_test, y_test)
    
    # Generate forecasts
    print("\n🔮 Generating LSTM forecasts...")
    # NOTE(review): the seed window is the tail of `train_data`, not the tail of
    # the full series, so the 60 forecast steps appear to start at the beginning
    # of the test period rather than after the latest price — confirm intent.
    last_sequence = train_data[-lstm_model.sequence_length:]
    lstm_forecast = lstm_model.forecast(last_sequence, steps=60)
    
    # Evaluate model performance
    print("\n📈 Evaluating LSTM model performance...")
    lstm_metrics = lstm_model.evaluate(X_test, y_test)
    
    # The metrics are iterated as a mapping of name -> numeric value
    for metric, value in lstm_metrics.items():
        print(f"{metric}: {value:.4f}")
    
    print(f"\n🎯 LSTM Next Day Prediction: ${lstm_forecast[0]:.2f}")
    print(f"🎯 LSTM 60-Day Average: ${np.mean(lstm_forecast):.2f}")
else:
    print("❌ Insufficient data for LSTM training")

## 📊 GARCH Volatility Model

GARCH (Generalized Autoregressive Conditional Heteroskedasticity) models are used for volatility forecasting and risk management.

In [None]:
# Import GARCH model
from src.garch_model import GARCHForecaster

# Initialize GARCH model
print("📊 Setting up GARCH Volatility Model")
print("=" * 50)

garch_model = GARCHForecaster(model_type='GARCH', p=1, q=1)

# Prepare returns data
print("\n📈 Preparing returns for GARCH modeling...")
returns = garch_model.prepare_returns(data['Close'])

# Test for ARCH effects
# NOTE: called without arguments, before fit() and outside the try block below,
# so an exception raised here is not caught by this cell.
print("\n🔍 Testing for ARCH effects...")
arch_test = garch_model.test_arch_effects()

# Fit GARCH model
print("\n⚙️ Fitting GARCH(1,1) model...")
try:
    garch_model.fit(returns)
    
    # Forecast volatility
    print("\n🔮 Forecasting volatility...")
    vol_forecast = garch_model.forecast_volatility(horizon=60)
    
    # Calculate Value at Risk
    print("\n⚠️ Calculating Value at Risk...")
    # NOTE(review): confidence_level=0.05 is reported as "95%" below —
    # presumably the parameter is the tail probability; confirm in calculate_var.
    var_result = garch_model.calculate_var(
        vol_forecast['volatility_forecast'],
        confidence_level=0.05,
        position_value=100000  # $100,000 position
    )
    
    print(f"Average forecasted volatility: {np.mean(vol_forecast['volatility_forecast']):.2f}%")
    print(f"1-day VaR (95%): ${var_result['var_absolute'][0]:,.2f}")
    print(f"1-day Expected Shortfall: ${var_result['expected_shortfall_absolute'][0]:,.2f}")
    
    # Generate return forecasts with confidence intervals
    return_forecast = garch_model.forecast_returns(vol_forecast['volatility_forecast'])
    
    # Convert to price forecasts
    # Each forecast return (divided by 100, i.e. treated as a percentage) is
    # applied to the current price on its own.
    # NOTE(review): if 'mean_forecast' holds per-day returns, the later entries
    # are not compounded into a price path — confirm what forecast_returns yields.
    current_price = data['Close'].iloc[-1]
    garch_price_forecast = current_price * (1 + return_forecast['mean_forecast'] / 100)
    
    print(f"\n🎯 GARCH Next Day Price: ${garch_price_forecast[0]:.2f}")
    
except Exception as e:
    # Any failure in the block above is reported and the model handle is
    # cleared; `garch_price_forecast` stays undefined if the failure happened
    # before it was assigned.
    print(f"❌ GARCH model failed: {str(e)}")
    garch_model = None

## 🎲 Monte Carlo Simulation Models

Monte Carlo simulations use random sampling to model different price paths and calculate probabilities of various outcomes.

In [None]:
# Import Monte Carlo model
from src.monte_carlo import MonteCarloForecaster

print("🎲 Setting up Monte Carlo Simulation Models")
print("=" * 50)

# Test different Monte Carlo models
# `mc_results` maps each model type to its result dict, or to None on failure;
# the multi-model comparison cell reads it again later.
mc_models = ['GBM', 'Jump', 'Heston']
mc_results = {}

for model_type in mc_models:
    print(f"\n🔄 Running {model_type} Monte Carlo simulation...")
    
    # Each model runs in its own try block so one failure does not stop the others
    try:
        # Initialize Monte Carlo model
        mc_model = MonteCarloForecaster(
            model_type=model_type,
            n_simulations=5000,  # 5,000 simulations
            time_horizon=60      # 60-day forecast
        )
        
        # Estimate parameters from historical data
        mc_model.estimate_parameters(data['Close'])
        
        # Run simulation
        simulation_results = mc_model.run_simulation()
        
        # Calculate risk measures
        var_es = mc_model.calculate_var_es()
        
        # Store results
        mc_results[model_type] = {
            'model': mc_model,
            'results': simulation_results,
            'var_es': var_es
        }
        
        # Print summary
        # 'prob_positive' and 'VaR_95' are formatted with the % specifier, so
        # they are expected to be fractions rather than percentages.
        print(f"✅ {model_type} simulation completed")
        print(f"   Final price mean: ${simulation_results['summary_stats']['final_mean']:.2f}")
        print(f"   Final price std: ${simulation_results['summary_stats']['final_std']:.2f}")
        print(f"   Probability of gain: {simulation_results['summary_stats']['prob_positive']:.1%}")
        print(f"   VaR (95%): {var_es['VaR_95']:.1%}")
        
    except Exception as e:
        print(f"❌ {model_type} simulation failed: {str(e)}")
        mc_results[model_type] = None

# Print comparison
print(f"\n📊 Monte Carlo Model Comparison")
print("=" * 50)
current_price = data['Close'].iloc[-1]

# Mean final price per model and its percentage change versus the last close
for model_type, result in mc_results.items():
    if result is not None:
        final_mean = result['results']['summary_stats']['final_mean']
        return_pct = (final_mean - current_price) / current_price * 100
        print(f"{model_type:>10}: ${final_mean:6.2f} ({return_pct:+5.1f}%)")
    else:
        print(f"{model_type:>10}: Failed")

## 🔄 Multi-Model Comparison & Ensemble

Compare all models and create ensemble forecasts for better prediction accuracy.

In [None]:
# Run complete multi-model analysis
from src.multi_model_forecast import MultiModelForecaster

print("🚀 Running Complete Multi-Model Analysis")
print("=" * 60)

# Initialize the multi-model forecaster
multi_forecaster = MultiModelForecaster(
    symbol='OPEN',
    period='6mo',
    forecast_days=60
)

# Run the complete analysis
try:
    multi_forecaster.run_complete_analysis(save_plots=True, save_report=True)

    print("\n✅ Multi-model analysis completed successfully!")
    print("📁 Check the 'results' folder for:")
    print("   • Multi-model forecast plot")
    print("   • Comprehensive forecast report")

except Exception as e:
    # Fallback: when the combined run fails, compare whatever the individual
    # model cells above have already produced.
    print(f"❌ Multi-model analysis failed: {str(e)}")
    print("Running individual model comparison...")

    # Manual model comparison if multi-model fails
    current_price = data['Close'].iloc[-1]

    print("\n📈 Model Forecast Comparison")
    print("=" * 40)
    print(f"Current Price: ${current_price:.2f}")
    print("-" * 40)

    # ARIMA forecast
    # The ARIMA cell stores its result in `forecast_df`; no cell defines a
    # variable called `forecast`, so the previous check never matched and the
    # ARIMA row was silently left out.
    if 'forecast_df' in locals():
        arima_forecast = forecast_df['forecast']
        arima_next = arima_forecast.iloc[0]
        arima_final = arima_forecast.iloc[-1]
        print(f"ARIMA        : ${arima_next:.2f} → ${arima_final:.2f}")

    # LSTM forecast
    if 'lstm_forecast' in locals():
        lstm_next = lstm_forecast[0]
        lstm_final = lstm_forecast[-1]
        print(f"LSTM         : ${lstm_next:.2f} → ${lstm_final:.2f}")

    # GARCH forecast
    if 'garch_price_forecast' in locals():
        garch_next = garch_price_forecast[0]
        garch_final = garch_price_forecast[-1]
        print(f"GARCH        : ${garch_next:.2f} → ${garch_final:.2f}")

    # Monte Carlo forecasts (models that failed are stored as None and skipped)
    for model_type, result in mc_results.items():
        if result is not None:
            mc_forecast = result['results']['summary_stats']['mean_path'][1:]  # Exclude initial price
            mc_next = mc_forecast[0]
            mc_final = mc_forecast[-1]
            print(f"MC-{model_type:8s}: ${mc_next:.2f} → ${mc_final:.2f}")

    print("-" * 40)