# In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import warnings

# Suppress every warning for the whole process (no category or module filter
# is given), which keeps notebook output clean.
# NOTE(review): this also hides deprecation notices from the imported
# libraries; consider narrowing the filter.
warnings.filterwarnings("ignore")

# Select the default Plotly renderer used by `fig.show()`.
# NOTE(review): per the Plotly renderer docs, 'notebook_connected' loads
# plotly.js from a CDN, so figures presumably need internet access to display.
pio.renderers.default = 'notebook_connected'

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to flat float arrays before comparison, so
    lists, pandas objects, and column vectors of shape (n, 1) are all
    accepted. Flattening also prevents NumPy from silently broadcasting an
    (n, 1) array against an (n,) array into an (n, n) matrix, which would
    yield a meaningless score.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``.

    Returns:
        The MAPE as a Python float, e.g. ``10.0`` for a 10 % mean error.

    Raises:
        ValueError: If the two inputs do not contain the same number of
            elements.
    """
    true_arr = np.asarray(y_true, dtype=float).ravel()
    pred_arr = np.asarray(y_pred, dtype=float).ravel()
    if true_arr.shape != pred_arr.shape:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements, "
            f"got {true_arr.size} and {pred_arr.size}"
        )

    # Substitute machine epsilon for exact zeros so the division is defined;
    # a non-zero error on such an element still produces a very large term.
    denominator = np.where(true_arr == 0, np.finfo(float).eps, true_arr)
    return float(np.mean(np.abs((true_arr - pred_arr) / denominator)) * 100)

def run_forecast(ticker):
    """Backtest an XGBoost next-session close model and forecast one step ahead.

    Daily price data from 2015-01-01 onward is downloaded with yfinance.
    Features for day *t* are that day's Close/Open/High/Low/Volume, five
    lagged closes and the 10- and 20-day simple moving averages; the target
    is the close of day *t+1*. Pairing day-*t* features with the *t+1* close
    keeps same-day information (e.g. the day's High/Low) from leaking into
    the value being predicted, and lets the most recent row produce a
    genuine forecast for the next, not yet observed, session.

    The last 30 supervised rows are held out. The scaler and the
    hyper-parameter grid search (time-series cross-validation) are fitted on
    the earlier rows only, so the reported metrics are out-of-sample.

    Args:
        ticker: Ticker symbol understood by ``yfinance.download``.

    Returns:
        dict with keys:
            'ticker': the symbol passed in,
            'actual': 1-D array of held-out closes,
            'predicted': 1-D array of model predictions for those closes,
            'future_pred': float forecast for the session after the last
                downloaded row,
            'dates': index of the dates the held-out closes belong to.

    Raises:
        ValueError: If no data is returned, the 'Close' column is missing,
            or too few rows remain for the hold-out plus cross-validation.
    """
    df = yf.download(ticker, start="2015-01-01", progress=False)
    if df is None or df.empty:
        raise ValueError(f"No price data returned for {ticker}")

    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol; keep only the field level so df['Close'] is a
    # Series rather than a one-column DataFrame.
    # NOTE(review): assumes the field name is level 0 (yfinance's default
    # column grouping) — confirm if the download options ever change.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    if 'Close' not in df.columns:
        raise ValueError(f"'Close' column not found in data for {ticker}")

    df = df[['Close', 'Open', 'High', 'Low', 'Volume']].copy()
    df.dropna(inplace=True)

    max_lag = 5
    for lag in range(1, max_lag + 1):
        df[f'Close_lag_{lag}'] = df['Close'].shift(lag)

    df['SMA_10'] = df['Close'].rolling(window=10).mean()
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    # Drops the warm-up rows whose lags / moving averages are undefined.
    df.dropna(inplace=True)

    test_size = 30
    tscv = TimeSeriesSplit(n_splits=5)

    # Row t of X is paired with the close of row t+1. The final row has no
    # known target yet, so it is reserved for the forward-looking forecast.
    X = df.iloc[:-1]
    y = df['Close'].iloc[1:]  # index holds the date each target belongs to
    latest_features = df.iloc[[-1]]

    # Cross-validation runs on the training window only, so that window must
    # hold at least n_splits + 1 samples after the hold-out is removed.
    if len(X) - test_size < tscv.n_splits + 1:
        raise ValueError("Not enough data for training/testing or cross-validation.")

    X_train_raw, X_test_raw = X.iloc[:-test_size], X.iloc[-test_size:]
    y_train, y_test = y.iloc[:-test_size], y.iloc[-test_size:]

    # Fit the scaler on training rows only; hold-out statistics must not
    # influence preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    model = XGBRegressor(objective='reg:squarederror', random_state=42)
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.05, 0.1]
    }

    # Hyper-parameters are selected without ever seeing the hold-out rows.
    # With the default refit=True, best_estimator_ is already refitted on the
    # full training window, so no extra fit call is needed.
    grid_search = GridSearchCV(model, param_grid, cv=tscv, scoring='neg_root_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train, y_train.to_numpy())
    best_model = grid_search.best_estimator_

    preds = best_model.predict(X_test)

    y_test_np = y_test.to_numpy()
    rmse = np.sqrt(mean_squared_error(y_test_np, preds))
    mae = mean_absolute_error(y_test_np, preds)
    r2 = r2_score(y_test_np, preds)
    mape = mean_absolute_percentage_error(y_test_np, preds)

    print(f"\n📊 Backtest Metrics for {ticker}: RMSE={rmse:.2f}, MAE={mae:.2f}, R²={r2:.4f}, MAPE={mape:.2f}%")

    # Features of the latest session -> forecast for the following session.
    last_row_scaled = scaler.transform(latest_features)
    future_pred = float(best_model.predict(last_row_scaled)[0])

    return {
        'ticker': ticker,
        'actual': y_test_np,
        'predicted': preds,
        'future_pred': future_pred,
        'dates': y_test.index
    }

def plot_forecast(result):
    """Show an interactive chart comparing actual and predicted prices.

    Args:
        result: Mapping with the keys 'ticker', 'dates', 'actual' and
            'predicted', as returned by ``run_forecast``. The three series
            are flattened to one dimension before plotting.

    Returns:
        None. The figure is displayed through the active Plotly renderer.
    """
    frame = pd.DataFrame({
        'Date': pd.Series(result['dates']).values.flatten(),
        'Actual': np.ravel(result['actual']),
        'Predicted': np.ravel(result['predicted']),
    })

    # One entry per trace, in drawing order: (column / legend name, line style).
    series_specs = (
        ('Actual', dict(color='blue')),
        ('Predicted', dict(color='orange', dash='dot')),
    )

    fig = go.Figure()
    for label, line_style in series_specs:
        trace = go.Scatter(
            x=frame['Date'],
            y=frame[label],
            mode='lines+markers',
            name=label,
            line=line_style,
        )
        fig.add_trace(trace)

    layout_options = {
        'title': f"{result['ticker']} Stock Forecast - Actual vs Predicted",
        'xaxis_title': 'Date',
        'yaxis_title': 'Price (USD)',
        'legend': dict(x=0, y=1),  # anchor the legend in the top-left corner
        'template': 'plotly_white',
    }
    fig.update_layout(**layout_options)
    fig.show()

if __name__ == '__main__':
    # ipywidgets is only needed for the interactive entry point, so it is
    # imported here rather than at module import time.
    from ipywidgets import interact

    tickers = ['META', 'AMZN', 'AAPL', 'NFLX', 'GOOG']

    def run_and_plot(ticker):
        """Forecast one ticker, print the result and draw the backtest chart.

        Any failure (download, training or plotting) is reported as a single
        message instead of a traceback so the widget stays usable.
        """
        try:
            outcome = run_forecast(ticker)
            forecast_value = outcome['future_pred']
            print(f"\n🔮 Next trading day forecast for {ticker}: {forecast_value:.2f}")
            plot_forecast(outcome)
        except Exception as err:
            print(f"❌ Error with {ticker}: {err}")

    # Passing a list makes interact build a dropdown of the ticker symbols.
    interact(run_and_plot, ticker=tickers)