In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from ta.momentum import RSIIndicator, StochasticOscillator
from ta.trend import MACD
from tqdm import tqdm

# Ticker symbols for the 12 stocks
tickers = [
    "601398.SS", "601988.SS", "601939.SS", "601288.SS", "600036.SS",
    "601328.SS", "601318.SS", "601628.SS", "601601.SS", "600016.SS",
    "601688.SS", "000776.SZ"
]

# Download historical OHLCV from 2014-01-01 to 2023-12-31
def download_data(ticker):
    data = yf.download(ticker, start="2014-01-01", end="2023-12-31")
    data = data.dropna()
    return data

stock_data = {ticker: download_data(ticker) for ticker in tqdm(tickers)}

  0%|          | 0/12 [00:00<?, ?it/s]

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12/12 [00:04<00:00,  2.42it/s]


In [2]:
def compute_features(df):
    df = df.copy()

    # Ensure Close, High, Low are Series (1D)
    close = df["Close"].squeeze()
    high = df["High"].squeeze()
    low = df["Low"].squeeze()
    volume = df["Volume"].squeeze()

    print(f"Close dtype: {type(close)}, shape: {close.shape}")

    # Daily return
    df["Return"] = close.pct_change()

    # 30-day rolling volatility (target)
    df["Volatility"] = df["Return"].rolling(window=30).std()

    # RSI (14 days)
    df["RSI"] = RSIIndicator(close=close, window=14).rsi()

    # Momentum (5 days)
    df["MOM"] = close - close.shift(5)

    # OBV
    df["OBV"] = (np.sign(close.diff()) * volume).fillna(0).cumsum()

    # MACD
    macd = MACD(close=close, window_slow=26, window_fast=12, window_sign=9)
    df["MACD_LINE"] = macd.macd()
    df["MACD_SIGNAL"] = macd.macd_signal()
    df["MACD_HIST"] = macd.macd_diff()

    # Stochastic Oscillator
    stoch = StochasticOscillator(high=high, low=low, close=close, window=14, smooth_window=3)
    df["STO_K"] = stoch.stoch()           # formerly %K
    df["STO_D"] = stoch.stoch_signal()    # formerly %D

    # Lagged volatilities (t-1 to t-6)
    for i in range(1, 7):
        df[f"Vol_t_{i}"] = df["Volatility"].shift(i)

    # Volatility t+1 (our target)
    df["Vol_target"] = df["Volatility"].shift(-1)

    # Drop rows with NaNs
    df = df.dropna()

    return df

In [3]:
import os
import pickle

feature_data = {}
for ticker in tqdm(tickers):
    feature_data[ticker] = compute_features(stock_data[ticker])

feature_data_path = "china_feature_data.pkl"

if os.path.exists(feature_data_path):
    print("üì¶ Loading saved feature data from china_feature_data.pkl...")
    with open(feature_data_path, "rb") as f:
        feature_data = pickle.load(f)
else:
    print("‚öôÔ∏è Computing feature data...")
    feature_data = {ticker: compute_features(stock_data[ticker]) for ticker in tqdm(tickers)}
    with open(feature_data_path, "wb") as f:
        pickle.dump(feature_data, f)
    print("üíæ Saved feature data to china_feature_data.pkl")

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12/12 [00:00<00:00, 80.49it/s]


Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
‚öôÔ∏è Computing feature data...


  0%|          | 0/12 [00:00<?, ?it/s]

Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12/12 [00:00<00:00, 82.62it/s]


Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
Close dtype: <class 'pandas.core.series.Series'>, shape: (2432,)
üíæ Saved feature data to china_feature_data.pkl


In [4]:
from arch import arch_model
import warnings

def add_garch_predictions(df, ticker=None, verbose=True):
    df = df.copy()
    returns = df["Return"].dropna().values
    preds = []
    window_size = 500
    scale_factor = 100  # recommended by arch package

    if verbose:
        print(f"\nüîç GARCH modeling for {ticker} ‚Äî total points: {len(returns)}")

    for i in range(window_size, len(returns)):
        train_window = returns[i-window_size:i] * scale_factor  # rescale

        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                model = arch_model(train_window, vol='Garch', p=1, q=1, dist='normal', rescale=False)
                model_fit = model.fit(disp="off")
                forecast = model_fit.forecast(horizon=1)
                pred_vol_scaled = np.sqrt(forecast.variance.values[-1][0])
                pred_vol = pred_vol_scaled / scale_factor  # unscale
        except Exception as e:
            if verbose:
                print(f"‚ö†Ô∏è Failed at i={i} ‚Äî {e}")
            pred_vol = np.nan

        preds.append(pred_vol)

        if verbose and i % 250 == 0:
            print(f"  ‚Üí Index {i} | Pred Vol (unscaled): {pred_vol:.5f}")

    full_preds = [np.nan] * window_size + preds
    df["GARCH_pred"] = full_preds

    before = len(df)
    df = df.dropna()
    after = len(df)

    if verbose:
        print(f"‚úÖ Done {ticker} | Rows dropped: {before - after} | Final: {after} rows")

    return df

In [5]:
# === Try loading precomputed garch_data from disk ===
garch_data_path = "china_garch_data.pkl"

if os.path.exists(garch_data_path):
    print("üì¶ Loading saved GARCH data from china_garch_data.pkl...")
    with open(garch_data_path, "rb") as f:
        garch_data = pickle.load(f)
    print("‚úÖ Loaded GARCH data successfully!")
else:
    print("‚öôÔ∏è Computing GARCH data from scratch...")
    garch_data = {}
    for ticker in tickers:
        print(f"\n====================== {ticker} ======================")
        garch_data[ticker] = add_garch_predictions(feature_data[ticker], ticker=ticker)

    # Save to disk
    with open(garch_data_path, "wb") as f:
        pickle.dump(garch_data, f)
    print("üíæ Saved GARCH data to china_garch_data.pkl")

‚öôÔ∏è Computing GARCH data from scratch...


üîç GARCH modeling for 601398.SS ‚Äî total points: 2395
  ‚Üí Index 500 | Pred Vol (unscaled): 0.01529
  ‚Üí Index 750 | Pred Vol (unscaled): 0.00571
  ‚Üí Index 1000 | Pred Vol (unscaled): 0.01758
  ‚Üí Index 1250 | Pred Vol (unscaled): 0.01197
  ‚Üí Index 1500 | Pred Vol (unscaled): 0.00824
  ‚Üí Index 1750 | Pred Vol (unscaled): 0.01567
  ‚Üí Index 2000 | Pred Vol (unscaled): 0.00726
  ‚Üí Index 2250 | Pred Vol (unscaled): 0.01931
‚úÖ Done 601398.SS | Rows dropped: 500 | Final: 1895 rows


üîç GARCH modeling for 601988.SS ‚Äî total points: 2395
  ‚Üí Index 500 | Pred Vol (unscaled): 0.01635
  ‚Üí Index 750 | Pred Vol (unscaled): 0.00773
  ‚Üí Index 1000 | Pred Vol (unscaled): 0.01273
  ‚Üí Index 1250 | Pred Vol (unscaled): 0.00911
  ‚Üí Index 1500 | Pred Vol (unscaled): 0.00980
  ‚Üí Index 1750 | Pred Vol (unscaled): 0.00710
  ‚Üí Index 2000 | Pred Vol (unscaled): 0.00505
  ‚Üí Index 2250 | Pred Vol (unscaled): 0.02613
‚úÖ Done 601988.

In [6]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def evaluate(y_true, y_pred):
    return {
        "R2": r2_score(y_true, y_pred),
        "RMSE": mean_squared_error(y_true, y_pred, squared=False),
        "MSE": mean_squared_error(y_true, y_pred),
        "MAE": mean_absolute_error(y_true, y_pred),
    }

def train_ml_models_baseline(df, ticker="TICKER"):
    print(f"\nüìà Training ML models for {ticker}...")

    # Feature and target selection
    features = [
        'RSI', 'MOM', 'OBV', 'MACD_LINE', 'MACD_SIGNAL', 'MACD_HIST',
        'STO_K', 'STO_D',
        'Vol_t_1', 'Vol_t_2', 'Vol_t_3', 'Vol_t_4', 'Vol_t_5', 'Vol_t_6'
    ]

    X = df[features].copy()
    # Sanitize column names just in case LightGBM is sensitive
    X.columns = [str(col).replace("-", "_").replace("%", "PCT").replace(".", "_DOT_") for col in X.columns]

    y = df["Vol_target"]

    # Static train-test split (same as paper: 2014‚Äì2020 train, 2021‚Äì2023 test)
    split_idx = int(len(df) * 0.7)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    models = {
        "KNN": KNeighborsRegressor(),
        "AdaBoost": AdaBoostRegressor(),
        "CatBoost": CatBoostRegressor(verbose=0),
        #"LightGBM": LGBMRegressor(),
        "XGBoost": XGBRegressor(verbosity=0),
        "RandomForest": RandomForestRegressor()
    }

    results = {}

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        metrics = evaluate(y_test, y_pred)
        results[name] = metrics
        print(f"‚úÖ {name} ‚Äî R¬≤: {metrics['R2']:.4f}, RMSE: {metrics['RMSE']:.4f}, MAE: {metrics['MAE']:.4f}")

    return results

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


In [7]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def evaluate(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    return {
        "R2": r2_score(y_true, y_pred),
        "RMSE": np.sqrt(mse),
        "MSE": mse,
        "MAE": mean_absolute_error(y_true, y_pred),
    }

def train_all_stocks_ml_baseline(garch_data_dict, results_path="china_ml_baseline_results.csv"):
    # Check if results already exist
    if os.path.exists(results_path):
        print(f"üì¶ Loading existing results from {results_path}...")
        return pd.read_csv(results_path)

    final_results = []

    for ticker, df in garch_data_dict.items():
        print(f"\n================= {ticker} =================")
        results = train_ml_models_baseline(df, ticker=ticker)

        for model_name, metrics in results.items():
            final_results.append({
                "Stock": ticker,
                "Model": model_name,
                "R2": round(metrics["R2"], 4),
                "RMSE": round(metrics["RMSE"], 4),
                "MSE": round(metrics["MSE"], 6),
                "MAE": round(metrics["MAE"], 4),
            })

    # Save results
    results_df = pd.DataFrame(final_results)
    results_df.to_csv(results_path, index=False)
    print(f"üíæ Saved results to {results_path}")

    return results_df

# Run training or load existing results
ml_all_results = train_all_stocks_ml_baseline(garch_data)
ml_all_results_sorted = ml_all_results.sort_values(by="R2", ascending=False)
display(ml_all_results_sorted)



üìà Training ML models for 601398.SS...
‚úÖ KNN ‚Äî R¬≤: -2.4385, RMSE: 0.0064, MAE: 0.0055
‚úÖ AdaBoost ‚Äî R¬≤: 0.7848, RMSE: 0.0016, MAE: 0.0013
‚úÖ CatBoost ‚Äî R¬≤: 0.9492, RMSE: 0.0008, MAE: 0.0005
‚úÖ XGBoost ‚Äî R¬≤: 0.9284, RMSE: 0.0009, MAE: 0.0006
‚úÖ RandomForest ‚Äî R¬≤: 0.9438, RMSE: 0.0008, MAE: 0.0005


üìà Training ML models for 601988.SS...
‚úÖ KNN ‚Äî R¬≤: 0.0187, RMSE: 0.0055, MAE: 0.0042
‚úÖ AdaBoost ‚Äî R¬≤: 0.8451, RMSE: 0.0022, MAE: 0.0013
‚úÖ CatBoost ‚Äî R¬≤: 0.8192, RMSE: 0.0023, MAE: 0.0011
‚úÖ XGBoost ‚Äî R¬≤: 0.8392, RMSE: 0.0022, MAE: 0.0012
‚úÖ RandomForest ‚Äî R¬≤: 0.8686, RMSE: 0.0020, MAE: 0.0010


üìà Training ML models for 601939.SS...
‚úÖ KNN ‚Äî R¬≤: -2.6311, RMSE: 0.0068, MAE: 0.0059
‚úÖ AdaBoost ‚Äî R¬≤: 0.7301, RMSE: 0.0019, MAE: 0.0014
‚úÖ CatBoost ‚Äî R¬≤: 0.8568, RMSE: 0.0014, MAE: 0.0010
‚úÖ XGBoost ‚Äî R¬≤: 0.8888, RMSE: 0.0012, MAE: 0.0008
‚úÖ RandomForest ‚Äî R¬≤: 0.9101, RMSE: 0.0011, MAE: 0.0007


üìà Training ML models for 60128

Unnamed: 0,Stock,Model,R2,RMSE,MSE,MAE
18,601288.SS,XGBoost,0.9622,0.0008,1e-06,0.0005
2,601398.SS,CatBoost,0.9492,0.0008,1e-06,0.0005
4,601398.SS,RandomForest,0.9438,0.0008,1e-06,0.0005
19,601288.SS,RandomForest,0.9438,0.001,1e-06,0.0007
3,601398.SS,XGBoost,0.9284,0.0009,1e-06,0.0006
44,601601.SS,RandomForest,0.9124,0.0017,3e-06,0.0011
17,601288.SS,CatBoost,0.9122,0.0013,2e-06,0.0007
14,601939.SS,RandomForest,0.9101,0.0011,1e-06,0.0007
43,601601.SS,XGBoost,0.9091,0.0017,3e-06,0.0012
29,601328.SS,RandomForest,0.9082,0.0012,1e-06,0.0008


In [8]:
from arch import arch_model
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

def evaluate_series(y_true, y_pred):
    return {
        "R2": r2_score(y_true, y_pred),
        "RMSE": mean_squared_error(y_true, y_pred, squared=False),
        "MSE": mean_squared_error(y_true, y_pred),
        "MAE": mean_absolute_error(y_true, y_pred),
    }

def forecast_volatility_arch(df, model_type="GARCH", ticker="TICKER", verbose=True):
    df = df.copy()

    # Flatten columns if MultiIndex
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = ['_'.join([str(i) for i in col if i]) for col in df.columns]

    if "Vol_target" not in df.columns or "Return" not in df.columns:
        raise KeyError(f"Missing 'Vol_target' or 'Return' in {ticker}")

    returns = df["Return"].dropna().values
    preds = []
    window_size = 500
    scale_factor = 100  # fix for scale warning

    if verbose:
        print(f"\nüîÆ Running {model_type} for {ticker}...")

    for i in range(window_size, len(returns)):
        train_window = returns[i-window_size:i] * scale_factor

        try:
            if model_type == "GARCH":
                model = arch_model(train_window, vol='GARCH', p=1, q=1, dist='normal', rescale=False)
            elif model_type == "GJR":
                model = arch_model(train_window, vol='GARCH', p=1, o=1, q=1, dist='normal', rescale=False)
            elif model_type == "EGARCH":
                model = arch_model(train_window, vol='EGARCH', p=1, q=1, dist='normal', rescale=False)
            else:
                raise ValueError("Invalid model_type")

            model_fit = model.fit(disp="off")
            forecast = model_fit.forecast(horizon=1)
            pred_vol = np.sqrt(forecast.variance.values[-1][0]) / scale_factor

        except Exception as e:
            if verbose:
                print(f"‚ö†Ô∏è {model_type} failed at index {i}: {e}")
            pred_vol = np.nan

        preds.append(pred_vol)

        if verbose and i % 250 == 0:
            print(f"  ‚Üí {model_type} | index {i} | vol: {pred_vol:.5f}")

    df[f"{model_type}_pred"] = [np.nan] * window_size + preds
    df = df.dropna(subset=["Vol_target", f"{model_type}_pred"])

    metrics = evaluate_series(df["Vol_target"], df[f"{model_type}_pred"])
    if verbose:
        print(f"‚úÖ {model_type} for {ticker} ‚Äî R¬≤: {metrics['R2']:.4f}, RMSE: {metrics['RMSE']:.4f}, MAE: {metrics['MAE']:.4f}")

    return df, metrics

In [9]:
import os
import pandas as pd

def evaluate_all_series_models(garch_data_dict, results_path="china_ts_model_results.csv"):
    # If results already exist, load them
    if os.path.exists(results_path):
        print(f"üì¶ Loading saved time series results from {results_path}...")
        return pd.read_csv(results_path)

    results = []

    for ticker, df in garch_data_dict.items():
        for model_type in ["GARCH", "GJR", "EGARCH"]:
            print(f"\n================= {ticker} - {model_type} =================")
            try:
                _, metrics = forecast_volatility_arch(df, model_type=model_type, ticker=ticker, verbose=True)
                results.append({
                    "Stock": ticker,
                    "Model": model_type,
                    "R2": round(metrics["R2"], 4),
                    "RMSE": round(metrics["RMSE"], 4),
                    "MSE": round(metrics["MSE"], 6),
                    "MAE": round(metrics["MAE"], 4),
                })
            except Exception as e:
                print(f"‚ö†Ô∏è Skipping {ticker} - {model_type}: {e}")

    df_results = pd.DataFrame(results)
    df_results.to_csv(results_path, index=False)
    print(f"üíæ Saved time series model results to {results_path}")

    return df_results

ts_model_results = evaluate_all_series_models(garch_data)
ts_model_results_sorted = ts_model_results.sort_values(by="R2", ascending=False)
display(ts_model_results_sorted)



üîÆ Running GARCH for 601398.SS...
  ‚Üí GARCH | index 500 | vol: 0.01758
  ‚Üí GARCH | index 750 | vol: 0.01197
  ‚Üí GARCH | index 1000 | vol: 0.00824
  ‚Üí GARCH | index 1250 | vol: 0.01567
  ‚Üí GARCH | index 1500 | vol: 0.00726
  ‚Üí GARCH | index 1750 | vol: 0.01931
‚úÖ GARCH for 601398.SS ‚Äî R¬≤: 0.4657, RMSE: 0.0031, MAE: 0.0022


üîÆ Running GJR for 601398.SS...
  ‚Üí GJR | index 500 | vol: 0.01344
  ‚Üí GJR | index 750 | vol: 0.01218
  ‚Üí GJR | index 1000 | vol: 0.00829
  ‚Üí GJR | index 1250 | vol: 0.01658
  ‚Üí GJR | index 1500 | vol: 0.00658
  ‚Üí GJR | index 1750 | vol: 0.01832
‚úÖ GJR for 601398.SS ‚Äî R¬≤: 0.4465, RMSE: 0.0031, MAE: 0.0022


üîÆ Running EGARCH for 601398.SS...
  ‚Üí EGARCH | index 500 | vol: 0.01598
  ‚Üí EGARCH | index 750 | vol: 0.01242
  ‚Üí EGARCH | index 1000 | vol: 0.00842
  ‚Üí EGARCH | index 1250 | vol: 0.01420
  ‚Üí EGARCH | index 1500 | vol: 0.00777


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.



  ‚Üí EGARCH | index 1750 | vol: 0.01535
‚úÖ EGARCH for 601398.SS ‚Äî R¬≤: 0.4655, RMSE: 0.0031, MAE: 0.0022


üîÆ Running GARCH for 601988.SS...
  ‚Üí GARCH | index 500 | vol: 0.01273
  ‚Üí GARCH | index 750 | vol: 0.00911
  ‚Üí GARCH | index 1000 | vol: 0.00980
  ‚Üí GARCH | index 1250 | vol: 0.00710
  ‚Üí GARCH | index 1500 | vol: 0.00505
  ‚Üí GARCH | index 1750 | vol: 0.02613
‚úÖ GARCH for 601988.SS ‚Äî R¬≤: 0.6200, RMSE: 0.0027, MAE: 0.0019


üîÆ Running GJR for 601988.SS...
  ‚Üí GJR | index 500 | vol: 0.00894
  ‚Üí GJR | index 750 | vol: 0.00953
  ‚Üí GJR | index 1000 | vol: 0.01000
  ‚Üí GJR | index 1250 | vol: 0.00706
  ‚Üí GJR | index 1500 | vol: 0.00481
  ‚Üí GJR | index 1750 | vol: 0.02733
‚úÖ GJR for 601988.SS ‚Äî R¬≤: 0.5024, RMSE: 0.0031, MAE: 0.0020


üîÆ Running EGARCH for 601988.SS...
  ‚Üí EGARCH | index 500 | vol: 0.01238
  ‚Üí EGARCH | index 750 | vol: 0.00905
  ‚Üí EGARCH | index 1000 | vol: 0.01006
  ‚Üí EGARCH | index 1250 | vol: 0.00739
  ‚Üí EGARCH | index

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.



  ‚Üí GJR | index 750 | vol: 0.01249
  ‚Üí GJR | index 1000 | vol: 0.00876
  ‚Üí GJR | index 1250 | vol: 0.00964
  ‚Üí GJR | index 1500 | vol: 0.00551
  ‚Üí GJR | index 1750 | vol: 0.01936
‚úÖ GJR for 601288.SS ‚Äî R¬≤: 0.6249, RMSE: 0.0026, MAE: 0.0018


üîÆ Running EGARCH for 601288.SS...
  ‚Üí EGARCH | index 500 | vol: 0.01891


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit

  ‚Üí EGARCH | index 750 | vol: 0.01171


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



  ‚Üí EGARCH | index 1000 | vol: 0.00874
  ‚Üí EGARCH | index 1250 | vol: 0.00934
  ‚Üí EGARCH | index 1500 | vol: 0.00566
  ‚Üí EGARCH | index 1750 | vol: 0.01746
‚úÖ EGARCH for 601288.SS ‚Äî R¬≤: -17976199.6745, RMSE: 17.7967, MAE: 0.4818


üîÆ Running GARCH for 600036.SS...
  ‚Üí GARCH | index 500 | vol: 0.01767
  ‚Üí GARCH | index 750 | vol: 0.01869
  ‚Üí GARCH | index 1000 | vol: 0.01575
  ‚Üí GARCH | index 1250 | vol: 0.01641
  ‚Üí GARCH | index 1500 | vol: 0.02105
  ‚Üí GARCH | index 1750 | vol: 0.01715
‚úÖ GARCH for 600036.SS ‚Äî R¬≤: 0.4278, RMSE: 0.0038, MAE: 0.0029


üîÆ Running GJR for 600036.SS...
  ‚Üí GJR | index 500 | vol: 0.01351
  ‚Üí GJR | index 750 | vol: 0.01889
  ‚Üí GJR | index 1000 | vol: 0.01545
  ‚Üí GJR | index 1250 | vol: 0.01594
  ‚Üí GJR | index 1500 | vol: 0.02016
  ‚Üí GJR | index 1750 | vol: 0.01758
‚úÖ GJR for 600036.SS ‚Äî R¬≤: 0.3218, RMSE: 0.0041, MAE: 0.0031


üîÆ Running EGARCH for 600036.SS...
  ‚Üí EGARCH | index 500 | vol: 0.01756


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_sls

  ‚Üí EGARCH | index 750 | vol: 0.00043


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality cons

  ‚Üí EGARCH | index 1000 | vol: 0.01588
  ‚Üí EGARCH | index 1250 | vol: 0.01742
  ‚Üí EGARCH | index 1500 | vol: 0.02099
  ‚Üí EGARCH | index 1750 | vol: 0.01803
‚úÖ EGARCH for 600036.SS ‚Äî R¬≤: -522828581147430489053995415306217339643862575006714727425726544721170826090325772908144819272072847314040252850214629498649707996402339047248908772295222232595102586706811340363663428685505716464394844825660172137819292108826628590594475714682889645516605874222443038719092868001057585480380252960977846272.0000, RMSE: 3589803495847321968573347710569562866790391348954115321529010433816748805564286754642289273436758147834151857032376772169338599801476622081812921843712.0000, MAE: 96113318494211016191246457818745077868755192106337729491446439506092628392330099663671517917867336549798729447500086090008613583446197960489167749120.0000


üîÆ Running GARCH for 601328.SS...
  ‚Üí GARCH | index 500 | vol: 0.00986
  ‚Üí GARCH | index 750 | vol: 0.01023
  ‚Üí GARCH | index 1000 | vol: 0.00808
  ‚Üí GA

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



  ‚Üí EGARCH | index 750 | vol: 0.01903
  ‚Üí EGARCH | index 1000 | vol: 0.01550
  ‚Üí EGARCH | index 1250 | vol: 0.01589
  ‚Üí EGARCH | index 1500 | vol: 0.01570
  ‚Üí EGARCH | index 1750 | vol: 0.01813
‚úÖ EGARCH for 601318.SS ‚Äî R¬≤: -1.0626, RMSE: 0.0064, MAE: 0.0028


üîÆ Running GARCH for 601628.SS...
  ‚Üí GARCH | index 500 | vol: 0.01545
  ‚Üí GARCH | index 750 | vol: 0.02153
  ‚Üí GARCH | index 1000 | vol: 0.02095
  ‚Üí GARCH | index 1250 | vol: 0.02168
  ‚Üí GARCH | index 1500 | vol: 0.01809
  ‚Üí GARCH | index 1750 | vol: 0.02313
‚úÖ GARCH for 601628.SS ‚Äî R¬≤: 0.5654, RMSE: 0.0049, MAE: 0.0034


üîÆ Running GJR for 601628.SS...
  ‚Üí GJR | index 500 | vol: 0.01460
  ‚Üí GJR | index 750 | vol: 0.02264
  ‚Üí GJR | index 1000 | vol: 0.02144
  ‚Üí GJR | index 1250 | vol: 0.02183
  ‚Üí GJR | index 1500 | vol: 0.01847
  ‚Üí GJR | index 1750 | vol: 0.02245
‚úÖ GJR for 601628.SS ‚Äî R¬≤: 0.5378, RMSE: 0.0050, MAE: 0.0035


üîÆ Running EGARCH for 601628.SS...
  ‚Üí EGARCH | ind

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



  ‚Üí EGARCH | index 1750 | vol: 0.02329


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.



‚úÖ EGARCH for 601628.SS ‚Äî R¬≤: -0.1010, RMSE: 0.0078, MAE: 0.0041


üîÆ Running GARCH for 601601.SS...
  ‚Üí GARCH | index 500 | vol: 0.02952
  ‚Üí GARCH | index 750 | vol: 0.02412
  ‚Üí GARCH | index 1000 | vol: 0.02086
  ‚Üí GARCH | index 1250 | vol: 0.01996
  ‚Üí GARCH | index 1500 | vol: 0.02072
  ‚Üí GARCH | index 1750 | vol: 0.02783
‚úÖ GARCH for 601601.SS ‚Äî R¬≤: 0.3668, RMSE: 0.0042, MAE: 0.0033


üîÆ Running GJR for 601601.SS...
  ‚Üí GJR | index 500 | vol: 0.02607
  ‚Üí GJR | index 750 | vol: 0.02412
  ‚Üí GJR | index 1000 | vol: 0.02015
  ‚Üí GJR | index 1250 | vol: 0.01957
  ‚Üí GJR | index 1500 | vol: 0.02063
  ‚Üí GJR | index 1750 | vol: 0.02980
‚úÖ GJR for 601601.SS ‚Äî R¬≤: 0.3396, RMSE: 0.0043, MAE: 0.0033


üîÆ Running EGARCH for 601601.SS...
  ‚Üí EGARCH | index 500 | vol: 0.03087


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See

  ‚Üí EGARCH | index 750 | vol: 0.00000


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optim

  ‚Üí EGARCH | index 1000 | vol: 0.02079
  ‚Üí EGARCH | index 1250 | vol: 0.02046


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_sls

  ‚Üí EGARCH | index 1500 | vol: 0.02162
  ‚Üí EGARCH | index 1750 | vol: 0.02810
‚úÖ EGARCH for 601601.SS ‚Äî R¬≤: -924002029041058243442911752157334063499666473963544556679302342538066876154394634514644782137266543859607119963174571217793784570770003417445564872750528874895032699027921903790925098490386258658797881306691921645898972853525748068591108381904155392933644973399195779569850508862636814548902196258761867264.0000, RMSE: 5076748790081631296603906381518373793897197025883263738461302424957646334268459028735119109494222007899926389698064484779259624046047393241323058233344.0000, MAE: 192226636988422032382492915637490155737510384212675458982892879012185256784660199327343035835734673099597458895000172180017227166892395920978335498240.0000


üîÆ Running GARCH for 600016.SS...
  ‚Üí GARCH | index 500 | vol: 0.01093
  ‚Üí GARCH | index 750 | vol: 0.01046
  ‚Üí GARCH | index 1000 | vol: 0.00809
  ‚Üí GARCH | index 1250 | vol: 0.00810
  ‚Üí GARCH | index 1500 | vol: 0.00915
  ‚Üí GAR

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.



  ‚Üí GJR | index 750 | vol: 0.02772
  ‚Üí GJR | index 1000 | vol: 0.02139
  ‚Üí GJR | index 1250 | vol: 0.01637
  ‚Üí GJR | index 1500 | vol: 0.01608
  ‚Üí GJR | index 1750 | vol: 0.01607
‚úÖ GJR for 601688.SS ‚Äî R¬≤: 0.5938, RMSE: 0.0042, MAE: 0.0034


üîÆ Running EGARCH for 601688.SS...
  ‚Üí EGARCH | index 500 | vol: 0.02166


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optim

  ‚Üí EGARCH | index 750 | vol: 0.02665


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optim

  ‚Üí EGARCH | index 1000 | vol: 0.02221
  ‚Üí EGARCH | index 1250 | vol: 0.01704
  ‚Üí EGARCH | index 1500 | vol: 0.01576
  ‚Üí EGARCH | index 1750 | vol: 0.01639


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.



‚úÖ EGARCH for 601688.SS ‚Äî R¬≤: -4953661286185424207936298941239263232.0000, RMSE: 14742012936515040.0000, MAE: 394702324584652.2500


üîÆ Running GARCH for 000776.SZ...
  ‚Üí GARCH | index 500 | vol: 0.01443
  ‚Üí GARCH | index 750 | vol: 0.02097
  ‚Üí GARCH | index 1000 | vol: 0.01442
  ‚Üí GARCH | index 1250 | vol: 0.01614
  ‚Üí GARCH | index 1500 | vol: 0.02475
  ‚Üí GARCH | index 1750 | vol: 0.01767
‚úÖ GARCH for 000776.SZ ‚Äî R¬≤: 0.6584, RMSE: 0.0049, MAE: 0.0035


üîÆ Running GJR for 000776.SZ...
  ‚Üí GJR | index 500 | vol: 0.01432
  ‚Üí GJR | index 750 | vol: 0.02181
  ‚Üí GJR | index 1000 | vol: 0.01404
  ‚Üí GJR | index 1250 | vol: 0.01604
  ‚Üí GJR | index 1500 | vol: 0.02458
  ‚Üí GJR | index 1750 | vol: 0.01771
‚úÖ GJR for 000776.SZ ‚Äî R¬≤: 0.6350, RMSE: 0.0050, MAE: 0.0036


üîÆ Running EGARCH for 000776.SZ...
  ‚Üí EGARCH | index 500 | vol: 0.01522
  ‚Üí EGARCH | index 750 | vol: 0.02201
  ‚Üí EGARCH | index 1000 | vol: 0.01427
  ‚Üí EGARCH | index 1250 | vol: 0.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See

‚úÖ EGARCH for 000776.SZ ‚Äî R¬≤: -31.1433, RMSE: 0.0472, MAE: 0.0054
üíæ Saved time series model results to china_ts_model_results.csv


Unnamed: 0,Stock,Model,R2,RMSE,MSE,MAE
33,000776.SZ,GARCH,0.6584,0.0049,2.4e-05,0.0035
5,601988.SS,EGARCH,0.6404,0.0026,7e-06,0.0018
34,000776.SZ,GJR,0.635,0.005,2.5e-05,0.0036
9,601288.SS,GARCH,0.6329,0.0025,6e-06,0.0018
10,601288.SS,GJR,0.6249,0.0026,7e-06,0.0018
3,601988.SS,GARCH,0.62,0.0027,7e-06,0.0019
30,601688.SS,GARCH,0.6142,0.0041,1.7e-05,0.0033
8,601939.SS,EGARCH,0.6009,0.0031,9e-06,0.0024
31,601688.SS,GJR,0.5938,0.0042,1.8e-05,0.0034
21,601628.SS,GARCH,0.5654,0.0049,2.4e-05,0.0034


In [10]:
def train_fusion_model(df, model_name, ts_feature="GARCH_pred", ticker="TICKER"):
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from catboost import CatBoostRegressor
    from lightgbm import LGBMRegressor
    from xgboost import XGBRegressor

    models = {
        "KNN": KNeighborsRegressor(),
        "AdaBoost": AdaBoostRegressor(),
        "CatBoost": CatBoostRegressor(verbose=0),
        "XGBoost": XGBRegressor(verbosity=0),
        "RandomForest": RandomForestRegressor()
    }

    if model_name not in models:
        raise ValueError(f"Model '{model_name}' not recognized.")

    df = df.copy()

    # Flatten if needed
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = ['_'.join([str(i) for i in col if i]) for col in df.columns]

    if ts_feature not in df.columns:
        raise ValueError(f"'{ts_feature}' not found in DataFrame for {ticker}")

    feature_cols = [
        'RSI', 'MOM', 'OBV', 'MACD_LINE', 'MACD_SIGNAL', 'MACD_HIST',
        'STO_K', 'STO_D', 'Vol_t_1', 'Vol_t_2', 'Vol_t_3',
        'Vol_t_4', 'Vol_t_5', 'Vol_t_6', ts_feature
    ]

    X = df[feature_cols].copy()
    y = df["Vol_target"]

    split_idx = int(len(df) * 0.7)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    model = models[model_name]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    metrics = evaluate_series(y_test, y_pred)

    print(f"‚úÖ {model_name} + {ts_feature} for {ticker} ‚Äî R¬≤: {metrics['R2']:.4f}, RMSE: {metrics['RMSE']:.4f}, MAE: {metrics['MAE']:.4f}")

    return metrics

In [11]:
import os
import pandas as pd

# Reuse existing forecast function
for ticker in tqdm(garch_data.keys()):
    for model_type in ["GJR", "EGARCH"]:
        print(f"\nüìà Adding {model_type}_pred to {ticker}...")
        df = garch_data[ticker]

        try:
            df, _ = forecast_volatility_arch(df, model_type=model_type, ticker=ticker, verbose=False)
            garch_data[ticker] = df  # Update with new column
        except Exception as e:
            print(f"‚ö†Ô∏è {model_type} failed for {ticker}: {e}")

def train_all_fusion_models(garch_data_dict, results_path="china_fusion_model_results.csv"):
    # Load existing results if file exists
    if os.path.exists(results_path):
        print(f"üì¶ Loading saved fusion model results from {results_path}...")
        return pd.read_csv(results_path)

    results = []

    for ticker, df in garch_data_dict.items():
        for ts_feature in ["GARCH_pred", "GJR_pred", "EGARCH_pred"]:
            if ts_feature not in df.columns:
                print(f"‚ö†Ô∏è Skipping {ticker} - missing {ts_feature}")
                continue

            for model_name in ["RandomForest", "XGBoost", "CatBoost", "AdaBoost", "KNN"]:
                try:
                    metrics = train_fusion_model(df, model_name, ts_feature=ts_feature, ticker=ticker)
                    results.append({
                        "Stock": ticker,
                        "Fusion_Model": f"{ts_feature}+{model_name}",
                        "R2": round(metrics["R2"], 4),
                        "RMSE": round(metrics["RMSE"], 4),
                        "MSE": round(metrics["MSE"], 6),
                        "MAE": round(metrics["MAE"], 4),
                    })
                except Exception as e:
                    print(f"‚ö†Ô∏è {ticker} {ts_feature}+{model_name} failed: {e}")

    fusion_df = pd.DataFrame(results)
    fusion_df.to_csv(results_path, index=False)
    print(f"üíæ Saved fusion model results to {results_path}")

    return fusion_df

fusion_results_df = train_all_fusion_models(garch_data)
fusion_results_sorted = fusion_results_df.sort_values(by="R2", ascending=False)
display(fusion_results_sorted)

  0%|          | 0/12 [00:00<?, ?it/s]


üìà Adding GJR_pred to 601398.SS...

üìà Adding EGARCH_pred to 601398.SS...


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

  8%|‚ñä         | 1/12 [00:35<06:26, 35.15s/it]


üìà Adding GJR_pred to 601988.SS...

üìà Adding EGARCH_pred to 601988.SS...


 17%|‚ñà‚ñã        | 2/12 [01:10<05:53, 35.35s/it]


üìà Adding GJR_pred to 601939.SS...

üìà Adding EGARCH_pred to 601939.SS...


 25%|‚ñà‚ñà‚ñå       | 3/12 [01:46<05:18, 35.42s/it]


üìà Adding GJR_pred to 601288.SS...


Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.




üìà Adding EGARCH_pred to 601288.SS...


 33%|‚ñà‚ñà‚ñà‚ñé      | 4/12 [02:22<04:44, 35.61s/it]


üìà Adding GJR_pred to 600036.SS...

üìà Adding EGARCH_pred to 600036.SS...


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 5/12 [02:56<04:05, 35.03s/it]


üìà Adding GJR_pred to 601328.SS...

üìà Adding EGARCH_pred to 601328.SS...


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 6/12 [03:29<03:27, 34.66s/it]


üìà Adding GJR_pred to 601318.SS...

üìà Adding EGARCH_pred to 601318.SS...


 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 7/12 [04:02<02:50, 34.06s/it]


üìà Adding GJR_pred to 601628.SS...

üìà Adding EGARCH_pred to 601628.SS...


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 8/12 [04:38<02:18, 34.64s/it]


üìà Adding GJR_pred to 601601.SS...

üìà Adding EGARCH_pred to 601601.SS...


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_sls


üìà Adding GJR_pred to 600016.SS...

üìà Adding EGARCH_pred to 600016.SS...


 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 10/12 [05:47<01:08, 34.41s/it]


üìà Adding GJR_pred to 601688.SS...


Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.




üìà Adding EGARCH_pred to 601688.SS...


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 11/12 [06:25<00:35, 35.58s/it]


üìà Adding GJR_pred to 000776.SZ...

üìà Adding EGARCH_pred to 000776.SZ...


Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See scipy.optimize.fmin_slsqp for code meaning.

Iteration limit reached
See

‚úÖ RandomForest + GARCH_pred for 601398.SS ‚Äî R¬≤: 0.8882, RMSE: 0.0014, MAE: 0.0009
‚úÖ XGBoost + GARCH_pred for 601398.SS ‚Äî R¬≤: 0.8544, RMSE: 0.0016, MAE: 0.0010
‚úÖ CatBoost + GARCH_pred for 601398.SS ‚Äî R¬≤: 0.8656, RMSE: 0.0016, MAE: 0.0011
‚úÖ AdaBoost + GARCH_pred for 601398.SS ‚Äî R¬≤: 0.7935, RMSE: 0.0019, MAE: 0.0014
‚úÖ KNN + GARCH_pred for 601398.SS ‚Äî R¬≤: 0.0676, RMSE: 0.0041, MAE: 0.0030
‚úÖ RandomForest + GJR_pred for 601398.SS ‚Äî R¬≤: 0.8914, RMSE: 0.0014, MAE: 0.0009
‚úÖ XGBoost + GJR_pred for 601398.SS ‚Äî R¬≤: 0.8580, RMSE: 0.0016, MAE: 0.0010
‚úÖ CatBoost + GJR_pred for 601398.SS ‚Äî R¬≤: 0.8725, RMSE: 0.0015, MAE: 0.0010
‚úÖ AdaBoost + GJR_pred for 601398.SS ‚Äî R¬≤: 0.7744, RMSE: 0.0020, MAE: 0.0013
‚úÖ KNN + GJR_pred for 601398.SS ‚Äî R¬≤: 0.0676, RMSE: 0.0041, MAE: 0.0030
‚úÖ RandomForest + EGARCH_pred for 601398.SS ‚Äî R¬≤: 0.8775, RMSE: 0.0015, MAE: 0.0010
‚úÖ XGBoost + EGARCH_pred for 601398.SS ‚Äî R¬≤: 0.8554, RMSE: 0.0016, MAE: 0.0010
‚úÖ CatBoost 

Unnamed: 0,Stock,Fusion_Model,R2,RMSE,MSE,MAE
38,601939.SS,GJR_pred+AdaBoost,0.9454,0.0009,0.000001,0.0007
43,601939.SS,EGARCH_pred+AdaBoost,0.9423,0.0009,0.000001,0.0008
33,601939.SS,GARCH_pred+AdaBoost,0.9420,0.0010,0.000001,0.0007
35,601939.SS,GJR_pred+RandomForest,0.9361,0.0010,0.000001,0.0008
42,601939.SS,EGARCH_pred+CatBoost,0.9310,0.0010,0.000001,0.0008
...,...,...,...,...,...,...
69,600036.SS,GJR_pred+KNN,-1.2963,0.0078,0.000061,0.0062
74,600036.SS,EGARCH_pred+KNN,-1.2963,0.0078,0.000061,0.0062
169,000776.SZ,GARCH_pred+KNN,-2.1375,0.0106,0.000113,0.0092
174,000776.SZ,GJR_pred+KNN,-2.1375,0.0106,0.000113,0.0092
