In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import RobustScaler  # Thay StandardScaler bằng RobustScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.feature_selection import SelectFromModel
import os
import logging
from datetime import datetime
import warnings
from tqdm import tqdm
import seaborn as sns
from joblib import Parallel, delayed
import time

# Output directory for everything this script writes: the log file configured
# below and the figures saved by the plot_* helpers. Created at import time.
img_dir = 'ml_model_results'
os.makedirs(img_dir, exist_ok=True)

# Route INFO-and-above log records to a text file inside img_dir.
logging.basicConfig(
    filename=f'{img_dir}/ml_models_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# NOTE(review): this suppresses every warning category process-wide, including
# deprecation and numerical warnings raised by the libraries used here.
warnings.filterwarnings("ignore")

# Shared settings read by the functions in this file.
CONFIG = {
    # Not read by the code visible in this part of the file; presumably the
    # number of periods to forecast — TODO confirm against the caller.
    'forecast_horizon': 12,
    # Not read by the code visible in this part of the file.
    'seasonal_periods': 12,
    # Minimum number of training rows the run_* functions require.
    'min_data_length': 12,
    # Directory the plot_* helpers save figures into.
    'img_dir': img_dir,
    # Path of the results CSV; not written by the code visible here.
    'results_file': f'{img_dir}/ml_model_results.csv',
    # Passed as n_jobs to the estimators that accept it.
    'n_jobs': -1,
    # Default lag offsets used by create_features.
    'lags': [1, 2, 3],
    # Default rolling-window sizes used by create_features.
    'rolling_windows': [3, 6],
    # Default number of folds used by run_time_series_cv.
    'cv_splits': 5,
}

def validate_input_data(df, required_columns):
    """Validate that ``df`` can be used as model input.

    The checks run in this order and stop at the first failure: required
    columns are present, the index has no duplicates, the index is sorted in
    increasing order, the required columns contain no NaN, no +/-inf, and are
    all numeric. Afterwards, summary statistics of each required column are
    logged and a warning is logged for any column whose variance is below 1e-8.

    Args:
        df: DataFrame to validate.
        required_columns: List of column names that must exist and be clean.

    Raises:
        ValueError: If any of the checks above fails.
    """
    logger.info(f"Kiểm tra dữ liệu đầu vào: {df.shape}")

    # Report only the columns that are actually absent so the message is
    # actionable (previously the full list of required columns was printed).
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Thiếu các cột bắt buộc: {missing_columns}")
    if df.index.duplicated().any():
        raise ValueError("Index chứa giá trị trùng lặp!")
    if not df.index.is_monotonic_increasing:
        raise ValueError("Index không được sắp xếp tăng dần!")

    required = df[required_columns]
    nan_counts = required.isnull().sum()
    if nan_counts.any():
        logger.error(f"Dữ liệu chứa giá trị thiếu: {nan_counts.to_dict()}")
        raise ValueError(f"Dữ liệu chứa giá trị thiếu: {nan_counts.to_dict()}")

    # NaN has already been ruled out above, so any null that appears after
    # mapping +/-inf to NaN must have been an infinite value.
    inf_counts = required.replace([np.inf, -np.inf], np.nan).isnull().sum()
    if inf_counts.any():
        raise ValueError("Dữ liệu chứa giá trị vô cực!")
    if not all(np.issubdtype(dtype, np.number) for dtype in required.dtypes):
        raise ValueError("Một số cột không phải kiểu số!")

    # Distribution check: log summary statistics and flag near-constant columns.
    for col in required_columns:
        logger.info(f"Thống kê {col}: {df[col].describe()}")
        variance = df[col].var()
        if variance < 1e-8:
            logger.warning(f"Phương sai của {col} quá thấp: {variance}")

def create_features(data, target, lags=None, rolling_windows=None):
    """Build lag, rolling and calendar features for ``target``.

    The exogenous variable is ``cpi_yoy`` when ``target`` is ``cpi_mom`` and
    ``cpi_mom`` otherwise. Both series are IQR-clipped before features are
    derived from them.

    Rolling mean/std of the *target* are computed on the series shifted by one
    step, so a row's rolling features only use earlier observations. Without
    the shift the window contains the current target value and, combined with
    the lag features, lets a model reconstruct the target exactly.

    Args:
        data: DataFrame holding ``target`` and the exogenous column; its index
            must expose ``.month`` and ``.quarter`` (e.g. a DatetimeIndex).
        target: Name of the column to forecast.
        lags: Lag offsets to generate; defaults to ``CONFIG['lags']``.
        rolling_windows: Rolling window sizes; defaults to
            ``CONFIG['rolling_windows']``.

    Returns:
        A copy of ``data`` with the clipped columns and the added features,
        containing no NaN.

    Raises:
        ValueError: If NaN values remain after gap filling.
    """
    # Resolve defaults at call time instead of binding the CONFIG lists once
    # when the function is defined.
    if lags is None:
        lags = CONFIG['lags']
    if rolling_windows is None:
        rolling_windows = CONFIG['rolling_windows']

    logger.info(f"Tạo đặc trưng cho {target}, kích thước dữ liệu: {data.shape}")
    df = data.copy()
    exog_var = 'cpi_yoy' if target == 'cpi_mom' else 'cpi_mom'

    # Clip outliers before deriving features; the quantiles are taken over
    # the whole series passed in.
    for col in (target, exog_var):
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        df[col] = df[col].clip(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
        logger.info(f"Đã clip outliers cho {col}: min={df[col].min()}, max={df[col].max()}")

    for lag in lags:
        df[f'{target}_lag_{lag}'] = df[target].shift(lag)
        df[f'{exog_var}_lag_{lag}'] = df[exog_var].shift(lag)

    # Only past target values may enter the target's rolling statistics.
    past_target = df[target].shift(1)
    for window in rolling_windows:
        df[f'{target}_roll_mean_{window}'] = past_target.rolling(window=window).mean()
        df[f'{target}_roll_std_{window}'] = past_target.rolling(window=window).std()
        # The exogenous rolling mean still includes the current row, in line
        # with the contemporaneous `<exog>_current` feature added below.
        df[f'{exog_var}_roll_mean_{window}'] = df[exog_var].rolling(window=window).mean()

    df[f'{exog_var}_current'] = df[exog_var]
    df['month'] = df.index.month
    df['quarter'] = df.index.quarter

    # Fill gaps with a trailing 3-row mean of the same column. A trailing
    # window has no valid observations for the leading gaps created by
    # shifting/rolling, so those fall back to the column mean.
    for col in df.columns:
        if df[col].isnull().any():
            df[col] = df[col].fillna(df[col].rolling(window=3, min_periods=1).mean())
            if df[col].isnull().any():
                df[col] = df[col].fillna(df[col].mean())
                logger.info(f"Điền NaN trong cột {col} bằng trung bình: {df[col].mean()}")

    nan_counts = df.isnull().sum()
    if nan_counts.any():
        logger.error(f"Dữ liệu vẫn chứa NaN sau khi xử lý: {nan_counts.to_dict()}")
        raise ValueError("Dữ liệu vẫn chứa NaN sau khi xử lý!")

    logger.info(f"Đặc trưng sau khi xử lý: {list(df.columns)}")
    return df

def calculate_metrics(actual, predicted):
    """Compute error metrics between ``actual`` and ``predicted``.

    Pairs where either value is NaN or infinite are dropped before scoring.

    Args:
        actual: Observed values (array-like, same length as ``predicted``).
        predicted: Model outputs.

    Returns:
        Tuple ``(rmse, mae, mape, smape, norm_mape, directional_acc)``.
        All six are NaN when no valid pair remains. ``mape`` and ``norm_mape``
        are NaN when any actual value is (near) zero; ``directional_acc`` is
        NaN when fewer than two valid pairs remain. Percent-style metrics are
        scaled by 100.

    Raises:
        ValueError: If the two inputs do not have the same number of elements.
    """
    actual = np.array(actual, dtype=float).ravel()
    predicted = np.array(predicted, dtype=float).ravel()
    if actual.size != predicted.size:
        raise ValueError(
            f"Độ dài actual và predicted không khớp: {actual.size} != {predicted.size}"
        )
    logger.info(f"Kiểm tra calculate_metrics: actual={actual[:5]}, predicted={predicted[:5]}")

    valid_mask = np.isfinite(actual) & np.isfinite(predicted)
    actual = actual[valid_mask]
    predicted = predicted[valid_mask]

    if len(actual) == 0:
        logger.warning("Không có dữ liệu hợp lệ để tính chỉ số!")
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    # MAPE is undefined when an actual value is (near) zero.
    mape = mean_absolute_percentage_error(actual, predicted) * 100 if np.all(np.abs(actual) > 1e-8) else np.nan

    # SMAPE: a pair where both values are exactly zero has zero error, so it
    # contributes 0 instead of the 0/0 = NaN that would poison the mean.
    abs_err = np.abs(predicted - actual)
    abs_sum = np.abs(actual) + np.abs(predicted)
    smape_terms = np.divide(2 * abs_err, abs_sum, out=np.zeros_like(abs_err), where=abs_sum > 0)
    smape = 100 * np.mean(smape_terms)

    norm_mape = mape / np.mean(np.abs(actual)) if not np.isnan(mape) else np.nan
    # Share of consecutive steps where actual and predicted move the same way.
    directional_acc = np.mean((np.diff(actual) * np.diff(predicted)) > 0) * 100 if len(actual) > 1 else np.nan

    return rmse, mae, mape, smape, norm_mape, directional_acc

def plot_forecast(historical, val, test, forecast, forecast_index, title, ylabel, filename):
    """Plot history, validation, test and forecast lines and save the figure.

    Args:
        historical: Series of past values (plotted against its own index).
        val: Validation series.
        test: Test series.
        forecast: Forecast values, plotted against ``forecast_index``.
        forecast_index: X positions for ``forecast``.
        title: Figure title.
        ylabel: Y-axis label.
        filename: File name, saved under ``CONFIG['img_dir']``.

    A failure while saving is logged and swallowed. The figure is always
    closed, including when one of the plotting calls raises.
    """
    fig = plt.figure(figsize=(12, 6))
    try:
        plt.plot(historical.index, historical, label='Lịch sử', color='blue')
        plt.plot(val.index, val, label='Xác thực', color='purple')
        plt.plot(test.index, test, label='Thực tế (Test)', color='green')
        plt.plot(forecast_index, forecast, label='Dự báo', color='orange', linestyle='--', linewidth=2)
        plt.title(title)
        plt.xlabel('Thời gian')
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        try:
            plt.savefig(os.path.join(CONFIG['img_dir'], filename))
            logger.info(f"Đã lưu biểu đồ: {filename}")
        except Exception as e:
            logger.error(f"Lỗi khi lưu biểu đồ {filename}: {str(e)}")
    finally:
        # Release the figure even if plotting failed, so repeated failures
        # do not accumulate open figures.
        plt.close(fig)

def plot_comparison_forecasts(historical, val, test, forecasts, forecast_index, title, ylabel, filename, metrics=None):
    """Plot several model forecasts against history/validation/test and save.

    Args:
        historical: Series of past values.
        val: Validation series.
        test: Test series.
        forecasts: Mapping of model name to forecast values (or ``None``).
        forecast_index: X positions shared by all forecasts.
        title: Figure title.
        ylabel: Y-axis label.
        filename: File name, saved under ``CONFIG['img_dir']``.
        metrics: Optional mapping ``model name -> {'Test RMSE': value, ...}``.
            When supplied, a model without a usable ``'Test RMSE'`` is skipped
            and the RMSE is shown in the legend. When omitted or empty, every
            non-``None`` forecast is drawn with a plain label (previously
            nothing was drawn in that case).

    A failure while saving is logged and swallowed. The figure is always
    closed, including when one of the plotting calls raises.
    """
    fig = plt.figure(figsize=(14, 8))
    try:
        plt.plot(historical.index, historical, label='Lịch sử', color='blue')
        plt.plot(val.index, val, label='Xác thực', color='purple')
        plt.plot(test.index, test, label='Thực tế (Test)', color='green')
        colors = sns.color_palette("husl", len(forecasts))
        for (model_name, forecast), color in zip(forecasts.items(), colors):
            if forecast is None:
                logger.warning(f"Bỏ qua {model_name} trong biểu đồ so sánh do thiếu dự báo hoặc RMSE")
                continue
            if metrics:
                rmse = metrics.get(model_name, {}).get('Test RMSE', np.nan)
                if pd.isna(rmse):
                    logger.warning(f"Bỏ qua {model_name} trong biểu đồ so sánh do thiếu dự báo hoặc RMSE")
                    continue
                label = f'Dự báo {model_name} (Test RMSE: {rmse:.4f})'
            else:
                # No metrics were supplied at all: still show the forecast.
                label = f'Dự báo {model_name}'
            plt.plot(forecast_index, forecast, label=label, linestyle='--', color=color)
        plt.title(title)
        plt.xlabel('Thời gian')
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        try:
            plt.savefig(os.path.join(CONFIG['img_dir'], filename))
            logger.info(f"Đã lưu biểu đồ so sánh: {filename}")
        except Exception as e:
            logger.error(f"Lỗi khi lưu biểu đồ so sánh {filename}: {str(e)}")
    finally:
        # Release the figure even if plotting failed.
        plt.close(fig)

def plot_residuals(residuals, target, model_name, filename):
    """Plot a residual series over its index with a zero reference line.

    Args:
        residuals: Series of residuals (plotted against its own index).
        target: Name of the forecast variable, used in the title.
        model_name: Name of the model, used in the title.
        filename: File name, saved under ``CONFIG['img_dir']``.

    A failure while saving is logged and swallowed. The figure is always
    closed, including when one of the plotting calls raises.
    """
    fig = plt.figure(figsize=(12, 6))
    try:
        plt.plot(residuals.index, residuals, label='Phần dư', color='red')
        plt.axhline(0, color='black', linestyle='--')
        plt.title(f'Phần dư của {model_name} cho {target}')
        plt.xlabel('Thời gian')
        plt.ylabel('Phần dư')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        try:
            plt.savefig(os.path.join(CONFIG['img_dir'], filename))
            logger.info(f"Đã lưu biểu đồ phần dư: {filename}")
        except Exception as e:
            logger.error(f"Lỗi khi lưu biểu đồ phần dư {filename}: {str(e)}")
    finally:
        # Release the figure even if plotting failed.
        plt.close(fig)

def plot_train_val_test_split(train, val, test, target, filename):
    """Plot the ``target`` column of the train, validation and test frames.

    Args:
        train: Training DataFrame containing ``target``.
        val: Validation DataFrame containing ``target``.
        test: Test DataFrame containing ``target``.
        target: Column to plot; also used as the y-axis label and in the title.
        filename: File name, saved under ``CONFIG['img_dir']``.

    A failure while saving is logged and swallowed. The figure is always
    closed, including when one of the plotting calls raises.
    """
    fig = plt.figure(figsize=(12, 6))
    try:
        plt.plot(train.index, train[target], label='Tập huấn luyện', color='blue')
        plt.plot(val.index, val[target], label='Tập xác thực', color='purple')
        plt.plot(test.index, test[target], label='Tập kiểm tra', color='green')
        plt.title(f'Phân chia dữ liệu cho {target}')
        plt.xlabel('Thời gian')
        plt.ylabel(target)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        try:
            plt.savefig(os.path.join(CONFIG['img_dir'], filename))
            logger.info(f"Đã lưu biểu đồ phân chia dữ liệu: {filename}")
        except Exception as e:
            logger.error(f"Lỗi khi lưu biểu đồ phân chia dữ liệu {filename}: {str(e)}")
    finally:
        # Release the figure even if plotting failed.
        plt.close(fig)

def run_time_series_cv(model, X, y, n_splits=CONFIG['cv_splits']):
    """Score ``model`` with ``TimeSeriesSplit`` cross-validation.

    ``model`` is fitted in place on each fold's training part, so it is left
    fitted on the last fold when this function returns.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X: Feature matrix (ndarray or DataFrame), rows in time order.
        y: Target values (ndarray or Series), aligned row-by-row with ``X``.
        n_splits: Number of folds.

    Returns:
        Dict with the per-fold RMSE lists ``'train_rmse'`` and ``'test_rmse'``.
    """
    # The split yields positional row numbers. Convert to plain arrays so
    # `[...]` indexes rows by position: on a Series with a non-integer index
    # integer keys in `[]` are a deprecated positional fallback, and on a
    # DataFrame `[...]` would select columns instead of rows.
    X = np.asarray(X)
    y = np.asarray(y)

    tscv = TimeSeriesSplit(n_splits=n_splits)
    cv_scores = {'train_rmse': [], 'test_rmse': []}

    for train_idx, test_idx in tscv.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        model.fit(X_train, y_train)
        train_pred = model.predict(X_train)
        test_pred = model.predict(X_test)

        cv_scores['train_rmse'].append(np.sqrt(mean_squared_error(y_train, train_pred)))
        cv_scores['test_rmse'].append(np.sqrt(mean_squared_error(y_test, test_pred)))

    logger.info(f"Cross-validation: Mean Train RMSE={np.mean(cv_scores['train_rmse']):.4f}, "
                f"Mean Test RMSE={np.mean(cv_scores['test_rmse']):.4f}")
    return cv_scores

def select_features(model, X_train, y_train, feature_cols):
    """Return the feature names kept by a mean-threshold ``SelectFromModel``.

    A selector wrapping ``model`` is fitted on the training data. The names in
    ``feature_cols`` whose position is flagged by the selector's support mask
    are returned; if none is flagged, ``feature_cols`` itself is returned.
    """
    selector = SelectFromModel(model, threshold="mean", prefit=False)
    selector.fit(X_train, y_train)

    support_mask = selector.get_support()
    kept = [name for position, name in enumerate(feature_cols) if support_mask[position]]

    if kept:
        logger.info(f"Selected features: {kept}")
        return kept

    # Nothing passed the threshold: fall back to the full feature list.
    logger.warning("Không chọn được đặc trưng nào, sử dụng tất cả đặc trưng")
    return feature_cols

def combine_forecasts(forecasts):
    """Average the usable forecasts row-wise.

    Entries that are ``None`` or consist only of NaN are ignored. Returns the
    row-wise mean of the remaining series, or ``None`` when nothing is usable.
    """
    usable = []
    for series in forecasts.values():
        if series is None:
            continue
        if series.isna().all():
            continue
        usable.append(series)

    if not usable:
        return None

    stacked = pd.concat(usable, axis=1)
    return stacked.mean(axis=1)

def _merged_params(defaults, params):
    """Return a copy of ``defaults`` overridden by ``params`` (if any)."""
    merged = dict(defaults)
    if params:
        merged.update(params)
    return merged


def _fit_and_forecast(build_model, model_label, file_tag, train, val, test,
                      forecast_index, target, feature_cols,
                      require_target_variance=True):
    """Shared train / validate / forecast pipeline behind the ``run_*`` functions.

    Steps: slice features and target, sanity-check the training set, scale the
    features with ``RobustScaler`` (fitted on the training rows only), log
    time-series CV scores, fit on the full training set, predict
    train/val/test, compute metrics, and save the residual and forecast plots.

    Args:
        build_model: Zero-argument callable returning an unfitted estimator.
            It is called inside the ``try`` so that an invalid hyper-parameter
            is handled like any other failure.
        model_label: Human-readable model name used in logs and plot titles.
        file_tag: Model tag used in the saved plot file names.
        train, val, test: DataFrames holding ``feature_cols`` and ``target``.
        forecast_index: Index the test features are reindexed to and the
            forecast Series is labelled with.
        target: Name of the target column.
        feature_cols: Names of the feature columns.
        require_target_variance: When true, abort if the training target's
            variance is below 1e-8.

    Returns:
        ``(forecast, residuals, train_rmse, val_rmse, test_rmse, test_mae,
        test_mape, test_smape, test_norm_mape, test_dir_acc)``, or a tuple of
        ten ``None`` values when a check fails or any exception is raised.
    """
    start_time = time.time()
    failure = (None,) * 10
    # Bound before the try so the error handler can always report on them,
    # even when slicing the inputs is what failed.
    X_train = y_train = None
    try:
        X_train = train[feature_cols].dropna()
        y_train = train[target].loc[X_train.index]
        X_val = val[feature_cols].reindex(val.index)
        y_val = val[target]
        X_test = test[feature_cols].reindex(forecast_index)
        y_test = test[target]

        logger.info(f"Kích thước X_train sau khi loại NaN ({target}): {len(X_train)}")
        logger.info(f"Thống kê y_train ({target}): {y_train.describe()}")
        logger.info(f"Phương sai của y_train ({target}): {np.var(y_train)}")
        if len(X_train) < CONFIG['min_data_length']:
            logger.error(f"Tập huấn luyện cho {target} quá nhỏ ({len(X_train)} mẫu)")
            return failure

        if require_target_variance and np.var(y_train) < 1e-8:
            logger.error(f"Phương sai của {target} quá thấp: {np.var(y_train)}")
            return failure

        scaler = RobustScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        X_test_scaled = scaler.transform(X_test)

        if np.any(np.isnan(X_train_scaled)) or np.any(np.isinf(X_train_scaled)):
            logger.error(f"X_train_scaled chứa NaN hoặc giá trị vô cực: NaN={np.isnan(X_train_scaled).sum()}, Inf={np.isinf(X_train_scaled).sum()}")
            return failure

        model = build_model()

        # Called for its logging side effect only; the model is refitted on
        # the full training set right after.
        run_time_series_cv(model, X_train_scaled, y_train)
        model.fit(X_train_scaled, y_train)

        train_pred = model.predict(X_train_scaled)
        val_pred = model.predict(X_val_scaled)
        forecast = pd.Series(model.predict(X_test_scaled), index=forecast_index)
        residuals = y_train - train_pred

        train_rmse, train_mae, train_mape, _, _, _ = calculate_metrics(y_train, train_pred)
        val_rmse, val_mae, val_mape, _, _, _ = calculate_metrics(y_val, val_pred)
        test_rmse, test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc = calculate_metrics(y_test, forecast)

        plot_residuals(residuals, target, model_label, f'{target}_{file_tag}_residuals.png')
        # Only the last 36 training observations are drawn as history.
        plot_forecast(train[target].iloc[-36:], y_val, y_test, forecast, forecast_index,
                      f'{model_label} Forecast for {target}', target,
                      f'{target}_{file_tag}_forecast.png')

        elapsed_time = time.time() - start_time
        logger.info(f"{model_label} ({target}): "
                    f"Train RMSE={train_rmse:.4f}, Val RMSE={val_rmse:.4f}, Test RMSE={test_rmse:.4f}, "
                    f"Train MAE={train_mae:.4f}, Val MAE={val_mae:.4f}, Test MAE={test_mae:.4f}, "
                    f"Train MAPE={train_mape:.4f}, Val MAPE={val_mape:.4f}, Test MAPE={test_mape:.4f}, "
                    f"Time={elapsed_time:.2f}s")

        return forecast, residuals, train_rmse, val_rmse, test_rmse, test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc
    except Exception as e:
        logger.error(f"Lỗi {model_label} ({target}): {str(e)}")
        # getattr keeps this handler from raising when the failure happened
        # before X_train / y_train were assigned.
        logger.error(f"Chi tiết dữ liệu: X_train shape={getattr(X_train, 'shape', None)}, "
                     f"y_train shape={getattr(y_train, 'shape', None)}")
        return failure


def run_linear(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit ordinary least squares and forecast; see ``_fit_and_forecast``.

    ``params`` is accepted for signature parity with the other ``run_*``
    functions but is not used.
    """
    return _fit_and_forecast(
        lambda: LinearRegression(n_jobs=CONFIG['n_jobs']),
        'Linear Regression', 'linear',
        train, val, test, forecast_index, target, feature_cols)


def run_ridge(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit Ridge regression and forecast; ``params`` overrides the defaults."""
    defaults = {'alpha': 1.0, 'random_state': 42}
    return _fit_and_forecast(
        lambda: Ridge(**_merged_params(defaults, params)),
        'Ridge', 'ridge',
        train, val, test, forecast_index, target, feature_cols)


def run_lasso(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit Lasso regression and forecast; ``params`` overrides the defaults."""
    # Author's note: alpha was lowered to avoid too much regularisation.
    defaults = {'alpha': 0.1, 'random_state': 42}
    return _fit_and_forecast(
        lambda: Lasso(**_merged_params(defaults, params)),
        'Lasso', 'lasso',
        train, val, test, forecast_index, target, feature_cols)


def run_elasticnet(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit ElasticNet and forecast; ``params`` overrides the defaults."""
    # Author's note: alpha was lowered.
    defaults = {'alpha': 0.1, 'l1_ratio': 0.5, 'random_state': 42}
    return _fit_and_forecast(
        lambda: ElasticNet(**_merged_params(defaults, params)),
        'ElasticNet', 'elasticnet',
        train, val, test, forecast_index, target, feature_cols)


def run_svr(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit an RBF-kernel SVR and forecast; ``params`` overrides the defaults."""
    # Author's note: C and epsilon were increased.
    defaults = {'kernel': 'rbf', 'C': 1.0, 'epsilon': 0.1}
    return _fit_and_forecast(
        lambda: SVR(**_merged_params(defaults, params)),
        'SVR', 'svr',
        train, val, test, forecast_index, target, feature_cols)


def run_random_forest(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit a random forest and forecast; ``params`` overrides the defaults.

    Unlike the other ``run_*`` functions, this one does not abort on a
    near-constant training target.
    """
    defaults = {
        'n_estimators': 20,  # author's note: number of trees was increased
        'max_depth': 5,
        'min_samples_split': 5,
        'random_state': 32,
        'n_jobs': CONFIG['n_jobs'],
    }
    return _fit_and_forecast(
        lambda: RandomForestRegressor(**_merged_params(defaults, params)),
        'Random Forest', 'random_forest',
        train, val, test, forecast_index, target, feature_cols,
        require_target_variance=False)

def run_xgboost(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit an XGBoost regressor for ``target`` and score it on train/val/test.

    Rows of ``train`` that have a NaN in any feature column are dropped. The
    features are scaled with a ``RobustScaler`` fitted on the remaining
    training rows only, and the model is cross-validated via
    ``run_time_series_cv`` before being refit on the whole training set.
    Residual and forecast plots are produced as a side effect.

    Args:
        train, val, test: DataFrames containing ``feature_cols`` and ``target``.
        forecast_index: Index used to reindex the test features and to label
            the returned forecast.
        target: Name of the target column.
        feature_cols: Names of the feature columns.
        params: Optional dict of overrides merged over the default
            hyper-parameters.

    Returns:
        A 10-tuple ``(forecast, residuals, train_rmse, val_rmse, test_rmse,
        test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc)``.
        ``forecast`` is a Series indexed by ``forecast_index`` and
        ``residuals`` is ``y_train - train_pred``. Every element is ``None``
        when the training set is too small, the target variance is below
        1e-8, the scaled training matrix has NaN/inf, or any exception occurs
        (the error is logged, not raised).
    """
    start_time = time.time()
    failure = (None,) * 10
    # Bind these up front: the except handler reports their shapes, and an
    # error raised while building them would otherwise leave them unbound and
    # turn the handler itself into an UnboundLocalError.
    X_train = y_train = None
    try:
        X_train = train[feature_cols].dropna()
        y_train = train[target].loc[X_train.index]
        X_val = val[feature_cols].reindex(val.index)
        y_val = val[target]
        X_test = test[feature_cols].reindex(forecast_index)
        y_test = test[target]

        logger.info(f"Kích thước X_train sau khi loại NaN ({target}): {len(X_train)}")
        logger.info(f"Thống kê y_train ({target}): {y_train.describe()}")
        logger.info(f"Phương sai của y_train ({target}): {np.var(y_train)}")
        if len(X_train) < CONFIG['min_data_length']:
            logger.error(f"Tập huấn luyện cho {target} quá nhỏ ({len(X_train)} mẫu)")
            return failure

        if np.var(y_train) < 1e-8:
            logger.error(f"Phương sai của {target} quá thấp: {np.var(y_train)}")
            return failure

        # Fit the scaler on training rows only, then reuse it for val/test.
        scaler = RobustScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        X_test_scaled = scaler.transform(X_test)

        if np.any(np.isnan(X_train_scaled)) or np.any(np.isinf(X_train_scaled)):
            logger.error(f"X_train_scaled chứa NaN hoặc giá trị vô cực: NaN={np.isnan(X_train_scaled).sum()}, Inf={np.isinf(X_train_scaled).sum()}")
            return failure

        model_params = {
            'n_estimators': 200,  # increased number of trees
            'max_depth': 3,
            'min_child_weight': 2,
            'lambda': 1.0,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'random_state': 42,
            'n_jobs': CONFIG['n_jobs']
        }
        if params:
            model_params.update(params)

        model = XGBRegressor(**model_params)

        # The CV result is not used here; the call is kept because the
        # original ran it before the final fit.
        run_time_series_cv(model, X_train_scaled, y_train)
        model.fit(X_train_scaled, y_train)

        train_pred = model.predict(X_train_scaled)
        val_pred = model.predict(X_val_scaled)
        forecast = model.predict(X_test_scaled)
        forecast = pd.Series(forecast, index=forecast_index)
        residuals = y_train - train_pred

        train_rmse, train_mae, train_mape, train_smape, train_norm_mape, train_dir_acc = calculate_metrics(y_train, train_pred)
        val_rmse, val_mae, val_mape, val_smape, val_norm_mape, val_dir_acc = calculate_metrics(y_val, val_pred)
        test_rmse, test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc = calculate_metrics(y_test, forecast)

        plot_residuals(residuals, target, 'XGBoost', f'{target}_xgboost_residuals.png')
        plot_forecast(train[target][-36:], y_val, y_test, forecast, forecast_index,
                      f'XGBoost Forecast for {target}', target,
                      f'{target}_xgboost_forecast.png')

        elapsed_time = time.time() - start_time
        logger.info(f"XGBoost ({target}): "
                    f"Train RMSE={train_rmse:.4f}, Val RMSE={val_rmse:.4f}, Test RMSE={test_rmse:.4f}, "
                    f"Train MAE={train_mae:.4f}, Val MAE={val_mae:.4f}, Test MAE={test_mae:.4f}, "
                    f"Train MAPE={train_mape:.4f}, Val MAPE={val_mape:.4f}, Test MAPE={test_mape:.4f}, "
                    f"Time={elapsed_time:.2f}s")

        return forecast, residuals, train_rmse, val_rmse, test_rmse, test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc
    except Exception as e:
        logger.error(f"Lỗi XGBoost ({target}): {str(e)}")
        # getattr keeps this line safe when the failure happened before the
        # training frames were built (shape is then reported as None).
        logger.error(
            f"Chi tiết dữ liệu: X_train shape={getattr(X_train, 'shape', None)}, "
            f"y_train shape={getattr(y_train, 'shape', None)}"
        )
        return failure

def run_lightgbm(train, val, test, forecast_index, target, feature_cols, params=None):
    """Fit a LightGBM regressor for ``target`` and score it on train/val/test.

    Rows of ``train`` that have a NaN in any feature column are dropped. The
    features are scaled with a ``RobustScaler`` fitted on the remaining
    training rows only, and the model is cross-validated via
    ``run_time_series_cv`` before being refit on the whole training set.
    Residual and forecast plots are produced as a side effect.

    Args:
        train, val, test: DataFrames containing ``feature_cols`` and ``target``.
        forecast_index: Index used to reindex the test features and to label
            the returned forecast.
        target: Name of the target column.
        feature_cols: Names of the feature columns.
        params: Optional dict of overrides merged over the default
            hyper-parameters.

    Returns:
        A 10-tuple ``(forecast, residuals, train_rmse, val_rmse, test_rmse,
        test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc)``.
        ``forecast`` is a Series indexed by ``forecast_index`` and
        ``residuals`` is ``y_train - train_pred``. Every element is ``None``
        when the training set is too small, the target variance is below
        1e-8, the scaled training matrix has NaN/inf, or any exception occurs
        (the error is logged, not raised).
    """
    start_time = time.time()
    failure = (None,) * 10
    # Bind these up front: the except handler reports their shapes, and an
    # error raised while building them would otherwise leave them unbound and
    # turn the handler itself into an UnboundLocalError.
    X_train = y_train = None
    try:
        X_train = train[feature_cols].dropna()
        y_train = train[target].loc[X_train.index]
        X_val = val[feature_cols].reindex(val.index)
        y_val = val[target]
        X_test = test[feature_cols].reindex(forecast_index)
        y_test = test[target]

        logger.info(f"Kích thước X_train sau khi loại NaN ({target}): {len(X_train)}")
        logger.info(f"Thống kê y_train ({target}): {y_train.describe()}")
        logger.info(f"Phương sai của y_train ({target}): {np.var(y_train)}")
        if len(X_train) < CONFIG['min_data_length']:
            logger.error(f"Tập huấn luyện cho {target} quá nhỏ ({len(X_train)} mẫu)")
            return failure

        if np.var(y_train) < 1e-8:
            logger.error(f"Phương sai của {target} quá thấp: {np.var(y_train)}")
            return failure

        # Fit the scaler on training rows only, then reuse it for val/test.
        scaler = RobustScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        X_test_scaled = scaler.transform(X_test)

        if np.any(np.isnan(X_train_scaled)) or np.any(np.isinf(X_train_scaled)):
            logger.error(f"X_train_scaled chứa NaN hoặc giá trị vô cực: NaN={np.isnan(X_train_scaled).sum()}, Inf={np.isinf(X_train_scaled).sum()}")
            return failure

        model_params = {
            'n_estimators': 200,  # increased number of trees
            'max_depth': 3,
            'min_data_in_leaf': 20,
            'lambda_l1': 0.1,
            'lambda_l2': 0.1,
            'random_state': 42,
            'n_jobs': CONFIG['n_jobs']
        }
        if params:
            model_params.update(params)

        model = LGBMRegressor(**model_params)

        # The CV result is not used here; the call is kept because the
        # original ran it before the final fit.
        run_time_series_cv(model, X_train_scaled, y_train)
        model.fit(X_train_scaled, y_train)

        train_pred = model.predict(X_train_scaled)
        val_pred = model.predict(X_val_scaled)
        forecast = model.predict(X_test_scaled)
        forecast = pd.Series(forecast, index=forecast_index)
        residuals = y_train - train_pred

        train_rmse, train_mae, train_mape, train_smape, train_norm_mape, train_dir_acc = calculate_metrics(y_train, train_pred)
        val_rmse, val_mae, val_mape, val_smape, val_norm_mape, val_dir_acc = calculate_metrics(y_val, val_pred)
        test_rmse, test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc = calculate_metrics(y_test, forecast)

        plot_residuals(residuals, target, 'LightGBM', f'{target}_lightgbm_residuals.png')
        plot_forecast(train[target][-36:], y_val, y_test, forecast, forecast_index,
                      f'LightGBM Forecast for {target}', target,
                      f'{target}_lightgbm_forecast.png')

        elapsed_time = time.time() - start_time
        logger.info(f"LightGBM ({target}): "
                    f"Train RMSE={train_rmse:.4f}, Val RMSE={val_rmse:.4f}, Test RMSE={test_rmse:.4f}, "
                    f"Train MAE={train_mae:.4f}, Val MAE={val_mae:.4f}, Test MAE={test_mae:.4f}, "
                    f"Train MAPE={train_mape:.4f}, Val MAPE={val_mape:.4f}, Test MAPE={test_mape:.4f}, "
                    f"Time={elapsed_time:.2f}s")

        return forecast, residuals, train_rmse, val_rmse, test_rmse, test_mae, test_mape, test_smape, test_norm_mape, test_dir_acc
    except Exception as e:
        logger.error(f"Lỗi LightGBM ({target}): {str(e)}")
        # getattr keeps this line safe when the failure happened before the
        # training frames were built (shape is then reported as None).
        logger.error(
            f"Chi tiết dữ liệu: X_train shape={getattr(X_train, 'shape', None)}, "
            f"y_train shape={getattr(y_train, 'shape', None)}"
        )
        return failure

def run_model_for_target(target, train, val, test, forecast_index, model_name, model_func, feature_cols, params=None):
    """Run one model on one target and package its outputs as a result dict.

    ``model_func`` is invoked as
    ``model_func(train, val, test, forecast_index, target, feature_cols, params)``
    and is expected to return exactly ten values: the forecast, the residuals
    and eight metric values, in the order they are unpacked below.

    Returns:
        A dict keyed by ``'Target'``, ``'Model'``, ``'Train RMSE'``,
        ``'Val RMSE'``, ``'Test RMSE'``, ``'Test MAE'``, ``'Test MAPE'``,
        ``'Test sMAPE'``, ``'Test NormMAPE'``, ``'Test DirAcc'``,
        ``'Forecast'`` and ``'Residuals'``; or ``None`` when the model
        returned no forecast / no test RMSE, or when any exception was raised
        (the error is logged rather than propagated).
    """
    logger.info(f"Chạy {model_name} cho {target}")
    began = time.time()
    try:
        (
            forecast,
            residuals,
            train_rmse,
            val_rmse,
            test_rmse,
            test_mae,
            test_mape,
            test_smape,
            test_norm_mape,
            test_dir_acc,
        ) = model_func(train, val, test, forecast_index, target, feature_cols, params)

        # A usable run needs both a forecast and a test RMSE.
        usable = forecast is not None and test_rmse is not None
        if not usable:
            logger.error(f"{model_name} cho {target} không tạo được dự báo hoặc RMSE hợp lệ!")
            return None

        duration = time.time() - began
        logger.info(f"Hoàn thành {model_name} cho {target} trong {duration:.2f}s")

        labelled_outputs = (
            ('Target', target),
            ('Model', model_name),
            ('Train RMSE', train_rmse),
            ('Val RMSE', val_rmse),
            ('Test RMSE', test_rmse),
            ('Test MAE', test_mae),
            ('Test MAPE', test_mape),
            ('Test sMAPE', test_smape),
            ('Test NormMAPE', test_norm_mape),
            ('Test DirAcc', test_dir_acc),
            ('Forecast', forecast),
            ('Residuals', residuals),
        )
        return dict(labelled_outputs)
    except Exception as err:
        logger.error(f"Lỗi khi chạy {model_name} cho {target}: {str(err)}")
        return None

def _collect_model_results(model_results, target, results, forecasts, metrics):
    """Fold the per-model outcomes for one target into the shared accumulators.

    ``results`` gets one summary row per successful model (everything except
    the forecast and residuals), ``forecasts`` maps model name to forecast,
    and ``metrics`` maps model name to ``{'Test RMSE': ...}``. ``None``
    entries are skipped with a warning.
    """
    summary_keys = (
        'Target', 'Model', 'Train RMSE', 'Val RMSE', 'Test RMSE',
        'Test MAE', 'Test MAPE', 'Test sMAPE', 'Test NormMAPE', 'Test DirAcc',
    )
    for result in model_results:
        if result is None:
            logger.warning(f"Kết quả cho một mô hình của {target} là None, bỏ qua!")
            continue
        results.append({key: result[key] for key in summary_keys})
        forecasts[result['Model']] = result['Forecast']
        metrics[result['Model']] = {'Test RMSE': result['Test RMSE']}


def main():
    """Run the full pipeline for ``cpi_mom`` and ``cpi_yoy``.

    Loads ``data/analyzed_time_series.csv``, validates it, builds features per
    target, splits each feature frame into train / validation (rows -24:-12) /
    test (last 12 rows), runs every configured model, then writes comparison
    plots, ensemble forecasts, the metrics table and the per-target combined
    forecast CSVs under ``img_dir``.

    Raises:
        ValueError: If the test split length differs from
            ``CONFIG['forecast_horizon']``.
        Exception: Any other error is logged and re-raised.
    """
    try:
        data = pd.read_csv('data/analyzed_time_series.csv')
        data['time'] = pd.to_datetime(data['time'])
        data.set_index('time', inplace=True)
        required_columns = ['cpi_mom', 'cpi_yoy']
        validate_input_data(data, required_columns)

        logger.info(f"Kích thước dữ liệu gốc: {len(data)}")
        logger.info(f"Phương sai cpi_mom: {np.var(data['cpi_mom'])}, cpi_yoy: {np.var(data['cpi_yoy'])}")

        # Build the feature frame for each target.
        features_mom = create_features(data, 'cpi_mom')
        features_yoy = create_features(data, 'cpi_yoy')

        # Split into train / validation / test.
        train_mom = features_mom[:-24]
        val_mom = features_mom[-24:-12]
        test_mom = features_mom[-12:]

        train_yoy = features_yoy[:-24]
        val_yoy = features_yoy[-24:-12]
        test_yoy = features_yoy[-12:]

        logger.info(f"Kích thước tập dữ liệu cho cpi_mom: Train={len(train_mom)}, Val={len(val_mom)}, Test={len(test_mom)}")
        logger.info(f"Kích thước tập dữ liệu cho cpi_yoy: Train={len(train_yoy)}, Val={len(val_yoy)}, Test={len(test_yoy)}")

        if len(test_mom) != CONFIG['forecast_horizon'] or len(test_yoy) != CONFIG['forecast_horizon']:
            raise ValueError(f"Kích thước tập kiểm tra không khớp với forecast_horizon ({CONFIG['forecast_horizon']})")

        # NOTE(review): the month-start frequency presumably matches the data
        # index; the model functions reindex test features onto this index, so
        # a mismatch would give NaN feature rows - confirm.
        forecast_index = pd.date_range(start=test_mom.index[0], periods=CONFIG['forecast_horizon'], freq='MS')

        # Plot the train / validation / test split.
        plot_train_val_test_split(train_mom, val_mom, test_mom, 'cpi_mom', 'cpi_mom_train_val_test_split.png')
        plot_train_val_test_split(train_yoy, val_yoy, test_yoy, 'cpi_yoy', 'cpi_yoy_train_val_test_split.png')

        # Feature list per target: every column except the two targets.
        feature_cols_mom = [col for col in train_mom.columns if col not in ['cpi_mom', 'cpi_yoy']]
        feature_cols_yoy = [col for col in train_yoy.columns if col not in ['cpi_mom', 'cpi_yoy']]

        logger.info(f"Đặc trưng cho cpi_mom: {feature_cols_mom}")
        logger.info(f"Đặc trưng cho cpi_yoy: {feature_cols_yoy}")

        models = {
            'Random Forest': (run_random_forest, {'max_depth': 5, 'min_samples_split': 5}),
            'XGBoost': (run_xgboost, {'max_depth': 3, 'min_child_weight': 2, 'lambda': 1.0, 'subsample': 0.8, 'colsample_bytree': 0.8}),
            'LightGBM': (run_lightgbm, {'max_depth': 3, 'min_data_in_leaf': 5, 'lambda_l1': 0.01, 'lambda_l2': 0.1}),
            'Linear Regression': (run_linear, {}),
            'Ridge': (run_ridge, {'alpha': 1.0}),
            'Lasso': (run_lasso, {'alpha': 0.1}),
            'ElasticNet': (run_elasticnet, {'alpha': 0.1, 'l1_ratio': 0.5}),
            'SVR': (run_svr, {'C': 1.0, 'epsilon': 0.1})
        }

        results = []
        forecasts_mom = {}
        forecasts_yoy = {}
        metrics_mom = {}
        metrics_yoy = {}

        # Run the models for cpi_mom (in parallel via joblib).
        logger.info("Chạy các mô hình cho cpi_mom")
        tasks_mom = [
            delayed(run_model_for_target)(
                'cpi_mom', train_mom, val_mom, test_mom, forecast_index, model_name, model_func, feature_cols_mom, params
            )
            for model_name, (model_func, params) in models.items()
        ]
        model_results_mom = Parallel(n_jobs=CONFIG['n_jobs'], verbose=1)(tasks_mom)
        _collect_model_results(model_results_mom, 'cpi_mom', results, forecasts_mom, metrics_mom)

        # Run the models for cpi_yoy (sequentially).
        logger.info("Chạy các mô hình cho cpi_yoy")
        model_results_yoy = [
            run_model_for_target(
                'cpi_yoy', train_yoy, val_yoy, test_yoy, forecast_index, model_name, model_func, feature_cols_yoy, params
            )
            for model_name, (model_func, params) in models.items()
        ]
        _collect_model_results(model_results_yoy, 'cpi_yoy', results, forecasts_yoy, metrics_yoy)

        # Draw the comparison plots.
        for target, forecasts, metrics, train, val, test in [
            ('cpi_mom', forecasts_mom, metrics_mom, train_mom, val_mom, test_mom),
            ('cpi_yoy', forecasts_yoy, metrics_yoy, train_yoy, val_yoy, test_yoy)
        ]:
            if not forecasts:
                logger.warning(f"Không có dự báo hợp lệ cho {target}, bỏ qua biểu đồ so sánh")
                continue
            plot_comparison_forecasts(
                train[target][-36:], val[target], test[target], forecasts, forecast_index,
                f'Comparison of ML Forecasts for {target}', target,
                f'{target}_ml_model_comparison.png', metrics=metrics
            )

        # Save the ensemble forecasts.
        combined_mom = combine_forecasts(forecasts_mom)
        combined_yoy = combine_forecasts(forecasts_yoy)
        if combined_mom is not None:
            combined_mom.to_csv(f'{img_dir}/ensemble_forecast_cpi_mom.csv')
            logger.info("Dự báo ensemble cho cpi_mom đã được lưu")
        if combined_yoy is not None:
            combined_yoy.to_csv(f'{img_dir}/ensemble_forecast_cpi_yoy.csv')
            logger.info("Dự báo ensemble cho cpi_yoy đã được lưu")

        # Save the metrics table.
        results_df = pd.DataFrame(results)
        print(results_df)
        results_df.to_csv(CONFIG['results_file'], index=False)
        logger.info(f"Kết quả đã được lưu vào {CONFIG['results_file']}")

        # Save the per-model forecasts side by side.
        for target, forecasts in [
            ('cpi_mom', forecasts_mom),
            ('cpi_yoy', forecasts_yoy)
        ]:
            if not forecasts:
                logger.warning(f"Không có dự báo hợp lệ cho {target}, bỏ qua lưu dự báo kết hợp")
                continue
            combined_forecast = pd.DataFrame({'Date': forecast_index})
            for model_name, forecast in forecasts.items():
                # Assign by position: the frame has a 0..n-1 index while a
                # forecast Series is date-indexed, so assigning the Series
                # directly would align on index and fill the column with NaN.
                combined_forecast[f'{model_name}_{target}'] = np.asarray(forecast)
            combined_forecast.to_csv(f'{img_dir}/combined_forecast_{target}.csv', index=False)
            logger.info(f"Dự báo kết hợp cho {target} đã được lưu vào {img_dir}/combined_forecast_{target}.csv")

    except Exception as e:
        logger.error(f"Lỗi chương trình chính: {str(e)}")
        raise
    
# Entry point: run the pipeline only when this module is executed as the main
# program, not when it is imported.
if __name__ == "__main__":
    main()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done 4 out of 8 | elapsed:    6.1s remaining:    6.1s
[Parallel(n_jobs=-1)]: Done 8 out of 8 | elapsed:    6.4s finished


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 469
[LightGBM] [Info] Number of data points in the train set: 55, number of used features: 29
[LightGBM] [Info] Start training from score 104.627274
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 861
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 29
[LightGBM] [Info] Start training from score 103.877065
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1187
[LightGBM] [Info] Number of data points in the train set: 163, number of used features: 29
[LightGBM] [Info] Start training