In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, MultiHeadAttention, LayerNormalization, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import os
import logging
from datetime import datetime
import warnings
from tqdm import tqdm
import seaborn as sns
from joblib import Parallel, delayed
import time

img_dir = 'dl_model_results'
# Tạo thư mục lưu hình ảnh nếu chưa tồn tại
os.makedirs(img_dir, exist_ok=True)

# Thiết lập logging
logging.basicConfig(
    filename=f'{img_dir}/deep_learning_models_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
warnings.filterwarnings("ignore")

# Tham số cấu hình
CONFIG = {
    'forecast_horizon': 12,
    'seasonal_periods': 12,
    'min_data_length': 24,
    'img_dir': img_dir,
    'results_file': f'{img_dir}/deep_learning_model_results.csv',
    'n_jobs': -1,
    'lstm_units': 32,
    'gru_units': 32,
    'mlp_units': [32, 16],
    'transformer_heads': 2,
    'transformer_units': 32,
    'look_back': 12,
    'epochs': 50,
    'batch_size': 32,
}

# Tạo thư mục lưu biểu đồ nếu chưa tồn tại
os.makedirs(CONFIG['img_dir'], exist_ok=True)

def validate_input_data(df, required_columns):
    """Kiểm tra tính hợp lệ của dữ liệu đầu vào."""
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Thiếu các cột bắt buộc: {required_columns}")
    if df.index.duplicated().any():
        raise ValueError("Index chứa giá trị trùng lặp!")
    if not df.index.is_monotonic_increasing:
        raise ValueError("Index không được sắp xếp tăng dần!")
    if df[required_columns].isnull().sum().any():
        raise ValueError(f"Dữ liệu chứa giá trị thiếu: {df[required_columns].isnull().sum().to_dict()}")
    if df[required_columns].replace([np.inf, -np.inf], np.nan).isnull().sum().any():
        raise ValueError("Dữ liệu chứa giá trị vô cực!")
    if not all(df[required_columns].dtypes.apply(lambda x: np.issubdtype(x, np.number))):
        raise ValueError("Một số cột không phải kiểu số!")

def calculate_metrics(actual, predicted):
    """Tính RMSE, MAE, MAPE, sMAPE, NormMAPE, và Directional Accuracy."""
    actual = np.array(actual, dtype=float)
    predicted = np.array(predicted, dtype=float)
    valid_mask = ~np.isnan(actual) & ~np.isnan(predicted) & ~np.isinf(actual) & ~np.isinf(predicted)
    actual = actual[valid_mask]
    predicted = predicted[valid_mask]
    
    if len(actual) == 0:
        logger.warning("Không có dữ liệu hợp lệ để tính chỉ số!")
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan
    
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    mape = mean_absolute_percentage_error(actual, predicted) * 100 if np.all(np.abs(actual) > 1e-8) else np.nan
    smape = 100 * np.mean(2 * np.abs(predicted - actual) / (np.abs(actual) + np.abs(predicted)))
    norm_mape = mape / np.mean(np.abs(actual)) if not np.isnan(mape) else np.nan
    
    actual_diff = np.diff(actual)
    pred_diff = np.diff(predicted)
    directional_acc = np.mean((actual_diff * pred_diff) > 0) * 100 if len(actual_diff) > 0 else np.nan
    
    return rmse, mae, mape, smape, norm_mape, directional_acc

def plot_forecast(historical, test, forecast, forecast_index, title, ylabel, filename):
    """Vẽ và lưu biểu đồ dự báo."""
    plt.figure(figsize=(12, 6))
    plt.plot(historical.index, historical, label='Lịch sử', color='blue')
    plt.plot(test.index, test, label='Thực tế (Test)', color='green')
    plt.plot(forecast_index, forecast, label='Dự báo', color='orange', linestyle='--', linewidth=2)
    plt.title(title)
    plt.xlabel('Thời gian')
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    try:
        plt.savefig(os.path.join(CONFIG['img_dir'], filename))
        logger.info(f"Đã lưu biểu đồ: {filename}")
    except Exception as e:
        logger.error(f"Lỗi khi lưu biểu đồ {filename}: {str(e)}")
    plt.close()

def plot_comparison_forecasts(historical, test, forecasts, forecast_index, title, ylabel, filename, metrics=None):
    """Vẽ so sánh dự báo từ nhiều mô hình với chú thích chỉ số RMSE."""
    plt.figure(figsize=(14, 8))
    plt.plot(historical.index, historical, label='Lịch sử', color='blue')
    plt.plot(test.index, test, label='Thực tế (Test)', color='green')
    colors = sns.color_palette("husl", len(forecasts))
    for (model_name, forecast), color in zip(forecasts.items(), colors):
        rmse = metrics.get(model_name, {}).get('RMSE', np.nan) if metrics else np.nan
        if forecast is None or pd.isna(rmse):
            logger.warning(f"Bỏ qua {model_name} trong biểu đồ so sánh do thiếu dự báo hoặc RMSE")
            continue
        label = f'Dự báo {model_name} (RMSE: {rmse:.4f})'
        plt.plot(forecast_index, forecast, label=label, linestyle='--', color=color)
    plt.title(title)
    plt.xlabel('Thời gian')
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    try:
        plt.savefig(os.path.join(CONFIG['img_dir'], filename))
        logger.info(f"Đã lưu biểu đồ so sánh: {filename}")
    except Exception as e:
        logger.error(f"Lỗi khi lưu biểu đồ so sánh {filename}: {str(e)}")
    plt.close()

def create_sequences(data, look_back, forecast_horizon=1):
    """Tạo chuỗi dữ liệu cho các mô hình."""
    X, y = [], []
    for i in range(len(data) - look_back - forecast_horizon + 1):
        X.append(data[i:(i + look_back)])
        y.append(data[i + look_back:i + look_back + forecast_horizon])
    return np.array(X), np.array(y)

def run_lstm(train, test, forecast_index, target, scaler):
    """Chạy mô hình LSTM."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        y_train = y_train[:, 0]  # Chỉ lấy giá trị đầu tiên của y
        
        model = Sequential()
        model.add(LSTM(units=CONFIG['lstm_units'], input_shape=(CONFIG['look_back'], 1)))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"LSTM for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi LSTM for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_gru(train, test, forecast_index, target, scaler):
    """Chạy mô hình GRU."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        y_train = y_train[:, 0]
        
        model = Sequential()
        model.add(GRU(units=CONFIG['gru_units'], input_shape=(CONFIG['look_back'], 1)))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"GRU for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi GRU for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_mlp(train, test, forecast_index, target, scaler):
    """Chạy mô hình MLP."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])  # Flatten for MLP
        y_train = y_train[:, 0]
        
        model = periodi
        for units in CONFIG['mlp_units']:
            model.add(Dense(units=units, activation='relu'))
            model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:].flatten()
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back']))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"MLP for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi MLP for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_transformer(train, test, forecast_index, target, scaler):
    """Chạy mô hình Transformer đơn giản."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        y_train = y_train[:, 0]
        
        inputs = Input(shape=(CONFIG['look_back'], 1))
        x = MultiHeadAttention(num_heads=CONFIG['transformer_heads'], 
                              key_dim=CONFIG['transformer_units'])(inputs, inputs)
        x = LayerNormalization(epsilon=1e-6)(x)
        x = Dense(CONFIG['transformer_units'], activation='relu')(x)
        x = Dropout(0.2)(x)
        x = Dense(1)(x)
        outputs = x[:, -1, :]  # Lấy giá trị cuối cùng
        model = Model(inputs, outputs)
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"Transformer for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi Transformer for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_seq2seq(train, test, forecast_index, target, scaler):
    """Chạy mô hình Seq2Seq đơn giản."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'], CONFIG['forecast_horizon'])
        
        encoder_inputs = Input(shape=(CONFIG['look_back'], 1))
        encoder = LSTM(CONFIG['lstm_units'], return_state=True)
        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
        encoder_states = [state_h, state_c]
        
        decoder_inputs = Input(shape=(CONFIG['forecast_horizon'], 1))
        decoder_lstm = LSTM(CONFIG['lstm_units'], return_sequences=True)
        decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states)
        decoder_dense = Dense(1)
        decoder_outputs = decoder_dense(decoder_outputs)
        
        model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        decoder_input_data = np.zeros((X_train.shape[0], CONFIG['forecast_horizon'], 1))
        model.fit([X_train, decoder_input_data], y_train, epochs=CONFIG['epochs'], 
                 batch_size=CONFIG['batch_size'], callbacks=[early_stopping], verbose=0)
        
        # Dự báo
        encoder_model = Model(encoder_inputs, encoder_states)
        decoder_state_input_h = Input(shape=(CONFIG['lstm_units'],))
        decoder_state_input_c = Input(shape=(CONFIG['lstm_units'],))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
        states_value = encoder_model.predict(last_sequence_reshaped, verbose=0)
        
        target_sequence = np.zeros((1, CONFIG['forecast_horizon'], 1))
        forecast_scaled = decoder_model.predict([target_sequence] + states_value, verbose=0)[0]
        forecast = scaler.inverse_transform(forecast_scaled[:, :, 0]).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict([X_train, decoder_input_data], verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled[:, :, 0]).flatten()[:len(train) - CONFIG['look_back']]
        residuals = train[CONFIG['look_back']:].values[:len(train_pred)] - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"Seq2Seq for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi Seq2Seq for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_model_for_target(target, train, test, forecast_index, model_name, model_func, scaler):
    """Chạy một mô hình cho một mục tiêu cụ thể."""
    logger.info(f"Chạy {model_name} cho {target}")
    start_time = time.time()
    try:
        forecast, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, ci = model_func(
            train[target], test[target], forecast_index, target, scaler
        )
        
        if forecast is None or rmse is None:
            logger.error(f"{model_name} cho {target} không tạo được dự báo hoặc RMSE hợp lệ!")
            return None
        
        plot_forecast(
            train[target][-36:], test[target], forecast, forecast_index,
            f'{model_name} Forecast for {target}', target,
            f'{target}_{model_name}_forecast.png'
        )
        elapsed_time = time.time() - start_time
        logger.info(f"Hoàn thành {model_name} cho {target} trong {elapsed_time:.2f}s")
        return {
            'Target': target,
            'Model': model_name,
            'RMSE': rmse,
            'MAE': mae,
            'MAPE': mape,
            'sMAPE': smape,
            'NormMAPE': norm_mape,
            'DirAcc': dir_acc,
            'Forecast': forecast,
            'Residuals': residuals,
            'CI': ci
        }
    except Exception as e:
        logger.error(f"Lỗi khi chạy {model_name} cho {target}: {str(e)}")
        return None

def main():
    """Hàm chính để chạy các mô hình DL và lưu kết quả."""
    try:
        data = pd.read_csv('data/analyzed_time_series.csv')
        data['time'] = pd.to_datetime(data['time'])
        data.set_index('time', inplace=True)
        required_columns = ['cpi_mom', 'cpi_yoy']
        validate_input_data(data, required_columns)
        
        train_size = len(data) - CONFIG['forecast_horizon']
        train, test = data[:train_size], data[train_size:]
        forecast_index = pd.date_range(start=test.index[0], periods=CONFIG['forecast_horizon'], freq='MS')
        
        scaler = MinMaxScaler(feature_range=(0, 1))
        
        models = {
            'LSTM': (run_lstm, {}),
            'GRU': (run_gru, {}),
            'MLP': (run_mlp, {}),
            'Transformer': (run_transformer, {}),
            'Seq2Seq': (run_seq2seq, {})
        }
        
        results = []
        forecasts_mom = {}
        forecasts_yoy = {}
        metrics_mom = {}
        metrics_yoy = {}
        
        for target in ['cpi_mom', 'cpi_yoy']:
            logger.info(f"Chạy các mô hình cho {target}")
            tasks = [
                delayed(run_model_for_target)(
                    target, train, test, forecast_index, model_name, model_func, scaler
                )
                for model_name, (model_func, params) in models.items()
            ]
            model_results = Parallel(n_jobs=CONFIG['n_jobs'], verbose=1)(tasks)
            
            for result in model_results:
                if result is not None:
                    results.append({
                        'Target': result['Target'],
                        'Model': result['Model'],
                        'RMSE': result['RMSE'],
                        'MAE': result['MAE'],
                        'MAPE': result['MAPE'],
                        'sMAPE': result['sMAPE'],
                        'NormMAPE': result['NormMAPE'],
                        'DirAcc': result['DirAcc']
                    })
                    if result['Target'] == 'cpi_mom':
                        forecasts_mom[result['Model']] = result['Forecast']
                        metrics_mom[result['Model']] = {'RMSE': result['RMSE']}
                    else:
                        forecasts_yoy[result['Model']] = result['Forecast']
                        metrics_yoy[result['Model']] = {'RMSE': result['RMSE']}
                else:
                    logger.warning(f"Kết quả cho một mô hình của {target} là None, bỏ qua!")
        
        for target, forecasts, metrics in [('cpi_mom', forecasts_mom, metrics_mom), ('cpi_yoy', forecasts_yoy, metrics_yoy)]:
            if not forecasts:
                logger.warning(f"Không có dự báo hợp lệ cho {target}, bỏ qua biểu đồ so sánh")
                continue
            plot_comparison_forecasts(
                train[target][-36:], test[target], forecasts, forecast_index,
                f'Comparison of Forecasts for {target}', target,
                f'{target}_model_comparison.png', metrics=metrics
            )
        
        results_df = pd.DataFrame(results)
        print(results_df)
        results_df.to_csv(CONFIG['results_file'], index=False)
        logger.info(f"Kết quả đã được lưu vào {CONFIG['results_file']}")
        
        for target, forecasts in [('cpi_mom', forecasts_mom), ('cpi_yoy', forecasts_yoy)]:
            if not forecasts:
                logger.warning(f"Không có dự báo hợp lệ cho {target}, bỏ qua lưu dự báo kết hợp")
                continue
            combined_forecast = pd.DataFrame({'Date': forecast_index})
            for model_name, forecast in forecasts.items():
                combined_forecast[f'{model_name}_{target}'] = forecast
            combined_forecast.to_csv(f'{img_dir}/combined_forecast_{target}.csv', index=False)
            logger.info(f"Dự báo kết hợp cho {target} đã được lưu vào combined_forecast_{target}.csv")
        
    except Exception as e:
        logger.error(f"Lỗi chương trình chính: {str(e)}")
        raise

if __name__ == "__main__":
    main()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed:   17.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed:   12.3s finished


    Target        Model      RMSE       MAE      MAPE     sMAPE  NormMAPE  \
0  cpi_mom         LSTM  0.316042  0.236741  0.236087  0.236009  0.002355   
1  cpi_mom          GRU  0.316257  0.236115  0.235401  0.235386  0.002348   
2  cpi_mom  Transformer  0.300247  0.212500  0.211758  0.211845  0.002112   
3  cpi_yoy         LSTM  2.832642  2.266489  2.195827  2.158757  0.021187   
4  cpi_yoy          GRU  1.565676  1.139194  1.105349  1.093911  0.010665   
5  cpi_yoy  Transformer  1.916821  1.797977  1.738987  1.721921  0.016779   

      DirAcc  
0  45.454545  
1  63.636364  
2   0.000000  
3  54.545455  
4  54.545455  
5   0.000000  


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, MultiHeadAttention, LayerNormalization, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import os
import logging
from datetime import datetime
import warnings
from tqdm import tqdm
import seaborn as sns
from joblib import Parallel, delayed
import time

img_dir = 'dl_model_results'
os.makedirs(img_dir, exist_ok=True)

logging.basicConfig(
    filename=f'{img_dir}/deep_learning_models_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
warnings.filterwarnings("ignore")

CONFIG = {
    'forecast_horizon': 12,
    'seasonal_periods': 12,
    'min_data_length': 24,
    'img_dir': img_dir,
    'results_file': f'{img_dir}/deep_learning_model_results.csv',
    'n_jobs': -1,
    'lstm_units': 32,
    'gru_units': 32,
    'mlp_units': [32, 16],
    'transformer_heads': 2,
    'transformer_units': 32,
    'look_back': 12,
    'epochs': 50,
    'batch_size': 32,
}

def validate_input_data(df, required_columns):
    """Kiểm tra tính hợp lệ của dữ liệu đầu vào."""
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Thiếu các cột bắt buộc: {required_columns}")
    if df.index.duplicated().any():
        raise ValueError("Index chứa giá trị trùng lặp!")
    if not df.index.is_monotonic_increasing:
        raise ValueError("Index không được sắp xếp tăng dần!")
    if df[required_columns].isnull().sum().any():
        raise ValueError(f"Dữ liệu chứa giá trị thiếu: {df[required_columns].isnull().sum().to_dict()}")
    if df[required_columns].replace([np.inf, -np.inf], np.nan).isnull().sum().any():
        raise ValueError("Dữ liệu chứa giá trị vô cực!")
    if not all(df[required_columns].dtypes.apply(lambda x: np.issubdtype(x, np.number))):
        raise ValueError("Một số cột không phải kiểu số!")

def calculate_metrics(actual, predicted):
    """Tính RMSE, MAE, MAPE, sMAPE, NormMAPE, và Directional Accuracy."""
    actual = np.array(actual, dtype=float)
    predicted = np.array(predicted, dtype=float)
    valid_mask = ~np.isnan(actual) & ~np.isnan(predicted) & ~np.isinf(actual) & ~np.isinf(predicted)
    actual = actual[valid_mask]
    predicted = predicted[valid_mask]
    
    if len(actual) == 0:
        logger.warning("Không có dữ liệu hợp lệ để tính chỉ số!")
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan
    
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    mape = mean_absolute_percentage_error(actual, predicted) * 100 if np.all(np.abs(actual) > 1e-8) else np.nan
    smape = 100 * np.mean(2 * np.abs(predicted - actual) / (np.abs(actual) + np.abs(predicted)))
    norm_mape = mape / np.mean(np.abs(actual)) if not np.isnan(mape) else np.nan
    
    actual_diff = np.diff(actual)
    pred_diff = np.diff(predicted)
    directional_acc = np.mean((actual_diff * pred_diff) > 0) * 100 if len(actual_diff) > 0 else np.nan
    
    return rmse, mae, mape, smape, norm_mape, directional_acc

def plot_forecast(historical, test, forecast, forecast_index, title, ylabel, filename):
    """Vẽ và lưu biểu đồ dự báo."""
    plt.figure(figsize=(12, 6))
    plt.plot(historical.index, historical, label='Lịch sử', color='blue')
    plt.plot(test.index, test, label='Thực tế (Test)', color='green')
    plt.plot(forecast_index, forecast, label='Dự báo', color='orange', linestyle='--', linewidth=2)
    plt.title(title)
    plt.xlabel('Thời gian')
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    try:
        plt.savefig(os.path.join(CONFIG['img_dir'], filename))
        logger.info(f"Đã lưu biểu đồ: {filename}")
    except Exception as e:
        logger.error(f"Lỗi khi lưu biểu đồ {filename}: {str(e)}")
    plt.close()

def plot_comparison_forecasts(historical, test, forecasts, forecast_index, title, ylabel, filename, metrics=None):
    """Vẽ so sánh dự báo từ nhiều mô hình với chú thích chỉ số RMSE."""
    plt.figure(figsize=(14, 8))
    plt.plot(historical.index, historical, label='Lịch sử', color='blue')
    plt.plot(test.index, test, label='Thực tế (Test)', color='green')
    colors = sns.color_palette("husl", len(forecasts))
    for (model_name, forecast), color in zip(forecasts.items(), colors):
        rmse = metrics.get(model_name, {}).get('RMSE', np.nan) if metrics else np.nan
        if forecast is None or pd.isna(rmse):
            logger.warning(f"Bỏ qua {model_name} trong biểu đồ so sánh do thiếu dự báo hoặc RMSE")
            continue
        label = f'Dự báo {model_name} (RMSE: {rmse:.4f})'
        plt.plot(forecast_index, forecast, label=label, linestyle='--', color=color)
    plt.title(title)
    plt.xlabel('Thời gian')
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    try:
        plt.savefig(os.path.join(CONFIG['img_dir'], filename))
        logger.info(f"Đã lưu biểu đồ so sánh: {filename}")
    except Exception as e:
        logger.error(f"Lỗi khi lưu biểu đồ so sánh {filename}: {str(e)}")
    plt.close()

def create_sequences(data, look_back, forecast_horizon=1):
    """Tạo chuỗi dữ liệu cho các mô hình."""
    X, y = [], []
    for i in range(len(data) - look_back - forecast_horizon + 1):
        X.append(data[i:(i + look_back)])
        y.append(data[i + look_back:i + look_back + forecast_horizon])
    return np.array(X), np.array(y)

def run_lstm(train, test, forecast_index, target, scaler):
    """Chạy mô hình LSTM."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        y_train = y_train[:, 0]
        
        model = Sequential()
        model.add(LSTM(units=CONFIG['lstm_units'], input_shape=(CONFIG['look_back'], 1)))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"LSTM for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi LSTM for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_gru(train, test, forecast_index, target, scaler):
    """Chạy mô hình GRU."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        y_train = y_train[:, 0]
        
        model = Sequential()
        model.add(GRU(units=CONFIG['gru_units'], input_shape=(CONFIG['look_back'], 1)))
        model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"GRU for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi GRU for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_mlp(train, test, forecast_index, target, scaler):
    """Chạy mô hình MLP."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
        y_train = y_train[:, 0]
        
        model = Sequential()
        for units in CONFIG['mlp_units']:
            model.add(Dense(units=units, activation='relu'))
            model.add(Dropout(0.2))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:].flatten()
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back']))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"MLP for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi MLP for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_transformer(train, test, forecast_index, target, scaler):
    """Chạy mô hình Transformer đơn giản."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'])
        y_train = y_train[:, 0]
        
        inputs = Input(shape=(CONFIG['look_back'], 1))
        x = MultiHeadAttention(num_heads=CONFIG['transformer_heads'], 
                              key_dim=CONFIG['transformer_units'])(inputs, inputs)
        x = LayerNormalization(epsilon=1e-6)(x)
        x = Dense(CONFIG['transformer_units'], activation='relu')(x)
        x = Dropout(0.2)(x)
        x = Dense(1)(x)
        outputs = x[:, -1, :]
        model = Model(inputs, outputs)
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        forecast = []
        for _ in range(CONFIG['forecast_horizon']):
            last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
            next_pred = model.predict(last_sequence_reshaped, verbose=0)
            forecast.append(next_pred[0, 0])
            last_sequence = np.roll(last_sequence, -1)
            last_sequence[-1] = next_pred
        
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()
        residuals = train[CONFIG['look_back']:].values - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"Transformer for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi Transformer for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_seq2seq(train, test, forecast_index, target, scaler):
    """Chạy mô hình Seq2Seq đơn giản."""
    start_time = time.time()
    try:
        train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
        test_scaled = scaler.transform(test.values.reshape(-1, 1))
        
        X_train, y_train = create_sequences(train_scaled, CONFIG['look_back'], CONFIG['forecast_horizon'])
        
        model = Sequential()
        model.add(LSTM(CONFIG['lstm_units'], input_shape=(CONFIG['look_back'], 1), return_sequences=False))
        model.add(Dropout(0.2))
        model.add(Dense(CONFIG['forecast_horizon']))
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=CONFIG['epochs'], batch_size=CONFIG['batch_size'], 
                 callbacks=[early_stopping], verbose=0)
        
        last_sequence = train_scaled[-CONFIG['look_back']:]
        last_sequence_reshaped = last_sequence.reshape((1, CONFIG['look_back'], 1))
        forecast_scaled = model.predict(last_sequence_reshaped, verbose=0)
        forecast = scaler.inverse_transform(forecast_scaled).flatten()
        forecast_series = pd.Series(forecast, index=forecast_index)
        
        train_pred_scaled = model.predict(X_train, verbose=0)
        train_pred = scaler.inverse_transform(train_pred_scaled).flatten()[:len(train) - CONFIG['look_back']]
        residuals = train[CONFIG['look_back']:].values[:len(train_pred)] - train_pred
        
        rmse, mae, mape, smape, norm_mape, dir_acc = calculate_metrics(test, forecast_series)
        elapsed_time = time.time() - start_time
        logger.info(f"Seq2Seq for {target}: RMSE={rmse:.4f}, MAE={mae:.4f}, MAPE={mape:.4f}, "
                    f"sMAPE={smape:.4f}, NormMAPE={norm_mape:.4f}, DirAcc={dir_acc:.4f}, Time={elapsed_time:.2f}s")
        
        return forecast_series, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, None
    
    except Exception as e:
        logger.error(f"Lỗi Seq2Seq for {target}: {str(e)}")
        return None, None, None, None, None, None, None, None, None

def run_model_for_target(target, train, test, forecast_index, model_name, model_func, scaler):
    """Chạy một mô hình cho một mục tiêu cụ thể."""
    logger.info(f"Chạy {model_name} cho {target}")
    start_time = time.time()
    try:
        forecast, residuals, rmse, mae, mape, smape, norm_mape, dir_acc, ci = model_func(
            train[target], test[target], forecast_index, target, scaler
        )
        
        if forecast is None or rmse is None:
            logger.error(f"{model_name} cho {target} không tạo được dự báo hoặc RMSE hợp lệ!")
            return None
        
        plot_forecast(
            train[target][-36:], test[target], forecast, forecast_index,
            f'{model_name} Forecast for {target}', target,
            f'{target}_{model_name}_forecast.png'
        )
        elapsed_time = time.time() - start_time
        logger.info(f"Hoàn thành {model_name} cho {target} trong {elapsed_time:.2f}s")
        return {
            'Target': target,
            'Model': model_name,
            'RMSE': rmse,
            'MAE': mae,
            'MAPE': mape,
            'sMAPE': smape,
            'NormMAPE': norm_mape,
            'DirAcc': dir_acc,
            'Forecast': forecast,
            'Residuals': residuals,
            'CI': ci
        }
    except Exception as e:
        logger.error(f"Lỗi khi chạy {model_name} cho {target}: {str(e)}")
        return None

def main():
    """Hàm chính để chạy các mô hình DL và lưu kết quả."""
    try:
        data = pd.read_csv('data/analyzed_time_series.csv')
        data['time'] = pd.to_datetime(data['time'])
        data.set_index('time', inplace=True)
        required_columns = ['cpi_mom', 'cpi_yoy']
        validate_input_data(data, required_columns)
        
        train_size = len(data) - CONFIG['forecast_horizon']
        train, test = data[:train_size], data[train_size:]
        forecast_index = pd.date_range(start=test.index[0], periods=CONFIG['forecast_horizon'], freq='MS')
        
        scaler = MinMaxScaler(feature_range=(0, 1))
        
        models = {
            'LSTM': (run_lstm, {}),
            'GRU': (run_gru, {}),
            'MLP': (run_mlp, {}),
            'Transformer': (run_transformer, {}),
            'Seq2Seq': (run_seq2seq, {})
        }
        
        results = []
        forecasts_mom = {}
        forecasts_yoy = {}
        metrics_mom = {}
        metrics_yoy = {}
        
        for target in ['cpi_mom', 'cpi_yoy']:
            logger.info(f"Chạy các mô hình cho {target}")
            tasks = [
                delayed(run_model_for_target)(
                    target, train, test, forecast_index, model_name, model_func, scaler
                )
                for model_name, (model_func, params) in models.items()
            ]
            model_results = Parallel(n_jobs=CONFIG['n_jobs'], verbose=1)(tasks)
            
            for result in model_results:
                if result is not None:
                    results.append({
                        'Target': result['Target'],
                        'Model': result['Model'],
                        'RMSE': result['RMSE'],
                        'MAE': result['MAE'],
                        'MAPE': result['MAPE'],
                        'sMAPE': result['sMAPE'],
                        'NormMAPE': result['NormMAPE'],
                        'DirAcc': result['DirAcc']
                    })
                    if result['Target'] == 'cpi_mom':
                        forecasts_mom[result['Model']] = result['Forecast']
                        metrics_mom[result['Model']] = {'RMSE': result['RMSE']}
                    else:
                        forecasts_yoy[result['Model']] = result['Forecast']
                        metrics_yoy[result['Model']] = {'RMSE': result['RMSE']}
                else:
                    logger.warning(f"Kết quả cho một mô hình của {target} là None, bỏ qua!")
        
        for target, forecasts, metrics in [('cpi_mom', forecasts_mom, metrics_mom), ('cpi_yoy', forecasts_yoy, metrics_yoy)]:
            if not forecasts:
                logger.warning(f"Không có dự báo hợp lệ cho {target}, bỏ qua biểu đồ so sánh")
                continue
            plot_comparison_forecasts(
                train[target][-36:], test[target], forecasts, forecast_index,
                f'Comparison of Forecasts for {target}', target,
                f'{target}_model_comparison.png', metrics=metrics
            )
        
        results_df = pd.DataFrame(results)
        print(results_df)
        results_df.to_csv(CONFIG['results_file'], index=False)
        logger.info(f"Kết quả đã được lưu vào {CONFIG['results_file']}")
        
        for target, forecasts in [('cpi_mom', forecasts_mom), ('cpi_yoy', forecasts_yoy)]:
            if not forecasts:
                logger.warning(f"Không có dự báo hợp lệ cho {target}, bỏ qua lưu dự báo kết hợp")
                continue
            combined_forecast = pd.DataFrame({'Date': forecast_index})
            for model_name, forecast in forecasts.items():
                combined_forecast[f'{model_name}_{target}'] = forecast
            combined_forecast.to_csv(f'{img_dir}/combined_forecast_{target}.csv', index=False)
            logger.info(f"Dự báo kết hợp cho {target} đã được lưu vào combined_forecast_{target}.csv")
        
    except Exception as e:
        logger.error(f"Lỗi chương trình chính: {str(e)}")
        raise

if __name__ == "__main__":
    main()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed:    7.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed:    6.8s finished


    Target        Model      RMSE       MAE      MAPE     sMAPE  NormMAPE  \
0  cpi_mom         LSTM  0.309290  0.225492  0.224831  0.224798  0.002243   
1  cpi_mom          GRU  0.338516  0.270818  0.270087  0.269962  0.002694   
2  cpi_mom          MLP  0.284207  0.200287  0.199559  0.199612  0.001991   
3  cpi_mom  Transformer  0.299966  0.212500  0.211749  0.211845  0.002112   
4  cpi_mom      Seq2Seq  0.326438  0.255195  0.254508  0.254397  0.002539   
5  cpi_yoy         LSTM  1.088746  0.789984  0.766410  0.761086  0.007395   
6  cpi_yoy          GRU  3.328705  2.697325  2.612690  2.561679  0.025209   
7  cpi_yoy          MLP  0.839439  0.717121  0.690805  0.694071  0.006665   
8  cpi_yoy  Transformer  1.970888  1.855510  1.794501  1.776467  0.017314   
9  cpi_yoy      Seq2Seq  1.413090  1.002847  0.973210  0.963881  0.009390   

      DirAcc  
0  45.454545  
1  63.636364  
2  45.454545  
3   0.000000  
4  36.363636  
5  54.545455  
6  54.545455  
7  36.363636  
8   0.000000  
9 