# Data preprocessing

In [1]:
import pandas as pd
import numpy as np

In [2]:
def check_nan(array, array_name):
    """Print how many NaN entries ``array`` contains.

    Parameters
    ----------
    array : array-like accepted by ``np.isnan``
        Numeric data to inspect.
    array_name : str
        Label used in the printed message.

    Returns
    -------
    None
        The result is only reported on stdout.
    """
    # Build the NaN mask once and count it directly instead of materialising
    # every NaN index.
    nan_count = int(np.count_nonzero(np.isnan(array)))
    if nan_count:
        print(f"Found {nan_count} nan in {array_name}")
    else:
        print(f"No NaN in {array_name}")

## **Drop columns**

In [3]:
# Load the raw workbook.
file_path = "/content/CLN_SG_V2.xlsx"
df = pd.read_excel(file_path)

# Share of missing values per column, in percent (despite the name, this is
# not a count).
null_count = df.isna().mean() * 100
print(null_count.sort_values(ascending=False))

# Keep columns with at most 10 % missing values; everything above is dropped.
within_limit = null_count <= 10
over_limit = null_count > 10
kept_cols = df.columns[within_limit]
removed_cols = df.columns[over_limit]
df = df[kept_cols]

print(f"kept cols: {kept_cols.to_list()}")
print(f"removed cols: {removed_cols.to_list()}")

print(f"kept {len(kept_cols)} cols")
print(f"removed {len(removed_cols)} cols")


FileNotFoundError: [Errno 2] No such file or directory: '/content/CLN_SG_V2.xlsx'

## **Outliers**

In [None]:
df_clean = df.copy()

# Replace every numeric value lying more than three standard deviations from
# its column mean with NaN.
numeric_cols = df_clean.select_dtypes(include='number').columns
for col in numeric_cols:
    series = df_clean[col]
    center, spread = series.mean(), series.std()
    lower = center - 3 * spread
    upper = center + 3 * spread

    is_outlier = (series < lower) | (series > upper)
    df_clean[col] = np.where(is_outlier, np.nan, series)

## **Missing values**

In [None]:
# Fill gaps by linear interpolation between neighbouring rows.
df_clean = df_clean.interpolate(method='linear')

# Anything interpolation left unfilled is back-filled, then forward-filled.
# DataFrame.bfill()/ffill() are used because fillna(method=...) is deprecated
# in pandas 2.1+.
df_clean = df_clean.bfill().ffill()
df = df_clean
# Total number of remaining missing cells (displayed as the cell output).
df.isnull().sum().sum()

## **Feature selection**

In [None]:
# Correlation of every numeric column with the target (target itself removed).
correlations = df.corr(numeric_only=True)['Man_song_saigon'].drop('Man_song_saigon')

# Keep features whose correlation with the target exceeds 0.5. Strongly
# negative correlations are not selected by this test.
selected_features = correlations[correlations > 0.5].index.tolist()

# Always carry the target and the date column along. 'Ngay' is filtered out
# first so it cannot appear twice if it is numeric and passes the threshold.
selected_features = [c for c in selected_features if c != 'Ngay'] + ['Man_song_saigon', 'Ngay']

# The list always holds at least the two columns added above, so no fallback
# selection is needed.
df_selected = df[selected_features]

df = df_selected
df.info()


## **Set index, choose col**

In [None]:
df_selected.info()

# set_index returns a new frame, so df_selected keeps its 'Ngay' column and
# this cell can be re-run without raising a KeyError.
df = df_selected.set_index('Ngay')

# NOTE(review): these columns are hard-coded; any of them that did not pass
# the correlation filter applied to df_selected will raise a KeyError here.
chosen_col = ['Man_song_saigon', 'Dodan_vao_nha_may', 'pH_Song_SG']
df = df[chosen_col]

df.head()

## **CREATE DATA**

In [None]:
# Position of the target column inside df. Informational only: the sequence
# builder in this cell is always called with target_col=0, which relies on
# 'Man_song_saigon' being listed first in every variable set.
column_index = df.columns.get_loc('Man_song_saigon')
print(f"Cột 'Man_song_saigon' là cột số: {column_index}")

# create data
import os
from sklearn.preprocessing import StandardScaler
import joblib

def create_sequences(data, target_col, window_size, forecast_horizon):
    """Cut a DataFrame into sliding input windows and the targets that follow.

    Parameters
    ----------
    data : pd.DataFrame
        Rows are consecutive time steps, columns are features.
    target_col : int
        Positional index of the column to forecast.
    window_size : int
        Number of consecutive rows in each input window.
    forecast_horizon : int
        Number of rows after the window to predict.

    Returns
    -------
    X : np.ndarray, shape (n_samples, window_size, n_features)
    y : np.ndarray, shape (n_samples, forecast_horizon)
        ``n_samples = len(data) - window_size - forecast_horizon + 1``. When
        the data is too short for a single sample, both arrays keep these
        ranks with ``n_samples == 0``.
    """
    # Convert once up front; slicing NumPy arrays in the loop is far cheaper
    # than a DataFrame .iloc lookup per sample.
    values = data.to_numpy()
    # The target is taken from its own column so it keeps that column's dtype.
    target = data.iloc[:, target_col].to_numpy()

    n_samples = len(data) - window_size - forecast_horizon + 1
    if n_samples <= 0:
        # Keep the documented ranks so callers can still reshape/inspect them.
        return (
            np.empty((0, window_size, values.shape[1]), dtype=values.dtype),
            np.empty((0, forecast_horizon), dtype=target.dtype),
        )

    X = np.stack([values[i : i + window_size] for i in range(n_samples)])
    y = np.stack([
        target[i + window_size : i + window_size + forecast_horizon]
        for i in range(n_samples)
    ])
    return X, y

def _scale_windows(scaler, windows, fit=False):
    """Apply ``scaler`` to a (samples, window, features) array, feature by feature.

    The array is flattened to 2-D so the scaler receives one row per time
    step, then restored to its original shape. With ``fit=True`` the scaler is
    fitted on ``windows`` first.
    """
    flat = windows.reshape(-1, windows.shape[-1])
    scaled = scaler.fit_transform(flat) if fit else scaler.transform(flat)
    return scaled.reshape(windows.shape)


# Maps "ws{window}_fh{horizon}_var{n_features}" to the scaled splits and the
# fitted scalers of that configuration.
output = {}

# Parameter grid
n_aheads = [1, 3, 7]
window_sizes = [7, 15, 30]
# Named variable_sets rather than vars so the vars() builtin is not shadowed.
variable_sets = [['Man_song_saigon'], ['Man_song_saigon', 'Dodan_vao_nha_may', 'pH_Song_SG']]

for window_size in window_sizes:
    for n_ahead in n_aheads:
        for var in variable_sets:
            forecast_horizon = n_ahead

            # Build the sequences; the target is the first column of every variable set
            X_all, y_all = create_sequences(df[var], target_col=0, window_size=window_size, forecast_horizon=forecast_horizon)

            # Split in order, without shuffling: 60% train, 20% val, 20% test
            n = len(X_all)
            train_end = int(n * 0.6)
            val_end = int(n * 0.8)

            X_train, y_train = X_all[:train_end], y_all[:train_end]
            X_val, y_val = X_all[train_end:val_end], y_all[train_end:val_end]
            X_test, y_test = X_all[val_end:], y_all[val_end:]

            # Scalers are fitted on the training split only and reused for val/test
            scaler_X = StandardScaler()
            scaler_y = StandardScaler()

            X_train_scaled = _scale_windows(scaler_X, X_train, fit=True)
            X_val_scaled = _scale_windows(scaler_X, X_val)
            X_test_scaled = _scale_windows(scaler_X, X_test)

            # Standardise y
            y_train_scaled = scaler_y.fit_transform(y_train)
            y_val_scaled = scaler_y.transform(y_val)
            y_test_scaled = scaler_y.transform(y_test)

            file_name = f"ws{window_size}_fh{n_ahead}_var{len(var)}"
            output[file_name] = {
                "X_train": X_train_scaled,
                "y_train": y_train_scaled,
                "X_test": X_test_scaled,
                "y_test": y_test_scaled,
                "X_val": X_val_scaled,
                "y_val": y_val_scaled,
                "scaler_X": scaler_X,
                "scaler_y": scaler_y,
            }

joblib.dump(output, 'data.pkl')

In [None]:
import os
import joblib
# Read the bundle back from the same relative path it was dumped to
# ('data.pkl'), so this also works when the working directory is not /content.
# NOTE: joblib files are pickle-based; only load files you created yourself.
output = joblib.load('data.pkl')
print(output.keys())
print(type(output))


# **MODELING**

**TKAN**

In [None]:
!pip install tkan -q

In [None]:
import numpy as np
import pandas as pd
import time
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from keras import Sequential, Input
from keras.layers import Dense, LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tkan import TKAN
import pickle
from tqdm import tqdm

In [None]:
# Disable XLA to prevent InvalidArgumentError
os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=0'
tf.config.optimizer.set_jit(False)

# Configure backend
# NOTE(review): keras is already imported in an earlier cell, and Keras reads
# KERAS_BACKEND when it is first imported, so this assignment presumably has
# no effect on the running session — confirm which backend is actually active.
os.environ['KERAS_BACKEND'] = 'jax'

# Evaluation functions
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared error of the predictions."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

def mean_absolute_error(y_true, y_pred):
    """Return the mean of the absolute element-wise differences."""
    abs_errors = np.abs(y_true - y_pred)
    return np.mean(abs_errors)

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Positions where ``y_true`` is zero are left out of the average. If every
    true value is zero, ``np.inf`` is returned.
    """
    mask = y_true != 0
    if not np.any(mask):
        return np.inf
    truth = y_true[mask]
    relative_errors = np.abs((truth - y_pred[mask]) / truth)
    return 100 * np.mean(relative_errors)

# Early stopping callback
def callbacks():
    """Return a new callback list for one ``model.fit`` call.

    The list holds a single EarlyStopping configured with
    ``monitor='val_loss'``, ``patience=5`` and ``restore_best_weights=True``.
    """
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    )
    return [early_stopping]

## **TKAN**

In [None]:


# Training parameters
BATCH_SIZE = 128
N_MAX_EPOCHS = 50
model_id = 'TKAN'
n_aheads = [1, 3, 7]

N_RUNS = 10

# Layer sizes per forecast horizon: n_ahead -> (TKAN units, sub-KAN input/output dim)
TKAN_SIZES = {1: (8, 15), 3: (32, 30), 7: (64, 40)}

# Per-horizon accumulators, appended to once per trained model
results_mae = {model_id: {horizon: [] for horizon in n_aheads}}
results_mape = {model_id: {horizon: [] for horizon in n_aheads}}
time_results = {model_id: {horizon: [] for horizon in n_aheads}}
histories = {model_id: {horizon: [] for horizon in n_aheads}}
result_rows = []
tkan_models = {}


for i in tqdm(range(N_RUNS), desc='RUN: '):
    # Train one model per prepared dataset
    for data in output:
        # Dataset keys encode window size, horizon and number of variables
        match = re.search(r'ws(\d+)_fh(\d+)_var(\d+)', data)
        if not match:
            print(f"Không khớp định dạng: {data}")
            continue

        ws, n_ahead, n_var = (int(group) for group in match.groups())
        print(f"ws = {ws}, n_ahead = {n_ahead}, n_var = {n_var}, Data: {data}")

        # Unpack the scaled splits and the target scaler
        data_dict = output[data]
        X_train_scaled = data_dict['X_train']
        X_val_scaled = data_dict['X_val']
        X_test_scaled = data_dict['X_test']
        y_train_scaled = data_dict['y_train']
        y_val_scaled = data_dict['y_val']
        y_test_scaled = data_dict['y_test']
        scaler_y = data_dict['scaler_y']

        # Build the TKAN model for this horizon
        if n_ahead not in TKAN_SIZES:
            print(f"Unsupported n_ahead: {n_ahead}")
            continue
        units, sub_kan_dim = TKAN_SIZES[n_ahead]
        model = Sequential([
            Input(shape=X_train_scaled.shape[1:]),
            TKAN(units=units,
                 sub_kan_input_dim=sub_kan_dim,
                 sub_kan_output_dim=sub_kan_dim,
                 return_sequences=False,
                 activation='tanh',
                 recurrent_activation='sigmoid'),
            Dense(units=n_ahead, activation='linear')
        ], name=model_id)

        # Compile model
        optimizer = Adam(0.001)
        model.compile(optimizer=optimizer, loss='mae', metrics=['mae'], jit_compile=False)

        # Train model and time the fit
        start_time = time.time()
        history = model.fit(
            X_train_scaled, y_train_scaled,
            validation_data=(X_val_scaled, y_val_scaled),
            batch_size=BATCH_SIZE,
            epochs=N_MAX_EPOCHS,
            callbacks=callbacks(),
            shuffle=False,
            verbose=False
        )
        end_time = time.time()
        training_time = end_time - start_time

        # Store history
        histories[model_id][n_ahead].append(history.history)

        # Predict, then map predictions and targets back to the original scale
        preds_scaled = model.predict(X_test_scaled, verbose=False)
        preds = scaler_y.inverse_transform(preds_scaled)
        y_test_orig = scaler_y.inverse_transform(y_test_scaled)

        # Calculate metrics on the original scale
        mae = mean_absolute_error(y_true=y_test_orig, y_pred=preds)
        mape = mean_absolute_percentage_error(y_true=y_test_orig, y_pred=preds)

        # Store results
        results_mae[model_id][n_ahead].append(mae)
        results_mape[model_id][n_ahead].append(mape)
        time_results[model_id][n_ahead].append(training_time)

        # One CSV row per (run, dataset); key order fixes the column order
        result_rows.append({
            'dataset': data,
            'model': model_id,
            'n_ahead': n_ahead,
            'ws': ws,
            'n_var': n_var,
            'mae': mae,
            'mape': mape,
            'training_time': training_time,
            'run': i
        })

        # Print results
        print(f"\t{model_id} - Time: {training_time:.2f}s, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
        print()

        # Keep the trained model; a later run overwrites the entry for the same dataset
        model_key = f"{model_id}_{data}"
        tkan_models[model_key] = model
        # Drop the loop-local names (the model itself stays referenced by tkan_models)
        del model
        del optimizer

    # After every run, rewrite the CSV and the model pickle with everything so far
    results_df = pd.DataFrame(result_rows)
    results_df.to_csv('tkan_results.csv', index=False)
    print("TKAN results saved to 'tkan_results.csv'")

    with open('tkan_models.pkl', 'wb') as f:
        pickle.dump(tkan_models, f)

In [None]:
# Average the TKAN metrics over the repeated runs and save the result
tkan_df = pd.read_csv('tkan_results.csv')
runs = tkan_df['run'].unique().tolist()
print(runs)
tkan_df_sorted = tkan_df.sort_values(by=['run'], ascending=True)

# One row per configuration, holding the mean of each metric
group_keys = ['dataset', 'model', 'ws', 'n_ahead', 'n_var']
metric_cols = ['mae', 'mape', 'training_time']
tkan_avg = tkan_df.groupby(group_keys)[metric_cols].mean().reset_index()

print(tkan_avg)
tkan_avg.to_csv('tkan_avg.csv', index=False)

## **LSTM**

In [None]:
# Training parameters
BATCH_SIZE = 128
N_MAX_EPOCHS = 50
model_id = 'LSTM'
n_aheads = [1, 3, 7]

N_RUNS = 10

# LSTM units per forecast horizon (n_ahead -> units)
LSTM_UNITS = {1: 8, 3: 32, 7: 64}

# Per-horizon accumulators, appended to once per trained model
results_mae = {model_id: {n_ahead: [] for n_ahead in n_aheads}}
results_mape = {model_id: {n_ahead: [] for n_ahead in n_aheads}}
time_results = {model_id: {n_ahead: [] for n_ahead in n_aheads}}
histories = {model_id: {n_ahead: [] for n_ahead in n_aheads}}
result_rows = []
lstm_models = {}

for i in tqdm(range(N_RUNS), desc='RUN: '):
    # Train one model per prepared dataset
    for data in output:
        # Dataset keys encode window size, horizon and number of variables
        match = re.search(r'ws(\d+)_fh(\d+)_var(\d+)', data)
        if not match:
            print(f"Không khớp định dạng: {data}")
            continue

        ws = int(match.group(1))
        n_ahead = int(match.group(2))
        n_var = int(match.group(3))
        print(f"ws = {ws}, n_ahead = {n_ahead}, n_var = {n_var}, Data: {data}")

        # Load data
        data_dict = output[data]
        X_train_scaled = data_dict['X_train']
        X_val_scaled = data_dict['X_val']
        X_test_scaled = data_dict['X_test']
        y_train_scaled = data_dict['y_train']
        y_val_scaled = data_dict['y_val']
        y_test_scaled = data_dict['y_test']
        scaler_y = data_dict['scaler_y']

        # Define LSTM model; only the number of units depends on the horizon
        if n_ahead not in LSTM_UNITS:
            print(f"Unsupported n_ahead: {n_ahead}")
            continue
        model = Sequential([
            Input(shape=X_train_scaled.shape[1:]),
            LSTM(units=LSTM_UNITS[n_ahead],
                 return_sequences=False,
                 activation='tanh',
                 recurrent_activation='sigmoid'),
            Dense(units=n_ahead, activation='linear')
        ], name=model_id)

        # Compile model
        # NOTE(review): this cell trains with loss='mse' while the TKAN cell
        # uses loss='mae' — confirm the difference is intended for the comparison.
        optimizer = Adam(0.001)
        model.compile(optimizer=optimizer, loss='mse', metrics=['mae'], jit_compile=False)

        # Train model and time the fit
        start_time = time.time()
        history = model.fit(
            X_train_scaled, y_train_scaled,
            validation_data=(X_val_scaled, y_val_scaled),
            batch_size=BATCH_SIZE,
            epochs=N_MAX_EPOCHS,
            callbacks=callbacks(),
            shuffle=False,
            verbose=False
        )
        end_time = time.time()
        training_time = end_time - start_time

        # Store history
        histories[model_id][n_ahead].append(history.history)

        # Predict, then map predictions and targets back to the original scale
        preds_scaled = model.predict(X_test_scaled, verbose=False)
        preds = scaler_y.inverse_transform(preds_scaled)
        y_test_orig = scaler_y.inverse_transform(y_test_scaled)

        # Calculate metrics on the original scale
        mae = mean_absolute_error(y_true=y_test_orig, y_pred=preds)
        mape = mean_absolute_percentage_error(y_true=y_test_orig, y_pred=preds)

        # Store results
        results_mae[model_id][n_ahead].append(mae)
        results_mape[model_id][n_ahead].append(mape)
        time_results[model_id][n_ahead].append(training_time)

        # One CSV row per (run, dataset); key order fixes the column order
        result_rows.append({
            'dataset': data,
            'model': model_id,
            'n_ahead': n_ahead,
            'ws': ws,
            'n_var': n_var,
            'mae': mae,
            'mape': mape,
            'training_time': training_time,
            'run': i
        })

        # Print results
        print(f"{model_id} - Time: {training_time:.2f}s, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
        print()

        # Keep the trained model; a later run overwrites the entry for the same dataset
        model_key = f"{model_id}_{data}"
        lstm_models[model_key] = model

        # Drop the loop-local names (the model itself stays referenced by lstm_models)
        del model
        del optimizer

# Save results to CSV once all runs have finished
results_df = pd.DataFrame(result_rows)
results_df.to_csv('lstm_results.csv', index=False)
print("LSTM results saved to 'lstm_results.csv'")

with open('lstm_models.pkl', 'wb') as f:
    pickle.dump(lstm_models, f)

In [None]:
# Average the LSTM metrics over the repeated runs and save the result
lstm_df = pd.read_csv('lstm_results.csv')
runs = lstm_df['run'].unique().tolist()
print(runs)
lstm_df_sorted = lstm_df.sort_values(by=['run'], ascending=True)

# One row per configuration, holding the mean of each metric
group_keys = ['dataset', 'model', 'ws', 'n_ahead', 'n_var']
metric_cols = ['mae', 'mape', 'training_time']
lstm_avg = lstm_df.groupby(group_keys)[metric_cols].mean().reset_index()

print(lstm_avg)
lstm_avg.to_csv('lstm_avg.csv', index=False)


# Comparison

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def plot_comparison(tkan_df, lstm_df, metric='mae'):
    """
    Plot a grouped bar chart comparing TKAN and LSTM on one metric.

    Rows are averaged per (model, dataset) before plotting, so both per-run
    result tables and already-averaged tables are accepted. The figure is
    shown and also saved to a file named after the upper-cased metric.

    Parameters:
    tkan_df (pd.DataFrame): TKAN results with 'dataset', 'model' and the metric column
    lstm_df (pd.DataFrame): LSTM results with the same columns
    metric (str): Column to plot on the y-axis (e.g. 'mae', 'mape', 'training_time')

    Raises:
    KeyError: if the combined data holds no rows for one of the two models
    """
    # Merge both tables into one DataFrame
    combined_df = pd.concat([tkan_df, lstm_df], ignore_index=True)

    # Datasets in order of first appearance; they define the x-axis groups
    datasets = combined_df['dataset'].unique()

    def _metric_per_dataset(model_name):
        """Mean of ``metric`` per dataset for one model, aligned with ``datasets``."""
        rows = combined_df[combined_df['model'] == model_name]
        if rows.empty:
            raise KeyError(f"no rows for model {model_name!r} in the combined results")
        # Averaging collapses repeated runs; reindex restores the x-axis order
        # and leaves NaN where a dataset is missing for this model.
        return rows.groupby('dataset')[metric].mean().reindex(datasets).to_numpy()

    metric_tkan = _metric_per_dataset('TKAN')
    metric_lstm = _metric_per_dataset('LSTM')

    # x positions of the groups
    spacing = 1.5
    x = np.arange(len(datasets)) * spacing
    bar_width = 0.35

    fig, ax = plt.subplots(figsize=(16, 6))

    # One bar per model, side by side within each group
    bar1 = ax.bar(x - bar_width/2, metric_tkan, width=bar_width, label='TKAN', color='tab:blue')
    bar2 = ax.bar(x + bar_width/2, metric_lstm, width=bar_width, label='LSTM', color='tab:orange')

    # Value labels above the bars: TKAN labels lean left, LSTM labels lean right
    annotate_x_spacing = 0.15
    for bars, x_offset, h_align in ((bar1, -annotate_x_spacing, 'right'),
                                    (bar2, +annotate_x_spacing, 'left')):
        for bar in bars:
            height = bar.get_height()
            if np.isnan(height):
                # No value for this dataset/model pair, so nothing to label
                continue
            ax.annotate(f'{height:.2f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(x_offset, 3),
                        textcoords="offset points",
                        ha=h_align, va='bottom', fontsize=8)

    # Axes, title and legend
    ax.set_ylabel(metric.upper())
    ax.set_title(f'So sánh {metric.upper()} của TKAN và LSTM theo từng bộ dataset')
    ax.set_xticks(x)
    ax.set_xticklabels(datasets, rotation=45, ha='right', fontsize=8)
    ax.legend(title='Model', loc='upper left')

    plt.tight_layout()
    plt.savefig(f"{metric.upper()}")
    plt.show()


In [None]:
# Load the result tables to compare.
# (A commented-out alternative read the TKAN table from 'tkan_results.xlsx'.)
tkan_df = pd.read_excel('tkan_results_weighted.xlsx')
lstm_df = pd.read_csv('lstm_avg.csv')
if 'model' not in tkan_df.columns:
    tkan_df['model'] = "TKAN"

# One chart each for MAE, MAPE and training time
for metric in ('mae', 'mape', 'training_time'):
    plot_comparison(tkan_df, lstm_df, metric=metric)


In [None]:
import shutil
import os
from google.colab import files

# Name shared by the export directory and the zip archive
save_name = "batch128_unit8-32-64_dim15-30-40_mae_2subkanlayer"

# Create the export directory under /content
save_dir = f"/content/{save_name}"
os.makedirs(save_dir, exist_ok=True)

# Entries whose name contains any of these substrings are not exported
exclude_patterns = ["sample_data", ".config", ".zip"]

# Copy everything else from /content into the export directory
for item in os.listdir("/content"):
    if any(pattern in item for pattern in exclude_patterns):
        continue
    src_path = os.path.join("/content", item)
    # Never copy the export directory into itself
    if src_path == save_dir:
        continue
    dst_path = os.path.join(save_dir, item)
    if os.path.isfile(src_path):
        shutil.copy2(src_path, dst_path)
    elif os.path.isdir(src_path):
        shutil.copytree(src_path, dst_path, dirs_exist_ok=True)

# make_archive returns the path of the archive it wrote
zip_path = shutil.make_archive(f"/content/{save_name}", 'zip', save_dir)

# # Download the zip file
# files.download(zip_path)

# # Delete the save_name directory from /content
# shutil.rmtree(save_dir)

# Report only what actually ran: the download and delete steps above are disabled.
print(f"Directory {save_name} copied and zipped to {zip_path}.")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def plot_comparison(tkan_df, baseline_df, metric='mae'):
    """
    Plot a grouped bar chart comparing TKAN and the baseline on one metric.

    Rows are averaged per (model, dataset) before plotting, so per-run result
    tables (several rows per dataset) are accepted as well as averaged ones.
    The figure is shown and also saved to a file named after the upper-cased
    metric.

    Parameters:
    tkan_df (pd.DataFrame): TKAN results with 'dataset', 'model' and the metric column
    baseline_df (pd.DataFrame): Baseline results with the same columns; 'model' must be 'Baseline'
    metric (str): Column to plot on the y-axis (e.g. 'mae', 'mape')

    Raises:
    KeyError: if the combined data holds no rows for one of the two models
    """
    # Merge both tables into one DataFrame
    combined_df = pd.concat([tkan_df, baseline_df], ignore_index=True)

    # Datasets in order of first appearance; they define the x-axis groups
    datasets = combined_df['dataset'].unique()

    def _metric_per_dataset(model_name):
        """Mean of ``metric`` per dataset for one model, aligned with ``datasets``."""
        rows = combined_df[combined_df['model'] == model_name]
        if rows.empty:
            raise KeyError(f"no rows for model {model_name!r} in the combined results")
        # Averaging collapses repeated runs; reindex restores the x-axis order
        # and leaves NaN where a dataset is missing for this model.
        return rows.groupby('dataset')[metric].mean().reindex(datasets).to_numpy()

    metric_tkan = _metric_per_dataset('TKAN')
    metric_baseline = _metric_per_dataset('Baseline')

    # x positions of the groups
    spacing = 1.5
    x = np.arange(len(datasets)) * spacing
    bar_width = 0.35

    fig, ax = plt.subplots(figsize=(16, 6))

    # One bar per model, side by side within each group
    bar1 = ax.bar(x - bar_width/2, metric_tkan, width=bar_width, label='TKAN', color='tab:blue')
    bar2 = ax.bar(x + bar_width/2, metric_baseline, width=bar_width, label='Baseline', color='tab:orange')

    # Value labels above the bars: TKAN labels lean left, baseline labels lean right
    annotate_x_spacing = 0.15
    for bars, x_offset, h_align in ((bar1, -annotate_x_spacing, 'right'),
                                    (bar2, +annotate_x_spacing, 'left')):
        for bar in bars:
            height = bar.get_height()
            if np.isnan(height):
                # No value for this dataset/model pair, so nothing to label
                continue
            ax.annotate(f'{height:.2f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(x_offset, 3),
                        textcoords="offset points",
                        ha=h_align, va='bottom', fontsize=8)

    # Axes, title and legend; the title names the baseline, matching the bars
    ax.set_ylabel(metric.upper())
    ax.set_title(f'So sánh {metric.upper()} của TKAN và Baseline theo từng bộ dataset')
    ax.set_xticks(x)
    ax.set_xticklabels(datasets, rotation=45, ha='right', fontsize=8)
    ax.legend(title='Model', loc='upper left')

    plt.tight_layout()
    plt.savefig(f"{metric.upper()}")
    plt.show()



# Load the TKAN results and the baseline results, and label the baseline rows
tkan_df = pd.read_csv('tkan_results.csv')
baseline_df = pd.read_csv('baseline_results.csv')
baseline_df['model'] = 'Baseline'

# One chart each for MAE and MAPE
for metric in ('mae', 'mape'):
    plot_comparison(tkan_df, baseline_df, metric=metric)