# AutoGluon for automated model training
Use AutoGluon to train and evaluate the traditional machine-learning baselines.

In [15]:
import os
import logging

# Working directory for the rest of the notebook: the log, model and result
# paths used below are all written relative to it.
PROJECT_DIR = 'LLM4Traffic/code/Traditional'

# A relative chdir is not idempotent: re-running this cell while already inside
# the project directory would raise FileNotFoundError.  Only change directory
# when the current one does not already end with PROJECT_DIR.
_project_parts = os.path.normpath(PROJECT_DIR).split(os.sep)
if os.getcwd().split(os.sep)[-len(_project_parts):] != _project_parts:
    os.chdir(PROJECT_DIR)

# logging.FileHandler does not create missing parent directories.
os.makedirs('logs', exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # mode='w' truncates the log file each time this cell runs.
        logging.FileHandler('logs/AutoGluon.log', mode='w'),
        # Also emit records on the default stream (stderr).
        logging.StreamHandler(),
    ],
    # Replace any handlers already attached to the root logger, so re-running
    # the cell does not leave stale handlers in place.
    force=True,
)

# Root logger, configured by basicConfig above.
logger = logging.getLogger()

In [16]:
# Experiment selection.  These three values are interpolated into the dataset,
# model and result paths built by the cells below.
experiment = 'polishednsLen811'
dataset = 'tls'
# NOTE: `type` shadows the Python builtin of the same name; the name is kept
# because other cells in this notebook reference it.
type = 'add_info_ip'

In [17]:
from autogluon.tabular import TabularDataset, TabularPredictor

# All paths are relative to the working directory selected by os.chdir() in the
# first cell, exactly like model_path and the results directory.  The previous
# value repeated the 'LLM4Traffic/code/Traditional' prefix, which after that
# chdir resolves to a nested, non-existent directory.
data_path = f'datasets/{experiment}/{dataset}/{type}'
results_dir = f'results/{experiment}/{dataset}/{type}'

# Model families handed to AutoGluon.  Each 'RF' entry carries its own
# 'problem_types' list in ag_args; the 'squared_error' entry lists only
# regression/quantile.
hyperparameters = {
    'GBM': [{}],
    'NN_TORCH': {},
    'XGB': {},
    'RF': [
        {'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}},
        {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}},
        {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}},
    ],
}

# Additional metrics requested from the leaderboard on top of its default score.
extra_metrics = ['f1_macro', 'f1_micro', 'precision_macro', 'precision_micro', 'recall_macro', 'recall_micro']

# The test set and the results directory do not depend on the split, so they
# are prepared once instead of on every iteration.
test_data = TabularDataset(f'{data_path}/test.csv')
os.makedirs(results_dir, exist_ok=True)

# `split_id` instead of `id`, to avoid shadowing the builtin.
for split_id in range(1):
    split_dir = f'{data_path}/train_val_split_{split_id}'
    train_data = TabularDataset(f'{split_dir}/train.csv')
    tuning_data = TabularDataset(f'{split_dir}/val.csv')

    model_path = f'AutogluonModels/{experiment}/{dataset}/{type}_{split_id}_123'
    os.makedirs(model_path, exist_ok=True)

    predictor = TabularPredictor(label='class', path=model_path).fit(
        train_data=train_data,
        tuning_data=tuning_data,
        num_gpus=1,
        hyperparameters=hyperparameters,
    )

    # NOTE(review): the aggregation cell further down reads
    # '{dataset}_{id}.csv' (no '_123' suffix) for ids 0..2 -- confirm which
    # file name and how many splits are intended.
    leaderboard = predictor.leaderboard(test_data, extra_metrics=extra_metrics)
    leaderboard.to_csv(f'{results_dir}/{dataset}_{split_id}_123.csv')

Loaded data from: /home/dauin_user/yzhao/LLM4Traffic/code/Traditional/datasets/polishednsLen811/tls/add_info_ip/train_val_split_0/train.csv | Columns = 31 / 31 | Rows = 486516 -> 486516
Loaded data from: /home/dauin_user/yzhao/LLM4Traffic/code/Traditional/datasets/polishednsLen811/tls/add_info_ip/train_val_split_0/val.csv | Columns = 31 / 31 | Rows = 60816 -> 60816
Loaded data from: /home/dauin_user/yzhao/LLM4Traffic/code/Traditional/datasets/polishednsLen811/tls/add_info_ip/test.csv | Columns = 31 / 31 | Rows = 60871 -> 60871
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.11.10
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #173-Ubuntu SMP PREEMPT Tue Jul 11 08:08:57 UTC 2023
CPU Count:          64
Memory Avail:       326.10 GB / 376.41 GB (86.6%)
Disk Space Avail:   925335.94 GB / 1374003.67 GB (67.3%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended 

In [9]:
import numpy as np
import pandas as pd
import scipy.stats as stats

def calculate_ci(data, confidence=0.95):
    """Return the sample mean and the half-width of its Student-t confidence interval.

    Both returned values are multiplied by 100, so fractional scores such as
    0.85 are reported as percentages (85.0).

    Parameters
    ----------
    data : sequence of float
        One-dimensional collection of repeated measurements.
    confidence : float, optional
        Two-sided confidence level of the interval. Defaults to 0.95.

    Returns
    -------
    tuple of (float, float)
        ``(mean * 100, margin_of_error * 100)``. The margin is NaN when fewer
        than two measurements are given, because the sample standard deviation
        is undefined; both values are NaN for empty input.
    """
    values = np.asarray(data, dtype=float)
    n = values.size
    if n == 0:
        return float('nan'), float('nan')

    mean = values.mean()
    if n < 2:
        # ddof=1 would divide by zero; report the mean without a margin.
        return mean * 100, float('nan')

    # Standard error of the mean from the sample standard deviation.
    sem = values.std(ddof=1) / np.sqrt(n)

    # Half-width of the symmetric t-interval.  Computed from the t quantile
    # rather than stats.t.interval(..., scale=sem), because a scale of 0
    # (all measurements identical) makes the latter return NaN instead of 0.
    t_critical = stats.t.ppf((1 + confidence) / 2, n - 1)
    margin_of_error = t_critical * sem

    return mean * 100, margin_of_error * 100

models = ['RandomForestGini', 'XGBoost', 'LightGBM', 'NeuralNetTorch']

# One dictionary per reported quantity: model name -> list with one value per split.
acc = {model: [] for model in models}
f1_macro = {model: [] for model in models}
f1_micro = {model: [] for model in models}
inference_time = {model: [] for model in models}

# Leaderboard column -> dictionary that collects its values.
columns = {
    'score_test': acc,
    'f1_macro': f1_macro,
    'f1_micro': f1_micro,
    'pred_time_test': inference_time,
}

# Iterate over the splits and read the leaderboard saved for each of them.
# NOTE(review): the training cell above writes '{dataset}_{id}_123.csv' and
# only loops over range(1); this cell reads '{dataset}_{id}.csv' for three
# splits -- confirm which file name and split count are intended.
for split_id in range(3):
    results_file = f'results/{experiment}/{dataset}/{type}/{dataset}_{split_id}.csv'
    df = pd.read_csv(results_file)
    for model in models:
        model_data = df[df['model'] == model]
        if model_data.empty:
            # Fail with the model and file name instead of an opaque
            # IndexError from `.values[0]`.
            raise ValueError(f"model '{model}' not found in {results_file}")
        # Each model is assumed to appear once per leaderboard; the first
        # matching row is used.
        for column, store in columns.items():
            store[model].append(model_data[column].values[0])

# (label, collected values, divisor).  calculate_ci multiplies its results by
# 100; the divisor of 100 undoes that for the timing values, which are not
# fractions to be shown as percentages.
report = [
    ('Acc', acc, 1),
    ('F1-Macro', f1_macro, 1),
    ('F1-micro', f1_micro, 1),
    ('Time', inference_time, 100),
]

for model in models:
    print(model)

    for label, store, divisor in report:
        measurement_result, margin_of_error = calculate_ci(store[model])
        print(f"Reported {label} Result: {measurement_result / divisor:.1f} ± {margin_of_error / divisor:.1f} (95% confidence level)")

    print()

RandomForestGini
Reported Acc Result: 81.8 ± 3.0 (95% confidence level)
Reported F1-Macro Result: 78.0 ± 3.0 (95% confidence level)
Reported F1-micro Result: 81.8 ± 3.0 (95% confidence level)
Reported Time Result: 41.0 ± 8.9 (95% confidence level)

XGBoost
Reported Acc Result: 85.1 ± 1.9 (95% confidence level)
Reported F1-Macro Result: 82.0 ± 1.7 (95% confidence level)
Reported F1-micro Result: 85.1 ± 1.9 (95% confidence level)
Reported Time Result: 207.9 ± 106.7 (95% confidence level)

LightGBM
Reported Acc Result: 85.6 ± 1.8 (95% confidence level)
Reported F1-Macro Result: 82.4 ± 2.0 (95% confidence level)
Reported F1-micro Result: 85.6 ± 1.8 (95% confidence level)
Reported Time Result: 544.5 ± 425.1 (95% confidence level)

NeuralNetTorch
Reported Acc Result: 74.1 ± 2.8 (95% confidence level)
Reported F1-Macro Result: 68.8 ± 2.9 (95% confidence level)
Reported F1-micro Result: 74.1 ± 2.8 (95% confidence level)
Reported Time Result: 1.7 ± 0.1 (95% confidence level)

