In [7]:
import re
import pandas as pd

In [30]:
import os
import glob

# 目標資料夾
log_dir = os.path.join('..', 'outputs', 'logs')

# 找出所有 .txt 檔案的完整路徑
txt_paths = glob.glob(os.path.join(log_dir, '*.txt'))

file_list = []
# 印出檔名（不包含路徑）
for path in txt_paths:
    file_list.append(os.path.basename(path))

file_list.sort()
file_list


['log2025-06-10 03:59:48.123047.txt',
 'log2025-06-10 04:10:56.478748.txt',
 'log2025-06-10 04:27:29.775575.txt',
 'log2025-06-10 04:35:58.877936.txt',
 'log2025-06-10 04:50:50.788955.txt',
 'log2025-06-10 04:59:21.054235.txt',
 'log2025-06-10 05:14:31.564936.txt',
 'log2025-06-10 05:25:03.028841.txt',
 'log2025-06-10 05:37:46.010443.txt',
 'log2025-06-10 05:44:39.159663.txt',
 'log2025-06-10 05:51:15.262144.txt',
 'log2025-06-10 05:57:56.253279.txt',
 'log2025-06-10 06:04:29.959322.txt',
 'log2025-06-10 06:11:39.120909.txt',
 'log2025-06-10 06:18:04.865421.txt',
 'log2025-06-10 06:24:39.971342.txt']

In [31]:
import re
import pandas as pd


# 1. 讀取 log 檔
with open(f"../outputs/logs/{file_list[0]}", encoding='utf-8') as f:
    text = f.read()

# 2. 定義正則
method_pattern   = re.compile(r'(Mixup Method|Normal Method)')
state_pattern    = re.compile(r'以下：(?P<state>未擾動|擾動)')
metric_pattern   = re.compile(
    r'Mean Absolute Error:\s*(?P<mae>[\d\.]+).*?'
    r'Root Mean Squared Error:\s*(?P<rmse>[\d\.]+)',
    re.DOTALL
)
meta_pattern     = re.compile(
    r'dataset_name:\s*(?P<dataset>[^,]+),\s*'
    r'epsilon:\s*(?P<epsilon>[^,]+),\s*'
    r'attack_method:\s*(?P<attack>[^,]+),\s*'
    r'adversarial_model_name:\s*(?P<adv_model>\S+)'
)

# 3. 掃描
rows = []
current_method = None
current_state  = None
current_meta   = {}

for i, line in enumerate(text.splitlines()):
    # metadata
    m_meta = meta_pattern.search(line)
    if m_meta:
        current_meta = {
            'dataset_name': m_meta.group('dataset'),
            'epsilon': m_meta.group('epsilon'),
            'attack_method': m_meta.group('attack'),
            'adversarial_model_name': m_meta.group('adv_model'),
        }
        continue

    # 方法
    m = method_pattern.search(line)
    if m:
        current_method = m.group(1).replace(' Method','')
        continue

    # 狀態
    s = state_pattern.search(line)
    if s:
        current_state = s.group('state')
        continue

    # 指標
    block = "\n".join(text.splitlines()[i:i+5])
    mm = metric_pattern.search(block)
    if mm and current_method and current_state and current_meta:
        row = {
            'method': current_method,
            'state' : current_state,
            'mae'   : float(mm.group('mae')),
            'rmse'  : float(mm.group('rmse')),
        }
        # 把 metadata 加進來
        row.update(current_meta)
        rows.append(row)

# 4. 建 DataFrame
df = pd.DataFrame(rows)

# 5. 分組計算平均
summary = (
    df
    .groupby(
        ['dataset_name','epsilon','attack_method','adversarial_model_name',
         'method','state'],
        as_index=False
    )[['mae','rmse']]
    .mean()
)

# 6. 輸出
# 先把 metadata 唯一值列出來：
meta_rows = summary[['dataset_name','epsilon','attack_method','adversarial_model_name']].drop_duplicates()
print('=== Parsed Metadata ===')
print(meta_rows.to_string(index=False))

print('\n=== MAE & RMSE 平均（按方法+狀態）===')
print(summary[['method','state','mae','rmse']].to_string(index=False))


=== Parsed Metadata ===
    dataset_name epsilon attack_method adversarial_model_name
campus_processed     0.1          FGSM                  mixup

=== MAE & RMSE 平均（按方法+狀態）===
method state       mae      rmse
 Mixup    擾動 13.887047 18.852269
 Mixup   未擾動 11.224562 16.698839
Normal    擾動 16.625708 21.676217
Normal   未擾動  9.074347 13.566548


In [34]:
import os
import glob
import re
import pandas as pd

# 1. 取得所有 .txt log 檔
log_dir   = os.path.join('..', 'outputs', 'logs')
file_list = sorted(glob.glob(os.path.join(log_dir, '*.txt')))

# 2. 準備 regex
method_pattern = re.compile(r'(Mixup Method|Normal Method)')
state_pattern  = re.compile(r'以下：(?P<state>未擾動|擾動)')
metric_pattern = re.compile(
    r'Mean Absolute Error:\s*(?P<mae>[\d\.]+).*?'
    r'Root Mean Squared Error:\s*(?P<rmse>[\d\.]+)',
    re.DOTALL
)
meta_pattern   = re.compile(
    r'dataset_name:\s*(?P<dataset>[^,]+),\s*'
    r'epsilon:\s*(?P<epsilon>[^,]+),\s*'
    r'attack_method:\s*(?P<attack>[^,]+),\s*'
    r'adversarial_model_name:\s*(?P<adv_model>\S+)'
)

# 3. 逐檔解析
rows = []
for file_path in file_list:
    fn = os.path.basename(file_path)
    with open(file_path, encoding='utf-8') as f:
        text = f.read()

    current_method = None
    current_state  = None
    current_meta   = {}

    for i, line in enumerate(text.splitlines()):
        # 解析 metadata
        m_meta = meta_pattern.search(line)
        if m_meta:
            current_meta = {
                'dataset_name': m_meta.group('dataset'),
                'epsilon': m_meta.group('epsilon'),
                'attack_method': m_meta.group('attack'),
                'adversarial_model_name': m_meta.group('adv_model'),
            }
            continue

        # 解析方法
        m = method_pattern.search(line)
        if m:
            current_method = m.group(1).replace(' Method', '')
            continue

        # 解析狀態
        s = state_pattern.search(line)
        if s:
            current_state = s.group('state')
            continue

        # 解析指標
        block = "\n".join(text.splitlines()[i:i+5])
        mm = metric_pattern.search(block)
        if mm and current_method and current_state and current_meta:
            row = {
                'file': fn,
                'method': current_method,
                'state': current_state,
                'mae': float(mm.group('mae')),
                'rmse': float(mm.group('rmse')),
            }
            row.update(current_meta)
            rows.append(row)

# 4. 合併為 DataFrame
df = pd.DataFrame(rows)

# 5. 分組計算平均
summary = (
    df
    .groupby(
        ['file',
         'dataset_name','epsilon','attack_method','adversarial_model_name',
         'method','state'],
        as_index=False
    )[['mae','rmse']]
    .mean()
)

# 6. 輸出：完整表格
print(summary.to_string(index=False))
summary.to_csv('summary_metrics.csv', index=False, encoding='utf-8-sig')
print('已儲存 summary_metrics.csv')


                             file     dataset_name epsilon attack_method adversarial_model_name method state         mae        rmse
log2025-06-10 03:59:48.123047.txt campus_processed     0.1          FGSM                  mixup  Mixup    擾動   13.887047   18.852269
log2025-06-10 03:59:48.123047.txt campus_processed     0.1          FGSM                  mixup  Mixup   未擾動   11.224562   16.698839
log2025-06-10 03:59:48.123047.txt campus_processed     0.1          FGSM                  mixup Normal    擾動   16.625708   21.676217
log2025-06-10 03:59:48.123047.txt campus_processed     0.1          FGSM                  mixup Normal   未擾動    9.074347   13.566548
log2025-06-10 04:10:56.478748.txt campus_processed     0.1          FGSM                     AT  Mixup    擾動   11.751062   17.325382
log2025-06-10 04:10:56.478748.txt campus_processed     0.1          FGSM                     AT  Mixup   未擾動   10.104637   15.725702
log2025-06-10 04:10:56.478748.txt campus_processed     0.1          F