In [None]:
import requests
import json

def get_mitre_attack_mappings(attack_domain_url):
    """
    從 MITRE ATT&CK STIX JSON 獲取 Tactic 和 Technique 的 ID 和 Name 對應表。

    Args:
        attack_domain_url (str): 攻擊領域的 STIX JSON 檔案 URL。
                                 例如: 'https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json'

    Returns:
        tuple: 包含兩個字典的元組，分別是 tactic_mappings 和 technique_mappings。
               tactic_mappings: {tactic_id: tactic_name, ...}
               technique_mappings: {technique_id: technique_name, ...}
    """
    tactic_mappings = {}
    technique_mappings = {}

    try:
        response = requests.get(attack_domain_url)
        response.raise_for_status()  # 檢查 HTTP 請求是否成功
        attack_data = response.json()

        for obj in attack_data['objects']:
            # 處理 Tactics
            if obj.get('type') == 'x-mitre-tactic':
                tactic_id = obj.get('external_references')[0]['external_id'] if obj.get('external_references') else None
                tactic_name = obj.get('name')
                if tactic_id and tactic_name:
                    tactic_mappings[tactic_id] = tactic_name
            
            # 處理 Techniques (包括 Sub-techniques)
            elif obj.get('type') == 'attack-pattern':
                technique_id = obj.get('external_references')[0]['external_id'] if obj.get('external_references') else None
                technique_name = obj.get('name')
                if technique_id and technique_name:
                    technique_mappings[technique_id] = technique_name
        
        return tactic_mappings, technique_mappings

    except requests.exceptions.RequestException as e:
        print(f"錯誤：無法從 URL 獲取資料。請檢查網路連線或 URL：{e}")
        return {}, {}
    except json.JSONDecodeError:
        print(f"錯誤：無法解析 JSON 資料。請檢查 URL 的內容是否為有效的 JSON。")
        return {}, {}
    except Exception as e:
        print(f"發生未知錯誤：{e}")
        return {}, {}

# 使用範例：
enterprise_attack_url = 'https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json'

tactic_id_name_map, technique_id_name_map = get_mitre_attack_mappings(enterprise_attack_url)

print("--- Tactic (策略) 對應表 ---")
# 根據 ID 排序輸出，更易讀
sorted_tactics = sorted(tactic_id_name_map.items())
for tid, tname in sorted_tactics:
    print(f"{tid}: {tname}")

print("\n--- Technique (技術) 對應表 (部分範例) ---")
# 根據 ID 排序輸出，只顯示前 20 個作為範例，因為技術非常多
sorted_techniques = sorted(technique_id_name_map.items())
for i, (teid, tename) in enumerate(sorted_techniques):
    if i < 20: # 只列出前20個作為範例
        print(f"{teid}: {tename}")
    else:
        break

# 你現在可以通過這些字典來查詢 ID 對應的 Name，反之亦然
# print(f"\nTA0001 的名稱是: {tactic_id_name_map.get('TA0001')}")
# print(f"T1003 的名稱是: {technique_id_name_map.get('T1003')}")

In [None]:
import requests
import json

def get_and_save_mitre_attack_mappings_sorted(attack_domain_url, output_json_path):
    """
    從 MITRE ATT&CK STIX JSON 獲取 Tactic 和 Technique 的 ID 和 Name 對應表，
    並將其按 ID 從小到大排序後儲存到一個 JSON 檔案中。

    Args:
        attack_domain_url (str): 攻擊領域的 STIX JSON 檔案 URL。
        output_json_path (str): 輸出的 JSON 檔案路徑。
    """
    tactic_mappings = {}
    technique_mappings = {}

    try:
        print(f"正在從 {attack_domain_url} 下載資料...")
        response = requests.get(attack_domain_url)
        response.raise_for_status()  # 檢查 HTTP 請求是否成功
        attack_data = response.json()
        print("資料下載完成，正在解析...")

        for obj in attack_data['objects']:
            # 處理 Tactics
            if obj.get('type') == 'x-mitre-tactic':
                tactic_id = obj.get('external_references')[0]['external_id'] if obj.get('external_references') else None
                tactic_name = obj.get('name')
                if tactic_id and tactic_name:
                    tactic_mappings[tactic_id] = tactic_name
            
            # 處理 Techniques (包括 Sub-techniques)
            elif obj.get('type') == 'attack-pattern':
                technique_id = obj.get('external_references')[0]['external_id'] if obj.get('external_references') else None
                technique_name = obj.get('name')
                if technique_id and technique_name:
                    technique_mappings[technique_id] = technique_name
        
        # --- 排序步驟 ---
        # 1. 將字典轉換為鍵值對的列表
        # 2. 根據鍵 (ID) 進行排序
        # 3. 創建一個新的有序字典
        
        # 排序 Tactics
        sorted_tactic_mappings = dict(sorted(tactic_mappings.items()))
        
        # 排序 Techniques
        sorted_technique_mappings = dict(sorted(technique_mappings.items()))

        # 將兩個排序後的字典包裝在一個主字典中
        all_mappings = {
            "tactics": sorted_tactic_mappings,
            "techniques": sorted_technique_mappings
        }

        # 將結果寫入 JSON 檔案
        with open(output_json_path, 'w', encoding='utf-8') as f:
            json.dump(all_mappings, f, ensure_ascii=False, indent=4)
        
        print(f"成功將 ATT&CK 對應表（已按 ID 排序）儲存到 '{output_json_path}'。")

    except requests.exceptions.RequestException as e:
        print(f"錯誤：無法從 URL 獲取資料。請檢查網路連線或 URL：{e}")
    except json.JSONDecodeError:
        print(f"錯誤：無法解析 JSON 資料。請檢查 URL 的內容是否為有效的 JSON。")
    except Exception as e:
        print(f"發生未知錯誤：{e}")

# --- 使用範例 ---
enterprise_attack_url = 'https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json'
output_file = 'mitre_attack_mappings_sorted.json' # 你想儲存的 JSON 檔名

get_and_save_mitre_attack_mappings_sorted(enterprise_attack_url, output_file)

# 你可以像之前一樣讀取這個 JSON 檔案，並且會發現裡面的鍵已經是排序好的了
# with open('mitre_attack_mappings_sorted.json', 'r', encoding='utf-8') as f:
#     loaded_mappings_sorted = json.load(f)
#
# print("\n--- 讀取後的 Tactic (策略) 對應表範例 (已排序) ---")
# # 迭代字典，你會看到它們是排序好的
# for tid, tname in loaded_mappings_sorted['tactics'].items():
#     print(f"{tid}: {tname}")
#     break # 只印出第一個作為範例

In [None]:
import requests
import json
import csv
from collections import defaultdict

# --- 1. 獲取 ATT&CK 資料的函數 (與之前相同，無需修改) ---
def get_mitre_attack_data(attack_domain_url):
    """
    從 MITRE ATT&CK STIX JSON 獲取所有 Tactic 和 Technique 的詳細資訊。
    """
    tactic_id_name_map = {}
    technique_id_name_map = {}
    technique_name_id_map = {}
    tactic_id_to_techniques_map = defaultdict(list)
    technique_id_to_tactic_ids_map = defaultdict(list)

    try:
        print(f"正在從 {attack_domain_url} 下載 ATT&CK 資料...")
        response = requests.get(attack_domain_url)
        response.raise_for_status()
        attack_data = response.json()
        print("資料下載完成，正在解析...")

        # 第一遍：建立 Tactic ID 到 Name 的映射
        for obj in attack_data['objects']:
            if obj.get('type') == 'x-mitre-tactic':
                tactic_id = obj.get('external_references')[0]['external_id'] if obj.get('external_references') else None
                tactic_name = obj.get('name')
                if tactic_id and tactic_name:
                    tactic_id_name_map[tactic_id] = tactic_name
        
        # 第二遍：建立 Technique 相關映射，並構建 Technique 與 Tactic 的關係
        for obj in attack_data['objects']:
            if obj.get('type') == 'attack-pattern':
                technique_id = obj.get('external_references')[0]['external_id'] if obj.get('external_references') else None
                technique_name = obj.get('name')
                
                if technique_id and technique_name:
                    technique_id_name_map[technique_id] = technique_name
                    technique_name_id_map[technique_name] = technique_id # 這是我們需要用到的純 Technique 名稱映射

                    if 'kill_chain_phases' in obj:
                        for phase in obj['kill_chain_phases']:
                            phase_name = phase.get('phase_name')
                            if phase_name:
                                # 找到對應的 Tactic ID
                                for tid, tname in tactic_id_name_map.items():
                                    if tname.lower() == phase_name:
                                        tactic_id_to_techniques_map[tid].append(technique_id)
                                        technique_id_to_tactic_ids_map[technique_id].append(tid)
                                        break
        
        # 對結果進行排序 (可選，但讓輸出更有序)
        tactic_id_name_map = dict(sorted(tactic_id_name_map.items()))
        technique_id_name_map = dict(sorted(technique_id_name_map.items()))
        technique_name_id_map = dict(sorted(technique_name_id_map.items()))
        tactic_id_to_techniques_map = {k: sorted(v) for k, v in sorted(tactic_id_to_techniques_map.items())}
        technique_id_to_tactic_ids_map = {k: sorted(v) for k, v in sorted(technique_id_to_tactic_ids_map.items())}

        return (tactic_id_name_map, technique_id_name_map, 
                technique_name_id_map, tactic_id_to_techniques_map, 
                technique_id_to_tactic_ids_map)

    except requests.exceptions.RequestException as e:
        print(f"錯誤：無法從 URL 獲取資料。請檢查網路連線或 URL：{e}")
        return {}, {}, {}, defaultdict(list), defaultdict(list)
    except json.JSONDecodeError:
        print(f"錯誤：無法解析 JSON 資料。請檢查 URL 的內容是否為有效的 JSON。")
        return {}, {}, {}, defaultdict(list), defaultdict(list)
    except Exception as e:
        print(f"發生未知錯誤：{e}")
        return {}, {}, {}, defaultdict(list), defaultdict(list)

# --- 2. 分割並轉換 CSV 的函數 (主要修改點在這裡) ---
def split_csv_by_tactic_and_technique(input_csv_path, output_tactic_csv_path, output_technique_csv_path, attack_data_maps):
    """
    讀取原始 CSV，其標題是 "Tactic Name - Technique Name" 格式，
    將資料分割為 Tactic CSV 和 Technique CSV。
    """
    tactic_id_name_map, technique_id_name_map, technique_name_id_map, \
    tactic_id_to_techniques_map, technique_id_to_tactic_ids_map = attack_data_maps
    

    if not tactic_id_name_map or not technique_id_name_map:
        print("ATT&CK 映射資料不足，無法進行轉換。")
        return

    try:
        with open(input_csv_path, 'r', newline='', encoding='utf-8') as infile:
            reader = csv.reader(infile)
            original_header = next(reader) # 原始 CSV 的標題

            # 儲存原始 CSV 標題到其對應的 Technique ID 的映射
            # {原始列索引: technique_id}
            original_col_index_to_tech_id = {}
            
            print("\n正在解析原始 CSV 標題並匹配 ATT&CK 技術...")
            for i, full_tech_name_in_csv in enumerate(original_header):
                # 假設格式是 "Tactic Name - Technique Name"
                parts = full_tech_name_in_csv.split(' - ', 1) # 只分割一次，避免 Technique 名稱中也包含 '-'
                if len(parts) == 2:
                    tactic_name_from_csv = parts[0].strip()
                    tech_name_from_csv = parts[1].strip()
                elif len(parts) == 1:
                    # 如果沒有 '-'，可能它本身就是 Technique 名稱，或者格式不對
                    tech_name_from_csv = parts[0].strip()
                    tactic_name_from_csv = None # 無法從標題推斷 Tactic
                else:
                    tech_name_from_csv = full_tech_name_in_csv.strip() # 保留原始名稱
                    tactic_name_from_csv = None

                print(f"處理原始標題 '{full_tech_name_in_csv}'，Tactic: '{tactic_name_from_csv}', Technique: '{tech_name_from_csv}'...")

                # 使用純粹的 Technique 名稱進行查找
                tech_id = technique_name_id_map.get(tech_name_from_csv)
                
                if tech_id:
                    # 進一步檢查 Tactic 是否匹配 (可選，但更嚴謹)
                    # 獲取該 Technique 實際所屬的 Tactic ID 列表
                    actual_tactic_ids_for_tech = technique_id_to_tactic_ids_map.get(tech_id, [])
                    
                    # 檢查 CSV 中提供的 Tactic 名稱是否與實際匹配
                    tactic_match = False
                    if tactic_name_from_csv:
                        print("actual_tactic_ids_for_tech:", actual_tactic_ids_for_tech)
                        for actual_tid in actual_tactic_ids_for_tech:
                            print(f"檢查 Tactic '{tactic_name_from_csv}' ({actual_tid}), '{tactic_id_name_map.get(actual_tid)}' 是否匹配...")
                            if tactic_id_name_map.get(actual_tid) == tactic_name_from_csv:
                                tactic_match = True
                                # print(f"匹配成功！使用 Tactic ID: {actual_tid}")
                    else: # 如果 CSV 標題沒有 Tactic 部分，只要技術本身有效就接受
                        tactic_match = True 

                    if tactic_match:
                        original_col_index_to_tech_id[i] = tech_id
                    else:
                        print(f"警告：原始 CSV 標題 '{full_tech_name_in_csv}' 找到技術 '{tech_name_from_csv}' ({tech_id})，但其 Tactic '{tactic_name_from_csv}' 不匹配 ATT&CK 資料。該列將被忽略。")

                else:
                    print(f"警告：原始 CSV 標題 '{full_tech_name_in_csv}' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。")

            # --- 處理 Technique CSV ---
            # 新的 Technique CSV 標題：所有有效的 Technique ID (按 ID 排序)
            technique_csv_header = sorted(list(set(original_col_index_to_tech_id.values())))
            
            # Technique CSV 數據行
            technique_csv_data_rows = []

            # --- 處理 Tactic CSV ---
            # 新的 Tactic CSV 標題：所有 Tactic ID (按 ID 排序)
            tactic_csv_header = sorted(tactic_id_name_map.keys())
            
            # Tactic CSV 數據行
            tactic_csv_data_rows = []

            # 遍歷原始 CSV 的每一行資料
            for row_num, original_row_values in enumerate(reader):
                # 用於存儲當前行的 Technique 值，按 Technique ID 排序
                current_tech_id_values = {tech_id: '0' for tech_id in technique_csv_header} # 預設為 '0'

                # 填充 current_tech_id_values
                for original_index, tech_id in original_col_index_to_tech_id.items():
                    if original_index < len(original_row_values):
                        current_tech_id_values[tech_id] = original_row_values[original_index]
                
                # 添加到 Technique CSV 的數據行 (按排好的標題順序)
                technique_csv_data_rows.append([current_tech_id_values[tech_id] for tech_id in technique_csv_header])

                # 計算 Tactic 值 (例如：任何一個相關 Technique 為 '1' 則 Tactic 為 '1')
                current_tactic_id_values = {}
                for tactic_id in tactic_csv_header:
                    tactic_has_one = '0' # 預設為 '0'
                    associated_techniques = tactic_id_to_techniques_map.get(tactic_id, [])
                    for tech_id in associated_techniques:
                        if current_tech_id_values.get(tech_id) == '1':
                            tactic_has_one = '1'
                            break # 找到一個 '1' 就夠了
                    current_tactic_id_values[tactic_id] = tactic_has_one
                
                # 添加到 Tactic CSV 的數據行 (按排好的標題順序)
                tactic_csv_data_rows.append([current_tactic_id_values[tactic_id] for tactic_id in tactic_csv_header])

        # --- 寫入 Technique CSV 檔案 ---
        with open(output_technique_csv_path, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(technique_csv_header) # 寫入標題
            writer.writerows(technique_csv_data_rows) # 寫入數據
        print(f"成功將 Technique 資料儲存到 '{output_technique_csv_path}'。")

        # --- 寫入 Tactic CSV 檔案 ---
        with open(output_tactic_csv_path, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(tactic_csv_header) # 寫入標題
            writer.writerows(tactic_csv_data_rows) # 寫入數據
        print(f"成功將 Tactic 資料儲存到 '{output_tactic_csv_path}'。")

    except FileNotFoundError:
        print(f"錯誤：找不到檔案 '{input_csv_path}'。請檢查路徑是否正確。")
    except Exception as e:
        print(f"處理 CSV 檔案時發生錯誤：{e}")


# --- 主執行部分 ---
if __name__ == "__main__":
    enterprise_attack_url = 'https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json'
    
    # 1. 獲取 ATT&CK 映射和關係
    attack_data_maps = get_mitre_attack_data(enterprise_attack_url)
    
    # 檢查是否成功獲取資料
    if not attack_data_maps[0] or not attack_data_maps[1]: # 檢查 tactic_id_name_map 和 technique_id_name_map
        print("無法獲取 ATT&CK 映射資料，程序終止。")
    else:
        print("ATT&CK 資料獲取成功。")

        # 3. 處理你的 CSV 檔案並輸出到兩個獨立的 CSV
        input_file = 'train_dataset_list_output_Y.csv'
        output_tactic_file = 'output_tactics_split.csv'
        output_technique_file = 'output_techniques_split.csv'

        split_csv_by_tactic_and_technique(
            input_file, 
            output_tactic_file, 
            output_technique_file, 
            attack_data_maps
        )

        print("\n查看 output_tactics_split.csv 和 output_techniques_split.csv 以檢視結果。")

In [None]:
tactic_id_name_map, technique_id_name_map, technique_name_id_map, \
tactic_id_to_techniques_map, technique_id_to_tactic_ids_map = attack_data_maps
print("technique_id_to_tactic_ids_map:",technique_id_to_tactic_ids_map)

In [None]:
print(technique_id_name_map['T1583'])

In [None]:
print(technique_id_to_tactic_ids_map['T1583'])

In [None]:
# 2. 獲取 TA0001 (Initial Access) 的所有 Technique
tactic_id_to_query = 'TA0010' 

# 檢查 tactic_id_to_query 是否存在
if tactic_id_to_query in tactic_id_name_map:
    tactic_name = tactic_id_name_map[tactic_id_to_query]
    print(f"\n--- 正在取得 Tactic '{tactic_name}' (ID: {tactic_id_to_query}) 的所有 Technique ---")
    
    # 從 tactic_id_to_techniques_map 獲取 Technique ID 列表
    techniques_for_tactic = tactic_id_to_techniques_map.get(tactic_id_to_query, [])
    
    if techniques_for_tactic:
        print(f"Tactic '{tactic_name}' (ID: {tactic_id_to_query}) 包含以下 Technique:")
        for tech_id in techniques_for_tactic:
            tech_name = technique_id_name_map.get(tech_id, "未知名稱")
            print(f"  - {tech_id}: {tech_name}")
    else:
        print(f"Tactic '{tactic_name}' (ID: {tactic_id_to_query}) 未找到任何 Technique 關聯。")
else:
    print(f"錯誤：找不到 Tactic ID '{tactic_id_to_query}'。請檢查 ID 是否正確。")

# Another method


In [1]:
from attackcti import attack_client
from collections import defaultdict

# 初始化 ATT&CK Client 並取得 Enterprise 資料
client = attack_client()
enterprise = client.get_enterprise()

# 儲存映射資料結構
tactic_id_name_map = {}
technique_id_name_map = {}
technique_name_id_map = {}
tactic_id_to_techniques_map = defaultdict(list)
technique_id_to_tactic_ids_map = defaultdict(list)

# 先建立 tactic_id -> tactic_name 的對照表
for tactic in enterprise["tactics"]:
    tactic_id = tactic["external_references"][0]["external_id"]
    tactic_name = tactic["name"]
    tactic_id_name_map[tactic_id] = tactic_name

# 建立 technique 映射與關聯到 tactic
for technique in enterprise["techniques"]:
    tech_id = technique["external_references"][0]["external_id"]
    tech_name = technique["name"]
    
    technique_id_name_map[tech_id] = tech_name
    technique_name_id_map[tech_name] = tech_id

    for phase in technique.get("kill_chain_phases", []):
        tactic_name = phase["phase_name"]
        
        # 找出對應的 tactic_id
        tactic_id = next(
            (tid for tid, tname in tactic_id_name_map.items() if tname.lower() == tactic_name.lower()),
            None
        )
        if tactic_id:
            tactic_id_to_techniques_map[tactic_id].append(tech_id)
            technique_id_to_tactic_ids_map[tech_id].append(tactic_id)




In [2]:
# ✅ 印出部分驗證結果
print("✅ tactic_id_name_map 範例：")
print(dict(list(tactic_id_name_map.items())[:3]))

print("\n✅ technique_id_name_map 範例：")
print(dict(list(technique_id_name_map.items())[:3]))

print("\n✅ tactic_id_to_techniques_map 範例：")
for tactic_id, tech_list in list(tactic_id_to_techniques_map.items())[:1]:
    print(f"{tactic_id} -> {tech_list[:5]}...")

print("\n✅ technique_id_to_tactic_ids_map 範例：")
for tech_id, tactic_list in list(technique_id_to_tactic_ids_map.items())[:1]:
    print(f"{tech_id} -> {tactic_list}")

✅ tactic_id_name_map 範例：
{'TA0006': 'Credential Access', 'TA0002': 'Execution', 'TA0040': 'Impact'}

✅ technique_id_name_map 範例：
{'T1055.011': 'Extra Window Memory Injection', 'T1053.005': 'Scheduled Task', 'T1205.002': 'Socket Filters'}

✅ tactic_id_to_techniques_map 範例：
TA0002 -> ['T1053.005', 'T1047', 'T1129', 'T1059.007', 'T1053.007']...

✅ technique_id_to_tactic_ids_map 範例：
T1053.005 -> ['TA0002', 'TA0003']


In [3]:
# 建立對照表：tactic 名稱小寫 -> ID
tactic_name_to_id_map = {
    tactic["name"].lower(): tactic["external_references"][0]["external_id"]
    for tactic in enterprise["tactics"]
}

# 將 tactic_id_name_map 一併建立
tactic_id_name_map = {
    tactic["external_references"][0]["external_id"]: tactic["name"]
    for tactic in enterprise["tactics"]
}

# 處理 technique -> tactic 對應
for technique in enterprise["techniques"]:
    tech_id = technique["external_references"][0]["external_id"]
    tech_name = technique["name"]

    technique_id_name_map[tech_id] = tech_name
    technique_name_id_map[tech_name] = tech_id

    for phase in technique.get("kill_chain_phases", []):
        tactic_name_lc = phase["phase_name"].lower()
        converted_tactic_name = tactic_name_lc.replace('-', ' ')
        tactic_id = tactic_name_to_id_map.get(converted_tactic_name)

        print(converted_tactic_name, tactic_id)

        if tactic_id:
            tactic_id_to_techniques_map[tactic_id].append(tech_id)
            technique_id_to_tactic_ids_map[tech_id].append(tactic_id)


defense evasion TA0005
privilege escalation TA0004
execution TA0002
persistence TA0003
privilege escalation TA0004
defense evasion TA0005
persistence TA0003
command and control TA0011
collection TA0009
lateral movement TA0008
execution TA0002
collection TA0009
defense evasion TA0005
persistence TA0003
privilege escalation TA0004
credential access TA0006
collection TA0009
discovery TA0007
resource development TA0042
defense evasion TA0005
discovery TA0007
resource development TA0042
command and control TA0011
defense evasion TA0005
credential access TA0006
defense evasion TA0005
persistence TA0003
defense evasion TA0005
reconnaissance TA0043
reconnaissance TA0043
collection TA0009
credential access TA0006
defense evasion TA0005
defense evasion TA0005
credential access TA0006
defense evasion TA0005
reconnaissance TA0043
credential access TA0006
execution TA0002
collection TA0009
impact TA0040
impact TA0040
persistence TA0003
privilege escalation TA0004
defense evasion TA0005
collection T

In [4]:
import re
for tactic_id, techniques_list in tactic_id_to_techniques_map.items():
    techniques_to_add = set() # 用set來避免重複
    for tech_id in techniques_list:
        # 檢查是否為子技術 (例如 T1205.002)
        if re.fullmatch(r'T\d{4}\.\d{3}', tech_id):
            parent_tech_id = tech_id.split('.')[0] # 提取父技術ID (T1205)
            # 確保這個父技術ID確實存在於我們的技術列表中 (避免引入不存在的ID)
            techniques_to_add.add(parent_tech_id)
    
    # 將新的父技術添加到該 Tactic 的列表中
    tactic_id_to_techniques_map[tactic_id].extend(list(techniques_to_add))
    
    # 確保每個 Tactic 的 Technique 列表是去重且排序的
    tactic_id_to_techniques_map[tactic_id] = sorted(list(set(tactic_id_to_techniques_map[tactic_id])))


In [5]:
import requests
import json
import csv
from collections import defaultdict

def split_csv_by_tactic_and_technique(input_csv_path, output_tactic_csv_path, output_technique_csv_path, 
                                      tactic_id_name_map, technique_id_name_map, tactic_id_to_techniques_map, 
                                      technique_id_to_tactic_ids_map):
    """
    讀取原始 CSV，其標題是 "Tactic Name - Technique Name" 格式，
    將資料分割為 Tactic CSV 和 Technique CSV。
    """


    if not tactic_id_name_map or not technique_id_name_map:
        print("ATT&CK 映射資料不足，無法進行轉換。")
        return

    try:
        with open(input_csv_path, 'r', newline='', encoding='utf-8') as infile:
            reader = csv.reader(infile)
            original_header = next(reader) # 原始 CSV 的標題

            # 儲存原始 CSV 標題到其對應的 Technique ID 的映射
            # {原始列索引: technique_id}
            original_col_index_to_tech_id = {}
            
            # print("\n正在解析原始 CSV 標題並匹配 ATT&CK 技術...")
            for i, full_tech_name_in_csv in enumerate(original_header):
                # 假設格式是 "Tactic Name - Technique Name"
                parts = full_tech_name_in_csv.split(' - ', 1) # 只分割一次，避免 Technique 名稱中也包含 '-'
                if len(parts) == 2:
                    tactic_name_from_csv = parts[0].strip()
                    tech_name_from_csv = parts[1].strip()
                elif len(parts) == 1:
                    # 如果沒有 '-'，可能它本身就是 Technique 名稱，或者格式不對
                    tech_name_from_csv = parts[0].strip()
                    tactic_name_from_csv = None # 無法從標題推斷 Tactic
                else:
                    tech_name_from_csv = full_tech_name_in_csv.strip() # 保留原始名稱
                    tactic_name_from_csv = None

                # print(f"處理原始標題 '{full_tech_name_in_csv}'，Tactic: '{tactic_name_from_csv}', Technique: '{tech_name_from_csv}'...")

                # 使用純粹的 Technique 名稱進行查找
                tech_id = technique_name_id_map.get(tech_name_from_csv)
                
                if tech_id:
                    # 進一步檢查 Tactic 是否匹配 (可選，但更嚴謹)
                    # 獲取該 Technique 實際所屬的 Tactic ID 列表
                    actual_tactic_ids_for_tech = technique_id_to_tactic_ids_map.get(tech_id, [])
                    
                    # 檢查 CSV 中提供的 Tactic 名稱是否與實際匹配
                    tactic_match = False
                    if tactic_name_from_csv:
                        # print("actual_tactic_ids_for_tech:", actual_tactic_ids_for_tech)
                        for actual_tid in actual_tactic_ids_for_tech:
                            # print(f"檢查 Tactic '{tactic_name_from_csv}' ({actual_tid}), '{tactic_id_name_map.get(actual_tid)}' 是否匹配...")
                            if tactic_id_name_map.get(actual_tid).lower() == tactic_name_from_csv.lower():
                                tactic_match = True
                                # print(f"匹配成功！使用 Tactic ID: {actual_tid}")
                    else: # 如果 CSV 標題沒有 Tactic 部分，只要技術本身有效就接受
                        tactic_match = True 

                    if tactic_match:
                        original_col_index_to_tech_id[i] = tech_id
                    else:
                        print(f"警告：原始 CSV 標題 '{full_tech_name_in_csv}' 找到技術 '{tech_name_from_csv}' ({tech_id})，但其 Tactic '{tactic_name_from_csv}' 不匹配 ATT&CK 資料。該列將被忽略。")
                        # if tactic_name_from_csv:
                        #     # print("actual_tactic_ids_for_tech:", actual_tactic_ids_for_tech)
                        #     for actual_tid in actual_tactic_ids_for_tech:
                        #         print(f"檢查 Tactic '{tactic_name_from_csv}' ({actual_tid}), '{tactic_id_name_map.get(actual_tid)}' 是否匹配...")
                        #         if tactic_id_name_map.get(actual_tid) == tactic_name_from_csv:
                        #             tactic_match = True
                        #             print(f"匹配成功！使用 Tactic ID: {actual_tid}")
                        # else: # 如果 CSV 標題沒有 Tactic 部分，只要技術本身有效就接受
                        #     tactic_match = True 
                else:
                
                    print(f"警告：原始 CSV 標題 '{full_tech_name_in_csv}' ,'{tech_name_from_csv}' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。")

            # --- 處理 Technique CSV ---
            # 新的 Technique CSV 標題：所有有效的 Technique ID (按 ID 排序)
            technique_csv_header = sorted(list(set(original_col_index_to_tech_id.values())))
            
            # Technique CSV 數據行
            technique_csv_data_rows = []

            # --- 處理 Tactic CSV ---
            # 新的 Tactic CSV 標題：所有 Tactic ID (按 ID 排序)
            tactic_csv_header = sorted(tactic_id_name_map.keys())
            
            # Tactic CSV 數據行
            tactic_csv_data_rows = []

            # 遍歷原始 CSV 的每一行資料
            for row_num, original_row_values in enumerate(reader):
                # 用於存儲當前行的 Technique 值，按 Technique ID 排序
                current_tech_id_values = {tech_id: '0' for tech_id in technique_csv_header} # 預設為 '0'

                # 填充 current_tech_id_values
                for original_index, tech_id in original_col_index_to_tech_id.items():
                    if original_index < len(original_row_values):
                        current_tech_id_values[tech_id] = original_row_values[original_index]
                
                # 添加到 Technique CSV 的數據行 (按排好的標題順序)
                technique_csv_data_rows.append([current_tech_id_values[tech_id] for tech_id in technique_csv_header])

                # 計算 Tactic 值 (例如：任何一個相關 Technique 為 '1' 則 Tactic 為 '1')
                current_tactic_id_values = {}
                for tactic_id in tactic_csv_header:
                    tactic_has_one = '0' # 預設為 '0'
                    associated_techniques = tactic_id_to_techniques_map.get(tactic_id, [])
                    for tech_id in associated_techniques:
                        if current_tech_id_values.get(tech_id) == '1':
                            tactic_has_one = '1'
                            break # 找到一個 '1' 就夠了
                    current_tactic_id_values[tactic_id] = tactic_has_one
                
                # 添加到 Tactic CSV 的數據行 (按排好的標題順序)
                tactic_csv_data_rows.append([current_tactic_id_values[tactic_id] for tactic_id in tactic_csv_header])

        # --- 寫入 Technique CSV 檔案 ---
        with open(output_technique_csv_path, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(technique_csv_header) # 寫入標題
            writer.writerows(technique_csv_data_rows) # 寫入數據
        print(f"成功將 Technique 資料儲存到 '{output_technique_csv_path}'。")

        # --- 寫入 Tactic CSV 檔案 ---
        with open(output_tactic_csv_path, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(tactic_csv_header) # 寫入標題
            writer.writerows(tactic_csv_data_rows) # 寫入數據
        print(f"成功將 Tactic 資料儲存到 '{output_tactic_csv_path}'。")

    except FileNotFoundError:
        print(f"錯誤：找不到檔案 '{input_csv_path}'。請檢查路徑是否正確。")
    except Exception as e:
        print(f"處理 CSV 檔案時發生錯誤：{e}")


# --- 主執行部分 ---
if __name__ == "__main__":
    

    # 3. 處理你的 CSV 檔案並輸出到兩個獨立的 CSV
    input_file = 'train_dataset_list_output_Y.csv'
    output_tactic_file = 'output_tactics_split.csv'
    output_technique_file = 'output_techniques_split.csv'

    split_csv_by_tactic_and_technique(
        input_file, 
        output_tactic_file, 
        output_technique_file, 
        tactic_id_name_map,
        technique_id_name_map,
        tactic_id_to_techniques_map,
        technique_id_to_tactic_ids_map
    )

    print("\n查看 output_tactics_split.csv 和 output_techniques_split.csv 以檢視結果。")

警告：原始 CSV 標題 'Persistence - Compromise Client Software Binary' ,'Compromise Client Software Binary' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
警告：原始 CSV 標題 'Privilege Escalation - Domain Policy Modification' ,'Domain Policy Modification' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
警告：原始 CSV 標題 'Defense Evasion - Domain Policy Modification' ,'Domain Policy Modification' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
警告：原始 CSV 標題 'Defense Evasion - Indicator Removal on Host' ,'Indicator Removal on Host' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
警告：原始 CSV 標題 'Discovery - Browser Bookmark Discovery' ,'Browser Bookmark Discovery' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
警告：原始 CSV 標題 'Collection - Data from Cloud Storage Object' ,'Data from Cloud Storage Object' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
警告：原始 CSV 標題 'Command And Control - Remote Access Software' ,'Remote Access Software' 無法在 ATT&CK 技術中找到匹配。該列將被忽略。
成功將 Technique 資料儲存到 'output_techniques_split.csv'。
成功將 Tactic 資料儲存到 'output_tactics_split.csv'。

查看 output_tactics_split.csv 和 output_techniques_split.csv 以檢視結果。


In [6]:
tactic_id_to_main_techniques_map = defaultdict(list)
for tactic_id, techniques_list in tactic_id_to_techniques_map.items():
        main_techniques_for_tactic = set() # 使用 set 避免重複

        for tech_id in techniques_list:
            # 檢查技術是否為子技術
            is_sub_technique = bool(re.fullmatch(r'T\d{4}\.\d{3}', tech_id))

            if not is_sub_technique:
                # 如果是主技術，直接加入
                main_techniques_for_tactic.add(tech_id)
            else:
                # 如果是子技術，提取其父技術
                parent_tech_id = tech_id.split('.')[0]
        
        # 將去重並排序後的父技術添加到新映射中
        tactic_id_to_main_techniques_map[tactic_id] = sorted(list(main_techniques_for_tactic))

In [48]:
print(tactic_id_to_main_techniques_map)

defaultdict(<class 'list'>, {'TA0002': ['T1047', 'T1053', 'T1059', 'T1072', 'T1106', 'T1129', 'T1203', 'T1204', 'T1559', 'T1569', 'T1609', 'T1610', 'T1648', 'T1651', 'T1674', 'T1675'], 'TA0003': ['T1037', 'T1053', 'T1078', 'T1098', 'T1112', 'T1133', 'T1136', 'T1137', 'T1176', 'T1197', 'T1205', 'T1505', 'T1525', 'T1542', 'T1543', 'T1546', 'T1547', 'T1554', 'T1556', 'T1574', 'T1653', 'T1668', 'T1671'], 'TA0009': ['T1005', 'T1025', 'T1039', 'T1056', 'T1074', 'T1113', 'T1114', 'T1115', 'T1119', 'T1123', 'T1125', 'T1185', 'T1213', 'T1530', 'T1557', 'T1560', 'T1602'], 'TA0007': ['T1007', 'T1010', 'T1012', 'T1016', 'T1018', 'T1033', 'T1040', 'T1046', 'T1049', 'T1057', 'T1069', 'T1082', 'T1083', 'T1087', 'T1120', 'T1124', 'T1135', 'T1201', 'T1217', 'T1482', 'T1497', 'T1518', 'T1526', 'T1538', 'T1580', 'T1613', 'T1614', 'T1615', 'T1619', 'T1622', 'T1652', 'T1654', 'T1673'], 'TA0043': ['T1589', 'T1590', 'T1591', 'T1592', 'T1593', 'T1594', 'T1595', 'T1596', 'T1597', 'T1598'], 'TA0040': ['T1485', 

In [50]:
print(technique_id_name_map)

{'T1055.011': 'Extra Window Memory Injection', 'T1053.005': 'Scheduled Task', 'T1205.002': 'Socket Filters', 'T1560.001': 'Archive via Utility', 'T1021.005': 'VNC', 'T1047': 'Windows Management Instrumentation', 'T1113': 'Screen Capture', 'T1027.011': 'Fileless Storage', 'T1037': 'Boot or Logon Initialization Scripts', 'T1557': 'Adversary-in-the-Middle', 'T1033': 'System Owner/User Discovery', 'T1583': 'Acquire Infrastructure', 'T1218.011': 'Rundll32', 'T1613': 'Container and Resource Discovery', 'T1583.007': 'Serverless', 'T1132.001': 'Standard Encoding', 'T1027.009': 'Embedded Payloads', 'T1556.003': 'Pluggable Authentication Modules', 'T1578.004': 'Revert Cloud Instance', 'T1592': 'Gather Victim Host Information', 'T1596.003': 'Digital Certificates', 'T1056.001': 'Keylogging', 'T1564.012': 'File/Path Exclusions', 'T1222.002': 'Linux and Mac File and Directory Permissions Modification', 'T1110.001': 'Password Guessing', 'T1216.001': 'PubPrn', 'T1597.002': 'Purchase Technical Data', '

In [7]:
all_technique_names = list(technique_id_name_map.keys())
print(all_technique_names)
#去除子技術名稱
main_technique_names = [name for name in all_technique_names if not re.fullmatch(r'.*\.\d{3}', name)]
print(main_technique_names)
print(len(main_technique_names))


['T1055.011', 'T1053.005', 'T1205.002', 'T1560.001', 'T1021.005', 'T1047', 'T1113', 'T1027.011', 'T1037', 'T1557', 'T1033', 'T1583', 'T1218.011', 'T1613', 'T1583.007', 'T1132.001', 'T1027.009', 'T1556.003', 'T1578.004', 'T1592', 'T1596.003', 'T1056.001', 'T1564.012', 'T1222.002', 'T1110.001', 'T1216.001', 'T1597.002', 'T1003', 'T1129', 'T1602', 'T1561.002', 'T1498.001', 'T1574.007', 'T1213.002', 'T1006', 'T1588.007', 'T1666', 'T1564.008', 'T1491.002', 'T1027.013', 'T1590.005', 'T1499.001', 'T1014', 'T1546.013', 'T1059.007', 'T1590.002', 'T1485.001', 'T1123', 'T1543', 'T1133', 'T1546.006', 'T1539', 'T1053.007', 'T1568.002', 'T1036.007', 'T1548.002', 'T1496.003', 'T1016.001', 'T1548.003', 'T1560.003', 'T1578', 'T1584.008', 'T1583.008', 'T1069', 'T1114', 'T1003.002', 'T1596.002', 'T1542.001', 'T1594', 'T1069.003', 'T1574.011', 'T1596.001', 'T1499.003', 'T1195.001', 'T1588.004', 'T1583.002', 'T1561', 'T1071.004', 'T1552.005', 'T1555.002', 'T1615', 'T1542.003', 'T1025', 'T1218.013', 'T1074.

In [8]:
TECHNIQUE = ['T1546.010', 'T1205', 'T1546', 'T1189', 'T1553.005', 'T1550', 'T1048', 'T1087.002', 'T1021.001', 'T1598.003', 'T1200', 'T1531', 'T1491.001', 'T1132.001', 'T1055.001', 'T1498.001', 'T1555.005', 'T1102.003', 'T1578.003', 'T1592.002', 'T1090.001', 'T1003.002', 'T1562.002', 'T1619', 'T1021.004', 'T1134.003', 'T1029', 'T1567.002', 'T1561.001', 'T1490', 'T1011.001', 'T1518.001', 'T1210', 'T1497', 'T1072', 'T1134.004', 'T1595.003', 'T1547.012', 'T1498.002', 'T1491', 'T1552.003', 'T1001.002', 'T1585.001', 'T1114', 'T1098.001', 'T1542.003', 'T1622', 'T1563.001', 'T1027.005', 'T1001.001', 'T1495', 'T1505', 'T1546.009', 'T1056.001', 'T1021.003', 'T1104', 'T1041', 'T1548.004', 'T1040', 'T1105', 'T1525', 'T1074.001', 'T1553.006', 'T1213', 'T1547.007', 'T1589.002', 'T1078', 'T1542.005', 'T1053.007', 'T1112', 'T1137.006', 'T1070.006', 'T1114.002', 'T1115', 'T1562.001', 'T1003.008', 'T1561', 'T1535', 'T1621', 'T1546.012', 'T1546.014', 'T1553.002', 'T1591', 'T1578.002', 'T1012', 'T1021', 'T1053.002', 'T1195.003', 'T1548.002', 'T1136.001', 'T1204.001', 'T1137', 'T1132', 'T1564.008', 'T1102.002', 'T1049', 'T1187', 'T1129', 'T1574.012', 'T1070.005', 'T1573', 'T1547.004', 'T1092', 'T1555.004', 'T1037.002', 'T1596.004', 'T1018', 'T1484.002', 'T1055.004', 'T1037', 'T1590.006', 'T1098.005', 'T1052.001', 'T1110.003', 'T1598.002', 'T1564.010', 'T1584.003', 'T1218', 'T1211', 'T1213.003', 'T1590.003', 'T1584.001', 'T1553.001', 'T1550.001', 'T1573.002', 'T1027.004', 'T1542.004', 'T1564.003', 'T1056.004', 'T1584.004', 'T1027.001', 'T1647', 'T1071.001', 'T1218.003', 'T1565.001', 'T1070.004', 'T1596.003', 'T1555.001', 'T1071.004', 'T1114.001', 'T1588.006', 'T1555.003', 'T1055.009', 'T1608.003', 'T1596.005', 'T1102', 'T1583.006', 'T1568.003', 'T1204.002', 'T1053.005', 'T1587.004', 'T1590.001', 'T1574.009', 'T1590.002', 'T1134.002', 'T1098', 'T1574.013', 'T1059.003', 'T1070.002', 'T1110.004', 'T1596.002', 'T1550.003', 'T1608.005', 'T1588.002', 'T1559.003', 'T1489', 'T1574.007', 'T1559.002', 'T1098.002', 'T1030', 'T1574.005', 'T1564.009', 'T1546.006', 'T1563.002', 'T1087.001', 'T1593.001', 'T1087.004', 'T1552.002', 'T1568.001', 'T1047', 'T1020.001', 'T1588.001', 'T1055', 'T1176', 'T1195.001', 'T1496', 'T1055.005', 'T1080', 'T1059.002', 'T1204', 'T1213.001', 'T1566.003', 'T1615', 'T1573.001', 'T1074', 'T1056.003', 'T1562.008', 'T1505.001', 'T1543.003', 'T1202', 'T1595', 'T1480.001', 'T1056.002', 'T1584.005', 'T1218.010', 'T1207', 'T1125', 'T1574.004', 'T1218.004', 'T1127', 'T1547.001', 'T1599', 'T1553', 'T1068', 'T1547.014', 'T1069', 'T1546.005', 'T1566.002', 'T1195.002', 'T1600.001', 'T1218.013', 'T1526', 'T1070.003', 'T1568', 'T1546.004', 'T1556.005', 'T1201', 'T1137.004', 'T1567.001', 'T1048.002', 'T1562.003', 'T1090', 'T1203', 'T1505.005', 'T1484', 'T1059.008', 'T1059.006', 'T1609', 'T1218.012', 'T1611', 'T1558.003', 'T1499', 'T1595.001', 'T1538', 'T1546.011', 'T1499.002', 'T1124', 'T1599.001', 'T1608.001', 'T1027', 'T1534', 'T1110.002', 'T1574.006', 'T1003.004', 'T1053.003', 'T1001', 'T1220', 'T1006', 'T1036.001', 'T1499.003', 'T1055.002', 'T1559', 'T1546.007', 'T1120', 'T1590', 'T1560', 'T1106', 'T1020', 'T1578.001', 'T1594', 'T1585', 'T1595.002', 'T1055.008', 'T1558.002', 'T1499.001', 'T1055.014', 'T1222.002', 'T1574.011', 'T1098.003', 'T1564.001', 'T1055.015', 'T1591.003', 'T1567', 'T1003', 'T1003.005', 'T1566.001', 'T1585.002', 'T1559.001', 'T1219', 'T1114.003', 'T1588.004', 'T1132.002', 'T1587.001', 'T1552.001', 'T1608.002', 'T1546.013', 'T1583.004', 'T1558.001', 'T1602', 'T1547.009', 'T1606.001', 'T1027.006', 'T1003.003', 'T1588.003', 'T1543.002', 'T1102.001', 'T1547.006', 'T1037.005', 'T1123', 'T1039', 'T1530', 'T1592.003', 'T1204.003', 'T1562.007', 'T1556', 'T1574.010', 'T1046', 'T1091', 'T1542.001', 'T1569.002', 'T1137.002', 'T1222', 'T1596.001', 'T1195', 'T1587.002', 'T1491.002', 'T1216.001', 'T1548.001', 'T1003.006', 'T1136', 'T1565.003', 'T1218.002', 'T1555.002', 'T1078.001', 'T1546.001', 'T1600', 'T1557.002', 'T1090.002', 'T1614.001', 'T1558.004', 'T1036.007', 'T1505.002', 'T1010', 'T1564.007', 'T1529', 'T1565', 'T1564.005', 'T1586', 'T1557', 'T1598', 'T1547.008', 'T1601.002', 'T1218.008', 'T1137.001', 'T1597.002', 'T1578.004', 'T1537', 'T1586.002', 'T1547.002', 'T1036.002', 'T1185', 'T1574', 'T1027.002', 'T1052', 'T1135', 'T1588', 'T1098.004', 'T1027.003', 'T1497.001', 'T1586.001', 'T1016', 'T1600.002', 'T1137.005', 'T1008', 'T1136.003', 'T1003.007', 'T1583.005', 'T1048.001', 'T1601', 'T1606', 'T1133', 'T1564.004', 'T1574.008', 'T1612', 'T1037.003', 'T1574.002', 'T1542.002', 'T1542', 'T1048.003', 'T1059.007', 'T1218.011', 'T1583.001', 'T1071.002', 'T1070', 'T1037.001', 'T1083', 'T1071.003', 'T1546.008', 'T1552.005', 'T1587', 'T1095', 'T1589.001', 'T1482', 'T1003.001', 'T1497.003', 'T1557.001', 'T1021.005', 'T1036.004', 'T1602.001', 'T1557.003', 'T1528', 'T1486', 'T1485', 'T1583', 'T1078.003', 'T1055.012', 'T1566', 'T1222.001', 'T1053.006', 'T1036.003', 'T1016.001', 'T1055.003', 'T1221', 'T1055.013', 'T1218.001', 'T1218.014', 'T1190', 'T1553.003', 'T1571', 'T1140', 'T1033', 'T1218.007', 'T1059.001', 'T1591.001', 'T1056', 'T1011', 'T1596', 'T1078.002', 'T1591.004', 'T1547', 'T1561.002', 'T1082', 'T1543.004', 'T1547.010', 'T1090.004', 'T1069.002', 'T1555', 'T1570', 'T1078.004', 'T1608', 'T1021.006', 'T1480', 'T1560.002', 'T1608.004', 'T1547.003', 'T1569', 'T1565.002', 'T1218.005', 'T1110.001', 'T1583.002', 'T1134.001', 'T1539', 'T1550.004', 'T1087', 'T1597', 'T1505.004', 'T1606.002', 'T1069.001', 'T1087.003', 'T1484.001', 'T1505.003', 'T1543.001', 'T1593', 'T1614', 'T1499.004', 'T1568.002', 'T1546.003', 'T1059.005', 'T1580', 'T1553.004', 'T1552', 'T1213.002', 'T1589', 'T1071', 'T1597.001', 'T1554', 'T1569.001', 'T1601.001', 'T1584', 'T1036', 'T1584.002', 'T1572', 'T1556.003', 'T1036.006', 'T1591.002', 'T1199', 'T1547.015', 'T1552.006', 'T1134', 'T1074.002', 'T1216', 'T1620', 'T1057', 'T1055.011', 'T1548.003', 'T1564', 'T1218.009', 'T1563', 'T1590.004', 'T1552.004', 'T1005', 'T1021.002', 'T1564.002', 'T1547.013', 'T1070.001', 'T1613', 'T1588.005', 'T1025', 'T1127.001', 'T1212', 'T1205.001', 'T1543', 'T1562', 'T1014', 'T1562.004', 'T1119', 'T1610', 'T1550.002', 'T1546.002', 'T1111', 'T1560.001', 'T1547.005', 'T1592.004', 'T1059', 'T1498', 'T1037.004', 'T1552.007', 'T1136.002', 'T1113', 'T1587.003', 'T1548', 'T1090.003', 'T1592', 'T1564.006', 'T1556.004', 'T1590.005', 'T1589.003', 'T1562.010', 'T1578', 'T1562.009', 'T1562.006', 'T1598.001', 'T1592.001', 'T1110', 'T1069.003', 'T1546.015', 'T1497.002', 'T1584.006', 'T1137.003', 'T1556.001', 'T1059.004', 'T1556.002', 'T1602.002', 'T1593.002', 'T1583.003', 'T1574.001', 'T1134.005', 'T1518', 'T1197', 'T1036.005', 'T1558', 'T1007', 'T1001.003', 'T1053', 'T1217', 'T1560.003']
#去除子技術名稱
TECHNIQUE = [tech for tech in TECHNIQUE if not re.fullmatch(r'.*\.\d{3}', tech)]
# 印出 TECHNIQUE 的長度
print(len(TECHNIQUE))

191


In [75]:
main_technique_names.sort()
print(main_technique_names)

['T1001', 'T1003', 'T1005', 'T1006', 'T1007', 'T1008', 'T1010', 'T1011', 'T1012', 'T1014', 'T1016', 'T1018', 'T1020', 'T1021', 'T1025', 'T1027', 'T1029', 'T1030', 'T1033', 'T1036', 'T1037', 'T1039', 'T1040', 'T1041', 'T1046', 'T1047', 'T1048', 'T1049', 'T1052', 'T1053', 'T1055', 'T1056', 'T1057', 'T1059', 'T1068', 'T1069', 'T1070', 'T1071', 'T1072', 'T1074', 'T1078', 'T1080', 'T1082', 'T1083', 'T1087', 'T1090', 'T1091', 'T1092', 'T1095', 'T1098', 'T1102', 'T1104', 'T1105', 'T1106', 'T1110', 'T1111', 'T1112', 'T1113', 'T1114', 'T1115', 'T1119', 'T1120', 'T1123', 'T1124', 'T1125', 'T1127', 'T1129', 'T1132', 'T1133', 'T1134', 'T1135', 'T1136', 'T1137', 'T1140', 'T1176', 'T1185', 'T1187', 'T1189', 'T1190', 'T1195', 'T1197', 'T1199', 'T1200', 'T1201', 'T1202', 'T1203', 'T1204', 'T1205', 'T1207', 'T1210', 'T1211', 'T1212', 'T1213', 'T1216', 'T1217', 'T1218', 'T1219', 'T1220', 'T1221', 'T1222', 'T1480', 'T1482', 'T1484', 'T1485', 'T1486', 'T1489', 'T1490', 'T1491', 'T1495', 'T1496', 'T1497', 

In [60]:
print(tactic_id_to_main_techniques_map['TA0001'])

['T1078', 'T1091', 'T1133', 'T1189', 'T1190', 'T1195', 'T1199', 'T1200', 'T1566', 'T1659', 'T1669']


In [61]:
print(tactic_id_to_main_techniques_map['TA0002'])

['T1047', 'T1053', 'T1059', 'T1072', 'T1106', 'T1129', 'T1203', 'T1204', 'T1559', 'T1569', 'T1609', 'T1610', 'T1648', 'T1651', 'T1674', 'T1675']


In [9]:
print(tactic_id_to_main_techniques_map['TA0003'])

['T1037', 'T1053', 'T1078', 'T1098', 'T1112', 'T1133', 'T1136', 'T1137', 'T1176', 'T1197', 'T1205', 'T1505', 'T1525', 'T1542', 'T1543', 'T1546', 'T1547', 'T1554', 'T1556', 'T1574', 'T1653', 'T1668', 'T1671']


In [62]:
print(tactic_id_to_main_techniques_map['TA0004'])

['T1037', 'T1053', 'T1055', 'T1068', 'T1078', 'T1098', 'T1134', 'T1484', 'T1543', 'T1546', 'T1547', 'T1548', 'T1574', 'T1611']


In [None]:
print(tactic_id_to_main_techniques_map['TA0005'])

['T1006', 'T1014', 'T1027', 'T1036', 'T1055', 'T1070', 'T1078', 'T1112', 'T1127', 'T1134', 'T1140', 'T1197', 'T1202', 'T1205', 'T1207', 'T1211', 'T1216', 'T1218', 'T1220', 'T1221', 'T1222', 'T1480', 'T1484', 'T1497', 'T1535', 'T1542', 'T1548', 'T1550', 'T1553', 'T1556', 'T1562', 'T1564', 'T1574', 'T1578', 'T1599', 'T1600', 'T1601', 'T1610', 'T1612', 'T1620', 'T1622', 'T1647', 'T1656', 'T1666', 'T1672']


In [64]:
print(tactic_id_to_main_techniques_map['TA0006'])

['T1003', 'T1040', 'T1056', 'T1110', 'T1111', 'T1187', 'T1212', 'T1528', 'T1539', 'T1552', 'T1555', 'T1556', 'T1557', 'T1558', 'T1606', 'T1621', 'T1649']


In [65]:
print(tactic_id_to_main_techniques_map['TA0007'])

['T1007', 'T1010', 'T1012', 'T1016', 'T1018', 'T1033', 'T1040', 'T1046', 'T1049', 'T1057', 'T1069', 'T1082', 'T1083', 'T1087', 'T1120', 'T1124', 'T1135', 'T1201', 'T1217', 'T1482', 'T1497', 'T1518', 'T1526', 'T1538', 'T1580', 'T1613', 'T1614', 'T1615', 'T1619', 'T1622', 'T1652', 'T1654', 'T1673']


In [None]:
print(tactic_id_to_main_techniques_map['TA0008'])

['T1021', 'T1072', 'T1080', 'T1091', 'T1210', 'T1534', 'T1550', 'T1563', 'T1570']


In [67]:
print(tactic_id_to_main_techniques_map['TA0009'])

['T1005', 'T1025', 'T1039', 'T1056', 'T1074', 'T1113', 'T1114', 'T1115', 'T1119', 'T1123', 'T1125', 'T1185', 'T1213', 'T1530', 'T1557', 'T1560', 'T1602']


In [68]:
print(tactic_id_to_main_techniques_map['TA0010'])

['T1011', 'T1020', 'T1029', 'T1030', 'T1041', 'T1048', 'T1052', 'T1537', 'T1567']


In [69]:
print(tactic_id_to_main_techniques_map['TA0011'])

['T1001', 'T1008', 'T1071', 'T1090', 'T1092', 'T1095', 'T1102', 'T1104', 'T1105', 'T1132', 'T1205', 'T1219', 'T1568', 'T1571', 'T1572', 'T1573', 'T1659', 'T1665']


In [70]:
print(tactic_id_to_main_techniques_map['TA0040'])

['T1485', 'T1486', 'T1489', 'T1490', 'T1491', 'T1495', 'T1496', 'T1498', 'T1499', 'T1529', 'T1531', 'T1561', 'T1565', 'T1657', 'T1667']


In [71]:
print(tactic_id_to_main_techniques_map['TA0043'])

['T1589', 'T1590', 'T1591', 'T1592', 'T1593', 'T1594', 'T1595', 'T1596', 'T1597', 'T1598']


In [72]:
print(tactic_id_to_main_techniques_map['TA0042'])

['T1583', 'T1584', 'T1585', 'T1586', 'T1587', 'T1588', 'T1608', 'T1650']


In [74]:
all_tactic_names = list(tactic_id_name_map.keys())
#由小排到大
all_tactic_names.sort()
print(all_tactic_names)

['TA0001', 'TA0002', 'TA0003', 'TA0004', 'TA0005', 'TA0006', 'TA0007', 'TA0008', 'TA0009', 'TA0010', 'TA0011', 'TA0040', 'TA0042', 'TA0043']
