# 將 QA_new.json 轉換與映射

In [None]:
import json
import re
from pathlib import Path

## 載入 QA_new.json 資料

In [None]:
# Load QA_new.json. Later cells iterate qa_data and read each record's
# 'page' key, so it is expected to be a list of dict-like records.
with open(r'c:\@李家峰\程式\onlineTest(修辦津要)\QA_new.json', 'r', encoding='utf-8') as f:
    qa_data = json.load(f)

print(f"載入了 {len(qa_data)} 筆 QA_new 資料")
# Guard the preview: indexing [0] on an empty file would raise IndexError
# and abort the cell even though the load itself succeeded.
if qa_data:
    print(f"第一筆資料頁數: {qa_data[0]['page']}")

## 讀取考古題.md，計算總題數

In [None]:
# Read 考古題.md in one go and count how many questions it contains.
md_content = Path(r'c:\@李家峰\程式\onlineTest(修辦津要)\考古題.md').read_text(encoding='utf-8')

# A question is any line that begins with "<digits>." followed by whitespace.
question_pattern = r'^\d+\.\s+'
numbered_line = re.compile(question_pattern, re.MULTILINE)
total_questions = sum(1 for _ in numbered_line.finditer(md_content))
print(f"考古題總數: {total_questions}")

## 解析有題目.md的對應關係

In [None]:
# Parse the question-number -> page table in 有題目.md into mapping_data,
# a list of {'question': int, 'page': str} records.

# Parenthesised notes such as "(部分)"; full-width parentheses are accepted too.
_ANNOTATION_RE = re.compile(r'\s*[\(（].*?[\)）]')
# Separators between the entries of one cell (ASCII or full-width).
_LIST_SEP_RE = re.compile(r'[,，、]')
# A closed range such as "12-13" (hyphen, en dash or tilde).
_RANGE_RE = re.compile(r'(\d+)\s*[-–~～]\s*(\d+)')
# The page number that follows the character 頁 in the page column.
_PAGE_RE = re.compile(r'頁\s*(\d+)')


def _expand_question_numbers(cell, skipped):
    """Expand one table cell into a list of question numbers.

    Handles cells such as "12-13", "14-16, 18" and "117 (部分), 118".

    Args:
        cell: Raw text of the question-number column.
        skipped: List that receives every entry that is neither a number
            nor a range, so the caller can report it.

    Returns:
        The question numbers as ints, in the order they appear.
    """
    # Strip bold markers and notes BEFORE splitting, so a comma inside a
    # parenthesised note is not mistaken for an entry separator.
    cleaned = _ANNOTATION_RE.sub('', cell.replace('**', ''))
    numbers = []
    for token in _LIST_SEP_RE.split(cleaned):
        token = token.strip()
        if not token:
            continue
        range_match = _RANGE_RE.fullmatch(token)
        if range_match:
            first, last = (int(group) for group in range_match.groups())
            numbers.extend(range(first, last + 1))
            continue
        try:
            numbers.append(int(token))
        except ValueError:
            # Best effort: a malformed entry must not abort the whole parse,
            # but it is recorded instead of being dropped silently.
            skipped.append(token)
    return numbers


mapping_data = []
skipped_entries = []

with open(r'c:\@李家峰\程式\onlineTest(修辦津要)\有題目.md', 'r', encoding='utf-8') as f:
    lines = f.readlines()

for line in lines:
    line = line.strip()
    # Only table rows carry data: ignore blank lines, prose, the header row
    # and the alignment/separator row.
    if not line.startswith('|') or '考古題題號' in line or ':---' in line:
        continue

    parts = [p.strip() for p in line.split('|')]
    if len(parts) < 3:
        continue

    # parts[1] is the question-number column, parts[2] the page column.
    # NOTE(review): only the first "頁 N" in the page column is used; if a
    # row lists several pages, every question in that row gets the first
    # one. Confirm this is intended.
    page_match = _PAGE_RE.search(parts[2])
    if not page_match:
        continue
    page = page_match.group(1)

    for q in _expand_question_numbers(parts[1], skipped_entries):
        mapping_data.append({'question': q, 'page': page})

print(f"解析到 {len(mapping_data)} 筆對應關係")
if skipped_entries:
    print(f"無法解析而略過的題號項目: {skipped_entries}")
print("前10筆:")
for item in mapping_data[:10]:
    print(f"  題號 {item['question']} -> 頁 {item['page']}")

## 建立頁數到題號索引的映射

In [None]:
# Tally how many QA_new.json records belong to each page value.
page_question_count = {}
for record in qa_data:
    page_key = record['page']
    page_question_count[page_key] = page_question_count.get(page_key, 0) + 1

print("QA_new.json 各頁題數統計:")
# Order pages numerically; non-numeric page labels sort as 0. Only the first
# 20 pages are shown as a preview.
ordered_pages = sorted(
    page_question_count,
    key=lambda x: int(x) if x.isdigit() else 0,
)
for page in ordered_pages[:20]:
    print(f"  頁 {page}: {page_question_count[page]} 題")

## 生成頁數.txt和題號.txt

根據有題目.md的對應關係，為每個考古題分配頁數和題號索引。
- 頁數.txt: 每個考古題對應的QA_new.json頁數（實際輸出檔名為 頁數_新.txt；找不到對應時為 "Not Found"）
- 題號.txt: 該題在該頁question陣列中的索引 (0-based)（實際輸出檔名為 題號_新.txt；找不到對應時為 "Not Found"）

In [None]:
# Build two parallel lists with one slot per question (question N lives at
# position N - 1):
#   pages            - the mapped page, or "" when the question has no mapping
#   question_indices - the 0-based position within that page as a string,
#                      or "Not Found"
pages = [""] * total_questions
question_indices = ["Not Found"] * total_questions

# Question number -> page. When a question is listed more than once, the
# last listing wins.
question_to_page = {item['question']: item['page'] for item in mapping_data}

# Per page: the most recently handed-out index. It keeps counting even after
# the page runs out of entries.
page_used_indices = {}

# Walk the questions in ascending order so that indices within a page are
# handed out in question-number order.
for q_num in range(1, total_questions + 1):
    if q_num not in question_to_page:
        continue

    page = question_to_page[q_num]
    pages[q_num - 1] = page

    slot = page_used_indices.get(page, -1) + 1
    page_used_indices[page] = slot

    # Only accept the index if the page exists in QA_new.json and still has
    # an unassigned entry; otherwise the slot keeps its "Not Found" default.
    if slot < page_question_count.get(page, 0):
        question_indices[q_num - 1] = str(slot)

# Summary
found_count = sum(1 for p in pages if p != "")
print(f"找到對應的題目: {found_count}/{total_questions}")
print(f"未找到的題目: {total_questions - found_count}")

# Preview of the first 20 results. Slicing clips to the available length, so
# a file with fewer than 20 questions no longer raises IndexError.
print("\n前20筆結果:")
preview = zip(pages[:20], question_indices[:20])
for number, (shown_page, shown_index) in enumerate(preview, start=1):
    print(f"  題{number}: 頁={shown_page if shown_page else 'Not Found'}, 題號索引={shown_index}")

## 儲存到新檔案

In [None]:
# Write both result lists as JSON arrays into the 考古題 output folder.
output_dir = r'c:\@李家峰\程式\onlineTest(修辦津要)\考古題'

# Unmapped questions are stored as "" in pages; emit "Not Found" instead.
pages_output = [p or "Not Found" for p in pages]

pages_path = f'{output_dir}\\頁數_新.txt'
indices_path = f'{output_dir}\\題號_新.txt'

# Page list first, then the index list, both as UTF-8 JSON.
for target, payload in ((pages_path, pages_output), (indices_path, question_indices)):
    with open(target, 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=1)

print("已儲存到:")
print(f"  {pages_path}")
print(f"  {indices_path}")

## 顯示部分結果進行驗證

In [None]:
# Spot-check a few question ranges against 有題目.md.


def _print_question_range(first_q, last_q):
    """Print page and index for questions first_q..last_q (1-based, inclusive).

    Slicing clips to the available data, so asking for questions beyond the
    end of the lists prints fewer rows instead of raising IndexError.
    """
    rows = zip(pages_output[first_q - 1:last_q], question_indices[first_q - 1:last_q])
    for q_num, (page_value, index_value) in enumerate(rows, start=first_q):
        print(f"  題{q_num}: 頁={page_value}, 題號索引={index_value}")


# According to 有題目.md, questions 12-13 should map to page 10.
print("驗證題號 12-20:")
_print_question_range(12, 20)

print("\n驗證題號 163-170 (應對應頁82-86):")
_print_question_range(163, 170)