In [2]:
import json
from data_utils import compute_head_medium_tail, load_index
from rich.console import Console
from rich.table import Table

# Module-level rich Console; the functions below print all of their output through it.
console = Console()

def display_head_medium_tail_distribution():
    """Print the head/medium/tail distribution of SymCat and SymTags labels.

    Reads the distribution for 'NCHU_sorted_data.json' via
    ``compute_head_medium_tail`` and the label index for 'SymCat.json' via
    ``load_index``, builds an ID -> description lookup from the index, and
    hands both to ``display_category_distribution`` once for
    'symptomCategories' and once for 'symptomTags'.

    Returns:
        None. All output goes through ``display_category_distribution``.
    """
    distribution = compute_head_medium_tail('NCHU_sorted_data.json')

    # load_index yields three values; only the third (the raw index data)
    # is needed here.
    _, _, index_data = load_index('SymCat.json')

    # One flat lookup shared by categories and their tags. A tag whose ID
    # equals an earlier entry's ID overwrites that entry.
    descriptions = {}
    for cat_entry in index_data['symptomCategories']:
        descriptions[cat_entry['id']] = cat_entry['description']
        for tag_entry in cat_entry['tags']:
            descriptions[tag_entry['id']] = tag_entry['description']

    # Categories first, then tags.
    for section in ('symptomCategories', 'symptomTags'):
        display_category_distribution(section, distribution, descriptions)

def display_category_distribution(category_type, head_tail_info, id_to_desc):
    """Print percentile thresholds and one table per group for a label type.

    Args:
        category_type: Key looked up in ``head_tail_info``; the caller in this
            file passes 'symptomCategories' or 'symptomTags'.
        head_tail_info: Mapping obtained from ``compute_head_medium_tail``.
            The entry for ``category_type`` is read for the keys
            '33_percentile', '67_percentile', 'head', 'medium' and 'tail';
            the last three must support ``.items()`` yielding
            (label ID, count) pairs.
        id_to_desc: Mapping of label ID to description. IDs missing from it
            are shown with a placeholder description.

    Returns:
        None. If ``category_type`` is not in ``head_tail_info`` an error
        line is printed and nothing else happens.
    """
    if category_type not in head_tail_info:
        console.print(f"[red]錯誤: {category_type} 在head_tail_info中不存在[/red]")
        return

    section = head_tail_info[category_type]

    # Header followed by the two threshold values.
    console.print(f"\n[bold cyan]== {category_type} 分佈 ==[/bold cyan]")
    for prefix, key in (("33%", '33_percentile'), ("67%", '67_percentile')):
        console.print(f"{prefix} 百分位數: {section[key]}")

    # Column layout shared by all three tables.
    column_specs = (
        ("ID", {"style": "dim"}),
        ("描述", {"style": "green"}),
        ("出現次數", {"justify": "right", "style": "cyan"}),
    )

    for bucket in ('head', 'medium', 'tail'):
        table = Table(title=f"{category_type} - {bucket.upper()}")
        for header, options in column_specs:
            table.add_column(header, **options)

        # Highest count first; the sort is stable, so ties keep the
        # mapping's own order.
        ranked = list(section[bucket].items())
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        for label_id, occurrences in ranked:
            table.add_row(
                label_id,
                id_to_desc.get(label_id, "未知描述"),
                str(occurrences),
            )

        console.print(table)
        console.print(f"[italic]共 {len(ranked)} 個{bucket}類標籤[/italic]\n")

def main():
    """Run the distribution report and print any failure as a red message.

    This is the script's top-level boundary: a missing file, a JSON parse
    failure, or any other exception raised while producing the report is
    caught and reported on the console instead of propagating.
    """
    try:
        console.print("[bold]開始分析NCHU_sorted_data.json中的SymCat和SymTags分佈...[/bold]")
        display_head_medium_tail_distribution()
        console.print("[bold green]分析完成![/bold green]")
    except FileNotFoundError as missing_file:
        console.print(f"[bold red]錯誤: 找不到文件 - {missing_file}[/bold red]")
    except json.JSONDecodeError as parse_error:
        console.print(f"[bold red]錯誤: JSON解析失敗 - {parse_error}[/bold red]")
    except Exception as unexpected:
        # Catch-all so the script always ends with a readable message.
        console.print(f"[bold red]錯誤: {unexpected}[/bold red]")

# Run the report only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

In [2]:
import json
from data_utils import compute_head_medium_tail, load_index
from rich.console import Console
from rich.table import Table

# Module-level rich Console; every function in this cell prints through it.
console = Console()

def display_head_medium_tail_distribution():
    """Compute and show the head/medium/tail split for SymCat and SymTags.

    The distribution comes from ``compute_head_medium_tail`` applied to
    'NCHU_sorted_data.json'. Descriptions come from the index that
    ``load_index`` returns for 'SymCat.json'. Each of 'symptomCategories'
    and 'symptomTags' is then rendered by ``display_category_distribution``.

    Returns:
        None.
    """
    head_tail_info = compute_head_medium_tail('NCHU_sorted_data.json')

    # Only the third element of the load_index result is used.
    _label_to_idx, _idx_to_label, index_data = load_index('SymCat.json')

    id_to_desc = {}

    def remember(entry):
        # Record one index entry (a category or a tag) under its ID.
        id_to_desc[entry['id']] = entry['description']

    # Walk every category, then the tags nested under it, so both kinds of
    # ID end up in the same lookup.
    for category in index_data['symptomCategories']:
        remember(category)
        for tag in category['tags']:
            remember(tag)

    display_category_distribution('symptomCategories', head_tail_info, id_to_desc)
    display_category_distribution('symptomTags', head_tail_info, id_to_desc)

def display_category_distribution(category_type, head_tail_info, id_to_desc):
    """Render the head/medium/tail tables for one label type.

    Args:
        category_type: Key into ``head_tail_info``; called in this file with
            'symptomCategories' and 'symptomTags'.
        head_tail_info: Result of ``compute_head_medium_tail``. The entry
            under ``category_type`` must provide '33_percentile',
            '67_percentile' and, for each of 'head', 'medium' and 'tail',
            an object whose ``.items()`` yields (label ID, count) pairs.
        id_to_desc: Lookup from label ID to description; unknown IDs get a
            placeholder text.

    Returns:
        None. Prints an error line and returns early when ``category_type``
        is absent from ``head_tail_info``.
    """
    if category_type not in head_tail_info:
        console.print(f"[red]錯誤: {category_type} 在head_tail_info中不存在[/red]")
        return

    info = head_tail_info[category_type]

    def by_count(item):
        # Sort key: the count half of an (ID, count) pair.
        return item[1]

    def render(group_name):
        # Build, fill and print the table for a single group.
        group_table = Table(title=f"{category_type} - {group_name.upper()}")
        group_table.add_column("ID", style="dim")
        group_table.add_column("描述", style="green")
        group_table.add_column("出現次數", justify="right", style="cyan")

        # Descending by count.
        rows = sorted(info[group_name].items(), key=by_count, reverse=True)
        for row in rows:
            identifier, total = row
            text = id_to_desc.get(identifier, "未知描述")
            group_table.add_row(identifier, text, str(total))

        console.print(group_table)
        console.print(f"[italic]共 {len(rows)} 個{group_name}類標籤[/italic]\n")

    # Thresholds first, then the three tables in fixed order.
    console.print(f"\n[bold cyan]== {category_type} 分佈 ==[/bold cyan]")
    console.print(f"33% 百分位數: {info['33_percentile']}")
    console.print(f"67% 百分位數: {info['67_percentile']}")

    render('head')
    render('medium')
    render('tail')

def main():
    """Entry point: print the report, turning any exception into a message.

    FileNotFoundError and json.JSONDecodeError get dedicated messages; every
    other exception is reported with a generic prefix. Nothing is re-raised.
    """
    try:
        console.print("[bold]開始分析NCHU_sorted_data.json中的SymCat和SymTags分佈...[/bold]")
        display_head_medium_tail_distribution()
        console.print("[bold green]分析完成![/bold green]")
    except FileNotFoundError as exc:
        detail = f"錯誤: 找不到文件 - {exc}"
        console.print(f"[bold red]{detail}[/bold red]")
    except json.JSONDecodeError as exc:
        detail = f"錯誤: JSON解析失敗 - {exc}"
        console.print(f"[bold red]{detail}[/bold red]")
    except Exception as exc:
        # Last-resort handler for anything not covered above.
        detail = f"錯誤: {exc}"
        console.print(f"[bold red]{detail}[/bold red]")

# Run the report only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()