# Generate Training Data Files

建立四個檔案用於訓練：
- train.txt
- val.txt  
- test.txt
- disease_map.txt（注意：下方程式實際寫入的檔名為 `disease_map`，沒有 `.txt` 副檔名）

這些檔案的格式與 `official` 資料夾中的檔案相同。

In [4]:
import pandas as pd
import os
from pathlib import Path

# Set paths.
# The project root defaults to the original workstation location, but it can be
# overridden with the PACS_BASE_PATH environment variable so the notebook runs
# on other machines without editing this cell.
base_path = Path(os.environ.get("PACS_BASE_PATH", "/home/stoneyew/Desktop/pacs"))
datasets_path = base_path / "datasets"
output_path = base_path / "experiment2"

# Read the per-split label CSV files.
train_df = pd.read_csv(datasets_path / "train_labels.csv")
val_df = pd.read_csv(datasets_path / "val_labels.csv")
test_df = pd.read_csv(datasets_path / "test_labels.csv")

# Sanity check: split sizes, column names, and the first training rows.
print("Train shape:", train_df.shape)
print("Val shape:", val_df.shape)
print("Test shape:", test_df.shape)
print("\nTrain columns:", train_df.columns.tolist())
print("Train head:")
print(train_df.head())

Train shape: (2094, 7)
Val shape: (300, 7)
Test shape: (598, 7)

Train columns: ['L-CC', 'R-CC', 'L-MLO', 'R-MLO', 'label', 'description', 'patient_id']
Train head:
                                  L-CC                                 R-CC  \
0  Category 3/XA2016080015193/L-CC.jpg  Category 3/XA2016080015193/R-CC.jpg   
1  Category 2/XA2016070003097/L-CC.jpg  Category 2/XA2016070003097/R-CC.jpg   
2  Category 1/XA2017030012689/L-CC.jpg  Category 1/XA2017030012689/R-CC.jpg   
3  Category 0/XA2017050007157/L-CC.jpg  Category 0/XA2017050007157/R-CC.jpg   
4  Category 0/XA2016080012358/L-CC.jpg  Category 0/XA2016080012358/R-CC.jpg   

                                  L-MLO                                 R-MLO  \
0  Category 3/XA2016080015193/L-MLO.jpg  Category 3/XA2016080015193/R-MLO.jpg   
1  Category 2/XA2016070003097/L-MLO.jpg  Category 2/XA2016070003097/R-MLO.jpg   
2  Category 1/XA2017030012689/L-MLO.jpg  Category 1/XA2017030012689/R-MLO.jpg   
3  Category 0/XA2017050007157/L-MLO.

In [5]:
# Create disease_map (BI-RADS categories from CSV labels), one category per line.
disease_map = """Category 0
Category 1
Category 2
Category 3
Category 4
Category 5"""

# Make sure the destination folder exists so a fresh run does not fail with
# FileNotFoundError, then write the map with an explicit encoding.
disease_map_file = output_path / "PEAC/official/disease_map"
disease_map_file.parent.mkdir(parents=True, exist_ok=True)
with open(disease_map_file, "w", encoding="utf-8") as f:
    f.write(disease_map)

print("disease_map created!")
print("Categories:")
print(disease_map)

disease_map created!
Categories:
Category 0
Category 1
Category 2
Category 3
Category 4
Category 5


In [8]:
# Map a BI-RADS category to a one-hot list of binary labels
def create_disease_labels(label, num_classes=6):
    """
    Create a one-hot list of binary labels from a BI-RADS category.

    Args:
        label: BI-RADS category index. Anything accepted by ``int()`` works
            (e.g. ``3``, ``3.0`` or ``"3"``).
        num_classes: Length of the returned list. Defaults to 6, i.e.
            categories 0-5.

    Returns:
        A list of ``num_classes`` ints with a 1 at position ``label`` and 0
        everywhere else. If ``label`` is outside ``0 .. num_classes - 1`` the
        list is all zeros (no error is raised).

    Raises:
        ValueError, TypeError: If ``label`` cannot be converted by ``int()``.
    """
    # Start with every category marked absent.
    diseases = [0] * num_classes

    # Normalise the label to an int, then flag the matching position only when
    # it is a valid index; out-of-range labels leave the list all zeros.
    label = int(label)
    if 0 <= label < num_classes:
        diseases[label] = 1

    return diseases

In [1]:
import timm
# Show every timm model name matching the "maxvit*" pattern.
maxvit_model_names = timm.list_models("maxvit*")
print(maxvit_model_names)

  from .autonotebook import tqdm as notebook_tqdm


['maxvit_base_tf_224', 'maxvit_base_tf_384', 'maxvit_base_tf_512', 'maxvit_large_tf_224', 'maxvit_large_tf_384', 'maxvit_large_tf_512', 'maxvit_nano_rw_256', 'maxvit_pico_rw_256', 'maxvit_rmlp_base_rw_224', 'maxvit_rmlp_base_rw_384', 'maxvit_rmlp_nano_rw_256', 'maxvit_rmlp_pico_rw_256', 'maxvit_rmlp_small_rw_224', 'maxvit_rmlp_small_rw_256', 'maxvit_rmlp_tiny_rw_256', 'maxvit_small_tf_224', 'maxvit_small_tf_384', 'maxvit_small_tf_512', 'maxvit_tiny_pm_256', 'maxvit_tiny_rw_224', 'maxvit_tiny_rw_256', 'maxvit_tiny_tf_224', 'maxvit_tiny_tf_384', 'maxvit_tiny_tf_512', 'maxvit_xlarge_tf_224', 'maxvit_xlarge_tf_384', 'maxvit_xlarge_tf_512']
