In [None]:
from glob import glob 
import pandas as pd
result_root_dir = "/workspace/pcos_dataset/results/label변경/binary/dinov2-base-imagenet1k-1-layer"
test_files = glob(f"{result_root_dir}/test_fold_*/test_metrics.csv")
test_files

In [None]:
from IPython.display import display
test_dict = {}
for file in test_files:
    fold = file.split("/")[-2]
    df = pd.read_csv(file)
    test_dict[fold] = df

# 첫번째 칼럼에 key값 들어가기
test_df = pd.concat(
    [df.assign(fold=int(key.split("_")[-1])+1) for key, df in test_dict.items()],
    ignore_index=True
)
test_df = test_df[['fold'] + [col for col in test_df.columns if col != 'fold']]

# 다양한 metric 컬럼 변형에 튼튼하게: 존재하는 컬럼만 사용해서 display_df와 평균/표준편차 구하기
def get_display_and_stats(test_df):
    # metric 후보 세트들 (첫번째는 fold로 시작, 나머지는 metric)
    metric_sets = [
        ['eval_accuracy', 'eval_f1_macro', 'eval_precision_macro', 'eval_recall_macro'],
        ['accuracy', 'f1_macro', 'precision_macro', 'recall_macro', 'roc_auc'],
        ['eval_accuracy', 'eval_f1', 'eval_roc_auc', 'eval_cohen_kappa', 'eval_recall', 'eval_precision'],
        ['eval_accuracy', 'eval_f1', 'eval_precision', 'eval_recall', 'eval_roc_auc_ovr'],  # multiclass에서 나오는 형태
    ]
    # always include fold!
    for candidate_metrics in metric_sets:
        col_list = ['fold'] + candidate_metrics
        available_cols = [c for c in col_list if c in test_df.columns]
        # fold 이외에 최소 2개의 metric이 있어야 의미 있음
        if len(available_cols) >= 3:
            display_df = test_df[available_cols].round(3).sort_values(by='fold', ascending=True)
            # fold 빼고 나머지를 mean/std
            metric_cols = [c for c in display_df.columns if c != 'fold']
            mean_series = display_df[metric_cols].mean()
            std_series = display_df[metric_cols].std()
            return display_df, mean_series, std_series
    # 마지막 대안: fold + 모든 수치형 컬럼(평균/표준편차 구할 수 있는 것)
    import numpy as np
    num_cols = test_df.select_dtypes(include=[np.number]).columns.tolist()
    if 'fold' in num_cols:
        metric_cols = [c for c in num_cols if c != 'fold']
        display_cols = ['fold'] + metric_cols
        display_df = test_df[display_cols].round(3).sort_values(by='fold', ascending=True)
        mean_series = display_df[metric_cols].mean()
        std_series = display_df[metric_cols].std()
        return display_df, mean_series, std_series
    else:
        raise ValueError("No usable metrics found in test_df columns: " + str(test_df.columns))

display_df, mean_series, std_series = get_display_and_stats(test_df)
        
# 평균 ± 표준편차 문자열로 만들기
mean_std_row = {
    'fold': 'Mean ± Std',
    **{col: f"{mean_series[col]:.3f} ± {std_series[col]:.3f}" for col in mean_series.index}
}


# 행 연결
display_df_with_stats = pd.concat(
    [
        display_df,
        pd.DataFrame([mean_std_row])
        #, pd.DataFrame([std_row])  # 필요시 추가
    ],
    ignore_index=True
)

# 95% CI 추가 
import scipy.stats as st
import numpy as np

# 95% CI를 계산할 함수
def mean_ci(series):
    arr = series.dropna().astype(float).values
    n = len(arr)
    if n < 2:
        return f"{arr.mean():.3f} ± NA"
    m = arr.mean()
    se = arr.std(ddof=1) / np.sqrt(n)
    h = se * st.t.ppf(0.975, n-1)
    return f"{m:.3f} ± {h:.3f}"

# fold 칼럼을 제외한 숫자형 평가 컬럼 찾기
metric_cols = [col for col in display_df.columns if col != 'fold']

display(display_df_with_stats)

## 모델 비교

In [18]:
from glob import glob 

result_root_dir = "/workspace/pcos_dataset/results/label변경/binary"
test_files = glob(f"{result_root_dir}/*/test_fold_0/test_metrics.csv")
test_files

['/workspace/pcos_dataset/results/label변경/binary/convnext-tiny-224/test_fold_0/test_metrics.csv',
 '/workspace/pcos_dataset/results/label변경/binary/vit-base-patch16-224/test_fold_0/test_metrics.csv',
 '/workspace/pcos_dataset/results/label변경/binary/dinov2-base-imagenet1k-1-layer/test_fold_0/test_metrics.csv',
 '/workspace/pcos_dataset/results/label변경/binary/resnet-50/test_fold_0/test_metrics.csv',
 '/workspace/pcos_dataset/results/label변경/binary/efficientnet-b0/test_fold_0/test_metrics.csv']

In [26]:
from IPython.display import display
import pandas as pd 

test_dict = {}
for file in test_files:
    model = file.split("/")[-3]
    df = pd.read_csv(file)
    test_dict[model] = df

# 첫번째 칼럼에 모델명을 넣기
test_df = pd.concat(
    [df.assign(model=model) for model, df in test_dict.items()],
    ignore_index=True
)
test_df = test_df[['model'] + [col for col in test_df.columns if col != 'model']]
print(test_df.columns)

# 사용할 대표적인 컬럼 조합을 리스트로 정의
candidate_column_sets = [
    ['fold', 'eval_accuracy', 'eval_f1_macro', 'eval_precision_macro', 'eval_recall_macro'],
    ['fold', 'accuracy', 'f1_macro', 'precision_macro', 'recall_macro', 'roc_auc'],
    ['fold', 'eval_accuracy', 'eval_f1', 'eval_roc_auc', 'eval_cohen_kappa', 'eval_recall', 'eval_precision'],
    # 만약 fold가 없는 경우 model이 fold 역할을 대신한다 생각하고 추가
    ['model', 'eval_accuracy', 'eval_f1_macro', 'eval_precision_macro', 'eval_recall_macro'],
    ['model', 'accuracy', 'f1_macro', 'precision_macro', 'recall_macro', 'roc_auc'],
    ['model', 'eval_accuracy', 'eval_f1', 'eval_roc_auc', 'eval_cohen_kappa', 'eval_recall', 'eval_precision'],
]

display_df = None
target_cols = None

for col_set in candidate_column_sets:
    if all(col in test_df.columns for col in col_set):
        # fold 또는 model을 첫 컬럼으로 표기
        key_col = col_set[0]
        metric_cols = [col for col in col_set if col != key_col]
        display_df = test_df[col_set].copy()
        display_df = display_df.round(3).sort_values(by=key_col, ascending=True)
        target_cols = metric_cols
        index_col_for_stats = key_col
        break

if display_df is None:
    # 그래도 못찾으면 model 열만 넣어서라도 만듦
    metric_cols = [col for col in test_df.columns if col != 'model']
    display_df = test_df[['model'] + metric_cols].copy()
    display_df = display_df.round(3)
    target_cols = metric_cols
    index_col_for_stats = 'model'

# 평균과 표준편차 계산: target_cols만 사용
mean_series = display_df[target_cols].mean()
std_series = display_df[target_cols].std()

# 평균 ± 표준편차 문자열로 만들기
mean_std_row = {
    display_df.columns[0]: 'Mean ± Std',
    **{col: f"{mean_series[col]:.3f} ± {std_series[col]:.3f}" for col in target_cols}
}

# 행 연결
display_df_with_stats = pd.concat(
    [
        display_df,
        pd.DataFrame([mean_std_row])
    ],
    ignore_index=True
)


display(display_df_with_stats)

Index(['model', 'eval_loss', 'eval_model_preparation_time', 'eval_accuracy',
       'eval_f1', 'eval_precision', 'eval_recall', 'eval_cohen_kappa',
       'eval_matthews_corrcoef', 'eval_roc_auc', 'eval_best_threshold',
       'eval_best_f1_at_threshold', 'eval_best_threshold_roc_auc',
       'eval_runtime', 'eval_samples_per_second', 'eval_steps_per_second'],
      dtype='object')


Unnamed: 0,model,eval_accuracy,eval_f1,eval_roc_auc,eval_cohen_kappa,eval_recall,eval_precision
0,convnext-tiny-224,0.832,0.589,0.835,0.484,0.537,0.652
1,dinov2-base-imagenet1k-1-layer,0.836,0.527,0.841,0.438,0.407,0.746
2,efficientnet-b0,0.751,0.52,0.781,0.356,0.602,0.458
3,resnet-50,0.775,0.0,0.665,0.0,0.0,0.0
4,vit-base-patch16-224,0.81,0.625,0.872,0.502,0.734,0.544
5,Mean ± Std,0.801 ± 0.037,0.452 ± 0.257,0.799 ± 0.082,0.356 ± 0.207,0.456 ± 0.281,0.480 ± 0.290


In [None]:
print(test_dict.keys())
print(display(test_dict['resnet-50']))

## SigLIP Zero-shot Classification 비교

In [28]:
from glob import glob 
import pandas as pd 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

result_root_dir = "/workspace/pcos_dataset/results/zero_shot/siglip2/google"
result_files = glob(f"{result_root_dir}/*/*.csv")



def multi_class_metrics(df, label_col = "label", pred_col = "preds"):
    # 다중분류 메트릭 계산
    labels = df[label_col]
    preds = df[pred_col]

    accuracy = accuracy_score(labels, preds)
    f1_macro = f1_score(labels, preds, average='macro')
    precision_macro = precision_score(labels, preds, average='macro')
    recall_macro = recall_score(labels, preds, average='macro')

    # 메트릭 결과를 데이터프레임으로 변환
    metrics_df = pd.DataFrame([{
        "accuracy": accuracy,
        "f1_macro": f1_macro,
        "precision_macro": precision_macro,
        "recall_macro": recall_macro
    }])
    return metrics_df

all_df = []
for result_file in result_files:
    df = pd.read_csv(result_file)
    label = result_file.split("/")[-2]
    metrics = multi_class_metrics(df)
    metrics.insert(0, 'model', label)  # result_file.split("/")[-2]를 첫 번째 열에 추가
    all_df.append(metrics)

all_df = pd.concat(all_df, ignore_index=True)

all_df.round(3)

Unnamed: 0,model,accuracy,f1_macro,precision_macro,recall_macro
0,siglip2-so400m-patch16-384,0.413,0.297,0.314,0.301
1,siglip2-base-patch32-256,0.248,0.18,0.446,0.278
2,siglip2-base-patch16-384,0.4,0.278,0.302,0.292
3,siglip2-so400m-patch14-384,0.488,0.287,0.29,0.3
4,siglip2-large-patch16-384,0.354,0.24,0.581,0.31


In [2]:
from transformers import AutoImageProcessor
processor = AutoImageProcessor.from_pretrained(
    "google/siglip2-so400m-patch16-384",
    cache_dir="/workspace/pcos_dataset/models",
)

processor.size

{'height': 384, 'width': 384}