In [1]:
import pandas as pd
import os

## Utils

In [2]:
def get_data_df(data_dir, data_names, csv_file_names):
    data_dfs = {}
    for data_name in data_names:
        data_dfs[data_name] = {}
        for csv_file_name in csv_file_names:
            csv_file_path = os.path.join(
                data_dir,
                data_name,
                f'{csv_file_name}.csv'
            )
            data_dfs[data_name][csv_file_name] = pd.read_csv(csv_file_path)
    return data_dfs

In [3]:
def get_counts(df, data_names, csv_file_names, col_name):
    counts = []
    for data_name in data_names:
        for csv_file_name in csv_file_names[:2]:
            counts.append(len(df[data_name][csv_file_name][col_name]))
    return counts


def get_count_df(df, data_names, csv_file_names, idx_names, is_cnt_pid):
    if is_cnt_pid:
        col_name = 'pid'
        col_names = ['no. pid']
    else:
        col_name = 'ImageId'
        col_names = ['no. img']

    return pd.DataFrame(
        get_counts(df, data_names, csv_file_names, col_name=col_name),
        index=idx_names,
        columns=col_names
    ).T

In [4]:
def get_means(df, data_names, csv_file_names):
    means = []
    for data_name in data_names:
        for csv_file_name in csv_file_names:
            means.append(df[data_name][csv_file_name].mean())
    return means


def get_mean_df(df, data_names, csv_file_names, idx_names):
    return pd.DataFrame(
        get_means(df, data_names, csv_file_names),
        index=idx_names,
    ).T

## Eval1

In [5]:
data_dir = 'data_csv'
eval_dir = 'data_csv/eval'
data_names = ['crop2', 'adj_contrast', 'adj_contrast_corcta']
csv_file_names = ['tr', 'tt', 'corcta', 'corcta_adj_contract']
tr_tt_idx_names = ['c2_tr', 'c2_tt', 'ac_tr', 'ac_tt', 'acc_tr', 'acc_tt']

data_df = get_data_df(data_dir, data_names, csv_file_names[:2])
eval_df = get_data_df(eval_dir, data_names, csv_file_names)
cnt_img_df = get_count_df(data_df, data_names, csv_file_names, tr_tt_idx_names, is_cnt_pid=False)
cnt_pid_df = get_count_df(eval_df, data_names, csv_file_names, tr_tt_idx_names, is_cnt_pid=True)
tr_tt_df = get_mean_df(eval_df, data_names, csv_file_names[:2], tr_tt_idx_names)
corcta_df = get_mean_df(eval_df, data_names, csv_file_names[2:], tr_tt_idx_names).T

  """


In [6]:
print('c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料')
print('tr: 訓練資料集, tt: 測試資料集')
pd.concat([tr_tt_df, cnt_img_df,  cnt_pid_df]).T

c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料
tr: 訓練資料集, tt: 測試資料集


Unnamed: 0,dc,hd95,no. img,no. pid
c2_tr,0.937147,4.595943,2470.0,62.0
c2_tt,0.830196,14.493962,358.0,7.0
ac_tr,0.944951,4.442906,2470.0,62.0
ac_tt,0.84082,12.433626,358.0,7.0
acc_tr,0.943632,4.279438,2697.0,63.0
acc_tt,0.831059,12.305764,358.0,7.0


In [7]:
print('c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料')
print('c: 醫院資料, ca: 醫院資料+調整對比度')
corcta_df

c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料
c: 醫院資料, ca: 醫院資料+調整對比度


Unnamed: 0,dc,hd95
c2_tr,0.439503,33.958421
c2_tt,0.451882,33.617197
ac_tr,0.505809,30.655261
ac_tt,0.515323,28.934291
acc_tr,0.885911,2.173923
acc_tt,0.891567,1.663185


In [8]:
eval_df['adj_contrast_corcta']['corcta_adj_contract']

Unnamed: 0,pid,dc,hd95
0,corcta_adj_contract,0.891567,1.663185


## Eval2

In [9]:
data_dir = 'data_csv'
eval_dir = 'data_csv/eval2'
data_names = ['crop2', 'adj_contrast', 'adj_contrast_corcta']
csv_file_names = ['tr', 'tt', 'corcta', 'corcta_adj_contract']
tr_tt_idx_names = ['c2_tr', 'c2_tt', 'ac_tr', 'ac_tt', 'acc_tr', 'acc_tt']

data_df = get_data_df(data_dir, data_names, csv_file_names[:2])
eval_df = get_data_df(eval_dir, data_names, csv_file_names)
cnt_img_df = get_count_df(data_df, data_names, csv_file_names, tr_tt_idx_names, is_cnt_pid=False)
cnt_pid_df = get_count_df(eval_df, data_names, csv_file_names, tr_tt_idx_names, is_cnt_pid=True)
tr_tt_df = get_mean_df(eval_df, data_names, csv_file_names[:2], tr_tt_idx_names)
corcta_df = get_mean_df(eval_df, data_names, csv_file_names[2:], tr_tt_idx_names).T

  """


In [10]:
print('c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料')
print('tr: 訓練資料集, tt: 測試資料集')
pd.concat([tr_tt_df, cnt_img_df,  cnt_pid_df]).T

c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料
tr: 訓練資料集, tt: 測試資料集


Unnamed: 0,dc,hd95,no. img,no. pid
c2_tr,0.937147,4.595943,2470.0,62.0
c2_tt,0.830196,14.493962,358.0,7.0
ac_tr,0.944951,4.442906,2470.0,62.0
ac_tt,0.84082,12.433626,358.0,7.0
acc_tr,0.943632,4.279438,2697.0,63.0
acc_tt,0.831059,12.305764,358.0,7.0


In [11]:
print('c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料')
print('c: 醫院資料, ca: 醫院資料+調整對比度')
corcta_df

c2: 裁切, ac: 調整對比度, acc: 調整對比度+醫院資料
c: 醫院資料, ca: 醫院資料+調整對比度


Unnamed: 0,dc,hd95
c2_tr,0.413286,33.958421
c2_tt,0.414429,33.617197
ac_tr,0.460865,30.655261
ac_tt,0.474125,28.934291
acc_tr,0.852204,2.173923
acc_tt,0.857859,1.663185


In [12]:
eval_df['adj_contrast_corcta']['corcta_adj_contract']

Unnamed: 0,pid,dc,hd95
0,corcta_adj_contract,0.857859,1.663185
