In [8]:
from data import *
from models import *
from utils import *

from sklearn.metrics import f1_score, accuracy_score

# --- Filesystem locations (absolute, machine-specific paths) ---
MAE_DIR = '/home/neelamlab/ninad/MAE'
ROOT_DIR = '/data/ninad/DATASET_linear'
LABEL_DIR = '/data/ninad/Metadata'
RESULT_DIR = MAE_DIR + '/results'
LOG_DIR = MAE_DIR + '/logs/'

# --- Run settings ---
# NOTE(review): TASK is 'regression', yet the metrics computed further down
# in this notebook are F1 / accuracy -- confirm which task is intended.
TASK = 'regression'
# NOTE(review): the meaning of these two values is not visible in this
# notebook; presumably they are consumed by the star-imported project modules.
CDRGLOB = 0.0
ONLY_3T_SCANS = 0.001

# --- Reproducibility: seed NumPy and PyTorch separately ---
NP_SEED, TORCH_SEED = 42, 36
np.random.seed(NP_SEED)
torch.manual_seed(TORCH_SEED)

# Logger built by the project helper, pointed at LOG_DIR.
LOGGER = setup_logger(logs_dir=LOG_DIR)

In [4]:
def plot_brain_age_delta(true,predicted,save=False,name='some_name'):
    """Scatter predicted against true values with a dashed y=x reference line.

    Both inputs are multiplied by 100 before plotting and scoring, so the MAE
    shown in the title is expressed in those scaled units.
    NOTE(review): presumably the inputs are ages stored as age/100 -- confirm.

    Args:
        true: 1-D sequence of ground-truth values.
        predicted: 1-D sequence of predicted values, same length as ``true``.
        save: if True, write the figure to ``f'{name}.png'`` at 400 dpi and
            close it; otherwise display it with ``plt.show()``.
        name: label used in the plot title and as the output file stem.

    Raises:
        ValueError: if ``true`` or ``predicted`` is empty.
    """
    true = np.asarray(true)
    predicted = np.asarray(predicted)
    if true.size == 0 or predicted.size == 0:
        raise ValueError('plot_brain_age_delta requires non-empty true and predicted values')

    # Round the limits outward. Plain int() truncates toward zero, which puts
    # the axis limits inside the data range (e.g. a max of 1.05 -> 1, a min of
    # -0.3 -> 0) and silently clips points off the plot.
    max_val = int(np.ceil(max(true.max(), predicted.max())))
    min_val = int(np.floor(min(true.min(), predicted.min())))
    # Guard against a zero-width axis when every value is the same integer.
    if max_val <= min_val:
        max_val = min_val + 1
    print('MAX',max_val,"MIN:",min_val)

    # Limits and data are scaled by the same factor so they stay aligned.
    max_val *= 100
    min_val *= 100
    true = true*100
    predicted = predicted*100

    diagonal = np.linspace(min_val, max_val)
    plt.plot(diagonal, diagonal, color='red', linestyle='--', label='y=x')
    N = len(true)
    mae = mean_absolute_error(true,predicted)
    r2 = r2_score(true,predicted)
    plt.title(f'Brain age delta: {name}, MAE:{mae:.4f}, R2:{r2:.4f} for N={N}')
    plt.scatter(true, predicted, alpha=0.5, edgecolors='k')
    plt.ylim(min_val,max_val)
    plt.xlim(min_val,max_val)
    plt.ylabel('Predicted')
    plt.xlabel('True')
    if save:
        plt.savefig(f'{name}.png',dpi=400)
        # Close the figure so the next call starts from a clean canvas
        # instead of drawing on top of this scatter.
        plt.close()
    else:
        plt.show()

In [10]:
# Collect per-fold F1 / accuracy for every run directory whose name starts
# with 'SFCN'. Results are keyed as '<run>_f<fold>' in `runmetrics`.
# NOTE: this rebinds RESULT_DIR to a path relative to the current working
# directory, replacing the absolute path set in the configuration cell.
RESULT_DIR = 'results'
filename = 'test_predictions_TEST_DATA.csv'
runmetrics = {}
age_threshold = 0
for run in os.listdir(RESULT_DIR):
    if not run.startswith('SFCN'):
        continue
    print(run)
    for fold in range(5):
        csv_path = os.path.join(RESULT_DIR, run, f'fold_{fold}', filename)
        print(csv_path)
        # Folds without a predictions file are skipped silently.
        if not os.path.exists(csv_path):
            continue
        true, predicted = [], []
        # Each CSV row must unpack into exactly four fields; the third is used
        # as the true label and the fourth as the prediction.
        for _dataset_name, _unused, y_true, y_pred in pd.read_csv(csv_path).values:
            # Rows whose true value is below the threshold are dropped
            # (a no-op for non-negative values while age_threshold == 0).
            if not y_true < age_threshold:
                true.append(y_true)
                predicted.append(y_pred)
        fold_metrics = runmetrics[f'{run}_f{fold}'] = {}
        fold_metrics['F1'] = f1_score(true, predicted)
        fold_metrics['Acc'] = accuracy_score(true, predicted)

SFCN_seed_4
results/SFCN_seed_4/fold_0/test_predictions_TEST_DATA.csv
results/SFCN_seed_4/fold_1/test_predictions_TEST_DATA.csv
results/SFCN_seed_4/fold_2/test_predictions_TEST_DATA.csv
results/SFCN_seed_4/fold_3/test_predictions_TEST_DATA.csv
results/SFCN_seed_4/fold_4/test_predictions_TEST_DATA.csv
SFCN_seed_6
results/SFCN_seed_6/fold_0/test_predictions_TEST_DATA.csv
results/SFCN_seed_6/fold_1/test_predictions_TEST_DATA.csv
results/SFCN_seed_6/fold_2/test_predictions_TEST_DATA.csv
results/SFCN_seed_6/fold_3/test_predictions_TEST_DATA.csv
results/SFCN_seed_6/fold_4/test_predictions_TEST_DATA.csv
SFCN_seed_3
results/SFCN_seed_3/fold_0/test_predictions_TEST_DATA.csv
results/SFCN_seed_3/fold_1/test_predictions_TEST_DATA.csv
results/SFCN_seed_3/fold_2/test_predictions_TEST_DATA.csv
results/SFCN_seed_3/fold_3/test_predictions_TEST_DATA.csv
results/SFCN_seed_3/fold_4/test_predictions_TEST_DATA.csv
SFCN_seed_1
results/SFCN_seed_1/fold_0/test_predictions_TEST_DATA.csv
results/SFCN_seed_1/fold

In [11]:
# One row per '<run>_f<fold>' key, one column per metric.
df = pd.DataFrame(runmetrics).transpose()

# Column-wise mean and standard deviation over all rows, rounded to 4 places.
mean_values, std_values = df.mean().round(4), df.std().round(4)

# Text form "mean ± std" per metric; computed here but not appended to df.
mean_std = mean_values.astype(str) + " ± " + std_values.astype(str)

df.head()

Unnamed: 0,F1,Acc
SFCN_seed_4_f0,0.757576,0.634286
SFCN_seed_4_f1,0.740741,0.6
SFCN_seed_4_f2,0.743494,0.605714
SFCN_seed_4_f3,0.743494,0.605714
SFCN_seed_4_f4,0.769231,0.657143


In [14]:
# Row labels have the form '<run>_f<fold>'. Recover the run name by removing
# only the trailing '_f<fold>' part, so grouping stays correct for run names
# with any number of underscores (keeping the first three '_' tokens would
# split 'SFCN_100_f0'..'_f4' into separate groups and merge 'SFCN_seed_4_v2'
# into 'SFCN_seed_4').
df['Seed'] = df.index.to_series().apply(lambda key: key.rsplit('_', 1)[0])

# Per-run mean and standard deviation of each metric across its folds.
summary_df = df.groupby('Seed').agg({'F1': ['mean', 'std'], 'Acc': ['mean', 'std']})
# Flatten the (metric, statistic) column MultiIndex into 'F1_mean', 'F1_std', ...
summary_df.columns = ['_'.join(col) for col in summary_df.columns]
print(summary_df)

              F1_mean    F1_std  Acc_mean   Acc_std
Seed                                               
SFCN_seed_1  0.752064  0.018326  0.626286  0.036500
SFCN_seed_2  0.745386  0.025825  0.616000  0.051333
SFCN_seed_3  0.734965  0.017711  0.595429  0.035777
SFCN_seed_4  0.750907  0.012181  0.620571  0.024445
SFCN_seed_6  0.375895  0.396232  0.573714  0.165152
