In [1]:
import os
import pickle
import pandas as pd

import sys
sys.path.append('..')
import utils

from sklearn.metrics import accuracy_score

In [2]:
# Constants and arguments

# Where the labelled TNM-stage CSVs live and where model predictions were saved
data_dir = "../../data/tnm_stage"
out_preds_path = "./model_preds"

# Which model / prompt run is being evaluated
model_name = "llama_8B"
prompt_name = "tnm_zs"

# Predictions-file template: the doubled braces keep a literal "{dataset}"
# placeholder that is filled in later via out_path.format(dataset=...).
preds_filename_template = f"tnm_label_{model_name}_prompt_{prompt_name}_{{dataset}}_preds.csv"
out_path = os.path.join(out_preds_path, preds_filename_template)

# Data loading

## Training

In [3]:
# Training split of the TCGA report labels, read from data_dir.
train_csv_path = os.path.join(data_dir, "train_tcga_reports_tnm_stage.csv")
df_train = pd.read_csv(train_csv_path)

In [4]:
# Sanity check of the loaded training table: (n_rows, n_columns).
df_train.shape

(1947, 6)

In [5]:
# patient_id becomes the row index, so it must not contain repeated values.
train_patient_ids = df_train['patient_id']
assert train_patient_ids.is_unique
# Assigning the raw values keeps 'patient_id' as a regular column as well.
df_train.index = train_patient_ids.values

## Test

In [9]:
# Test split of the TCGA report labels, read from data_dir.
test_csv_path = os.path.join(data_dir, "test_tcga_reports_tnm_stage.csv")
df_test = pd.read_csv(test_csv_path)

In [10]:
# Sanity check of the loaded test table: (n_rows, n_columns).
df_test.shape

(1170, 6)

In [11]:
# patient_id becomes the row index, so it must not contain repeated values.
test_patient_ids = df_test['patient_id']
assert test_patient_ids.is_unique
# Assigning the raw values keeps 'patient_id' as a regular column as well.
df_test.index = test_patient_ids.values

## Model preds

### Test

In [16]:
# Fill the "{dataset}" placeholder of the predictions template with the test split.
test_preds_csv_path = out_path.format(dataset="test")
df_test_pred = pd.read_csv(test_preds_csv_path)

In [17]:
# Sanity check of the loaded predictions table: (n_rows, n_columns).
df_test_pred.shape

(1170, 8)

In [18]:
# Predictions are later looked up by patient_id, so each id may appear only once.
pred_patient_ids = df_test_pred['patient_id']
assert pred_patient_ids.is_unique
# Assigning the raw values keeps 'patient_id' as a regular column as well.
df_test_pred.index = pred_patient_ids.values

# T subtask

In [19]:
# Overall accuracy on the T subtask.
# Predictions are selected by patient_id label in df_test's row order,
# so the gold and predicted arrays line up element by element.
t_gold = df_test['t_label'].values
t_pred = df_test_pred.loc[df_test['patient_id'].values, 't_label'].values
accuracy_score(y_true=t_gold, y_pred=t_pred)

0.541025641025641

In [20]:
# Per-label breakdown for the T subtask. The rendered table lists precision,
# recall and F1 per label plus n_train / n_val columns (presumably label counts
# taken from df_train_data and df_data -- confirm in utils.calculate_performance).
# Predictions are selected by patient_id label in df_test's row order so they
# line up with the gold labels.
utils.calculate_performance(
    arr_gs=df_test['t_label'].values,
    arr_preds=df_test_pred.loc[df_test['patient_id'].values, 't_label'].values,
    arr_labels=sorted(df_test['t_label'].unique()),
    col_label="t_label",  # plain literal: the f-prefix had no placeholders
    df_data=df_test,
    df_train_data=df_train
)

Unnamed: 0,label,precision,recall,f1,n_train,n_val
0,T1,0.788889,0.270992,0.403409,435,262
1,T2,0.525727,0.570388,0.547148,689,412
2,T3,0.484429,0.777778,0.597015,596,360
3,T4,0.854545,0.345588,0.492147,227,136


# N subtask

In [21]:
# Overall accuracy on the N subtask.
# Predictions are selected by patient_id label in df_test's row order,
# so the gold and predicted arrays line up element by element.
accuracy_score(
    y_true=df_test['n_label'].values,  # plain literal: the f-prefix had no placeholders
    y_pred=df_test_pred.loc[df_test['patient_id'].values, 'n_label'].values
)

0.8427350427350427

In [22]:
# Per-label breakdown for the N subtask. The rendered table lists precision,
# recall and F1 per label plus n_train / n_val columns (presumably label counts
# taken from df_train_data and df_data -- confirm in utils.calculate_performance).
# Predictions are selected by patient_id label in df_test's row order so they
# line up with the gold labels.
utils.calculate_performance(
    arr_gs=df_test['n_label'].values,
    arr_preds=df_test_pred.loc[df_test['patient_id'].values, 'n_label'].values,
    arr_labels=sorted(df_test['n_label'].unique()),
    col_label="n_label",  # plain literal: the f-prefix had no placeholders
    df_data=df_test,
    df_train_data=df_train
)

Unnamed: 0,label,precision,recall,f1,n_train,n_val
0,N0,0.959762,0.948454,0.954074,1129,679
1,N1,0.846473,0.675497,0.751381,503,302
2,N2,0.562842,0.725352,0.633846,236,142
3,N3,0.466667,0.744681,0.57377,79,47


# M subtask

In [23]:
# Overall accuracy on the M subtask.
# Predictions are selected by patient_id label in df_test's row order,
# so the gold and predicted arrays line up element by element.
m_gold = df_test['m_label'].values
m_pred = df_test_pred.loc[df_test['patient_id'].values, 'm_label'].values
accuracy_score(y_true=m_gold, y_pred=m_pred)

0.9085470085470085

In [24]:
# Per-label breakdown for the M subtask. The rendered table lists precision,
# recall and F1 per label plus n_train / n_val columns (presumably label counts
# taken from df_train_data and df_data -- confirm in utils.calculate_performance).
# Predictions are selected by patient_id label in df_test's row order so they
# line up with the gold labels.
utils.calculate_performance(
    arr_gs=df_test['m_label'].values,  # plain literal: the f-prefix had no placeholders
    arr_preds=df_test_pred.loc[df_test['patient_id'].values, 'm_label'].values,
    arr_labels=sorted(df_test['m_label'].unique()),
    col_label="m_label",  # plain literal: the f-prefix had no placeholders
    df_data=df_test,
    df_train_data=df_train
)

Unnamed: 0,label,precision,recall,f1,n_train,n_val
0,M0,0.96607,0.935219,0.950394,1821,1096
1,M1,0.348624,0.513514,0.415301,126,74
