In [None]:
#download pretrained ResNet 18 models
!wget -O workspace/models/resnet18-f37072fd.pth https://download.pytorch.org/models/resnet18-f37072fd.pth

In [None]:
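# Disabled (commented out): fetches models.zip with gdown and unpacks it
# into workspace/ without overwriting files that already exist.
# !gdown -O models.zip 1dohuMdbD_NWsFb-RFw8GJwPgD_vVmMqQ
# !unzip -qqnd workspace models.zip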

In [None]:
# extract tiles
# generate embeddings
# generate resized datasets from page 5
# train factors
# extract factors
# train relapse model


# Prepare datasets

In [None]:
# generate resized datasets from page 5
!python gen_p5_datasets.py --stage train --raw_img_dir workspace/data/train/whole_slides --meta_csv_path workspace/data/train/train_metadata.csv

##### EXTRACTING PAGE 5 DATASETS #####


In [None]:
# extract tiles
!python gen_masks.py --stage train --raw_img_dir workspace/data/train/whole_slides --meta_csv_path workspace/data/train/train_metadata.csv
!python gen_tiles_56_p3_224.py --stage train --raw_img_dir workspace/data/train/whole_slides --meta_csv_path workspace/data/train/train_metadata.csv
!python gen_tiles_40_p3_320.py --stage train --raw_img_dir workspace/data/train/whole_slides --meta_csv_path workspace/data/train/train_metadata.csv


In [None]:
# generate embeddings
!python gen_embeddings.py --expr expr_56_3_224 --stage train --meta_csv_path workspace/data/train/train_metadata.csv
!python gen_embeddings.py --expr expr_40_2_320 --stage train --meta_csv_path workspace/data/train/train_metadata.csv

# Train

In [None]:
#train swin384 model
EXPR='swin384'
for FOLD in range(5):
    !python train_swin.py --expr {EXPR} --fold {FOLD} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv
    !rm -rf output

In [None]:
#train swin256 model
EXPR='swin256'
for FOLD in range(5):
    !python train_swin.py --expr {EXPR} --fold {FOLD} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv
    !rm -rf output

In [None]:
#train relapse predictive factors (breslow, ulceration)
for EXPR,SEED in [('expr_56_3_224',41),('expr_40_2_320',320)]:
  for VAR in ['breslow','ulceration']:
    !python train_factors.py --expr {EXPR} --var {VAR} --seed {SEED} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv


In [None]:
#generate oof predictions for factors (breslow, ulceration)
for EXPR,SEED in [('expr_56_3_224',41),('expr_40_2_320',320)]:
  for VAR in ['breslow','ulceration']:
    !python gen_factors_train.py --expr {EXPR} --var {VAR} --seed {SEED} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv


In [None]:
#train mlp with breslow and ulceration predictions as features
for EXPR in ['expr_56_3_224','expr_40_2_320']:
  !python train_mlp.py --expr {EXPR} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv
  

# (optional) evaluate training set

In [None]:
#eval swin384 model
EXPR='swin384'
!python gen_preds_swin_train.py --expr {EXPR} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv


In [None]:
#eval swin256 model
EXPR='swin256'
!python gen_preds_swin_train.py --expr {EXPR} --meta_csv_path workspace/data/train/train_metadata.csv --label_csv_path workspace/data/train/train_labels.csv


In [None]:
import pandas as pd, glob
import sklearn.metrics as skm
# Directory that the scoring cells below read prediction CSVs from.
ROOT_PRED_DIR = './workspace/preds/train'

In [None]:
# Load every pred_*.csv stored for this experiment, average the rows per
# filename, and report the log loss of `pred` against `relapse`.
EXPR = 'expr_56_3_224'
d0 = (
    pd.concat(
        pd.read_csv(csv_path)
        for csv_path in glob.glob(f'{ROOT_PRED_DIR}/{EXPR}/pred_*.csv')
    )
    .groupby('filename')
    .mean()
    .reset_index()
)
skm.log_loss(d0.relapse, d0.pred)
# 0.3400659913829507

In [None]:
# Load every pred_*.csv stored for this experiment, average the rows per
# filename, and report the log loss of `pred` against `relapse`.
EXPR = 'expr_40_2_320'
d1 = (
    pd.concat(
        pd.read_csv(csv_path)
        for csv_path in glob.glob(f'{ROOT_PRED_DIR}/{EXPR}/pred_*.csv')
    )
    .groupby('filename')
    .mean()
    .reset_index()
)
skm.log_loss(d1.relapse, d1.pred)
# 0.3452049203765574

In [None]:
# Ensemble of the two experiments: average d0 and d1 per filename, then
# report log loss, accuracy (predictions rounded to 0/1) and ROC AUC.
preds_mil = pd.concat([d0, d1]).groupby('filename').mean().reset_index()
# The metrics must be computed on `preds_mil`; the name `d` is never defined
# in this notebook and raises NameError on a fresh kernel.
sc_log = skm.log_loss(preds_mil.relapse, preds_mil.pred)
sc_acc = skm.accuracy_score(preds_mil.relapse, preds_mil.pred.round())
sc_auc = skm.roc_auc_score(preds_mil.relapse, preds_mil.pred)
sc_log, sc_acc, sc_auc
# (0.3395005245986339, 0.8554396423248882, 0.8301126511059269)

In [None]:
# Read the swin256 / swin384 prediction CSVs and print each one's log loss
# of `relapse_pred` against `relapse`.
pred_swin256 = pd.read_csv(f'{ROOT_PRED_DIR}/pred_swin256.csv')
pred_swin384 = pd.read_csv(f'{ROOT_PRED_DIR}/pred_swin384.csv')

sc = skm.log_loss(pred_swin256.relapse, pred_swin256.relapse_pred)
print(sc)
sc = skm.log_loss(pred_swin384.relapse, pred_swin384.relapse_pred)
print(sc)
# 0.3481908503143639
# 0.3511861126354013

In [None]:
# Final ensemble: stack all four prediction frames, average per filename
# (filename stays as the index), then report log loss, accuracy and ROC AUC.
# NOTE(review): the swin frames are scored through `relapse_pred` elsewhere in
# this notebook, while this cell averages `pred` - confirm the swin CSVs also
# carry a `pred` column, otherwise they do not contribute to this mean.
ensemble_members = [d0, d1, pred_swin256, pred_swin384]
preds_final = pd.concat(ensemble_members).groupby('filename').mean()

sc_log = skm.log_loss(preds_final.relapse, preds_final.pred)
sc_acc = skm.accuracy_score(preds_final.relapse, preds_final.pred.round())
sc_auc = skm.roc_auc_score(preds_final.relapse, preds_final.pred)
sc_log, sc_acc, sc_auc
# (0.3351098028650469, 0.8561847988077497, 0.8390448982646989) 