# ViT 2: Electric Transformaroo
We learned a few lessons from the previous experiment. First and foremost, a larger model doesn't always mean a better one: we used the large ImageNet transformer model, but that didn't actually lead to good predictions. I also think I made a number of mistakes along the way, and the large model takes far too long to train for me to iterate at all. So we are going to switch to a smaller model. We will follow the ViTMAE method: pretrain with masked autoencoding in a self-supervised manner on the entire training set, then fine-tune on the prediction task. Guide found here: https://github.com/huggingface/transformers/blob/main/examples/pytorch/image-pretraining/run_mae.py

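This notebook contains the fine-tuning process. It was originally described as fine-tuning on binary cross-entropy (BCE) loss; note, however, that the code below trains with `AUCTrainer` (configured with `imratio`, `margin`, and `gamma`) and writes to an "auc-fine-tuned" output directory, so the objective actually used in this run should be confirmed against that trainer's implementation.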

In [1]:
# imports
import numpy as np
import pandas as pd

%load_ext autoreload
%autoreload 2

import torch
print(torch.cuda.is_available())

from transformers import ViTFeatureExtractor, ViTForImageClassification, ViTMAEForPreTraining, ViTMAEConfig
from transformers import TrainingArguments, Trainer
from torchvision.transforms import RandomHorizontalFlip, RandomResizedCrop
from torchvision.transforms.functional import InterpolationMode

from sklearn.model_selection import train_test_split

True


In [2]:
from dataloader import *
from utils import *
from trainer import *

In [10]:
# CONSTANTS

# --- Checkpoints and output locations ---
FEATURE_EXTRACTOR_NAME = 'facebook/vit-mae-base'
VIT_MODEL_NAME = 'vit-mae-chexpert-fandl-pretrain/'
OUTPUT_DIR = './vit-mae-chexpert-auc-fine-tuned-fandl-1'

# --- Data splitting and loading ---
TRAIN_SPLIT = 0.8  # passed as train_size to train_test_split
DATALOADER_NUM_WORKERS = 4
REMOVE_UNUSED_COLUMNS = False

# --- Batching ---
BATCH_SIZE = 48  # per-device batch size, used for both training and evaluation
GRAD_ACCUM_STEPS = 5

# --- Optimisation schedule ---
EPOCHS = 15
LEARNING_RATE = 1.5e-2
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.05
WEIGHT_DECAY = 1e-5
FP16 = True

# --- Logging and evaluation cadence ---
LOGGING_STRATEGY = "steps"
LOGGING_STEPS = 10
EVALUATION_STRATEGY = "epoch"
# NOTE(review): with EVALUATION_STRATEGY = "epoch", a step-based eval interval
# is normally not consulted by TrainingArguments -- confirm whether this is
# still needed.
EVAL_STEPS = 200

# --- AUCTrainer loss settings (forwarded as margin= / gamma=) ---
MARGIN = 1.0
GAMMA = 500

In [4]:
# Per-channel normalisation statistics handed to the feature extractor,
# overriding whatever the pretrained preprocessing config specifies.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

feature_extractor = ViTFeatureExtractor.from_pretrained(
    FEATURE_EXTRACTOR_NAME,
    image_mean=channel_mean,
    image_std=channel_std,
)

In [5]:
# Augmentations handed to the training dataset only (the eval dataset is
# built without a `transforms` argument).
crop_size = feature_extractor.size
random_crop = RandomResizedCrop(
    crop_size,
    scale=(0.2, 1.0),
    interpolation=InterpolationMode.BICUBIC,
)
random_flip = RandomHorizontalFlip()

transforms = [random_crop, random_flip]

In [6]:
# Seed used for both NumPy's global RNG and the train/eval split.
SPLIT_SEED = 42

# Keep seeding the global RNG: code outside this cell may draw from it.
np.random.seed(SPLIT_SEED)

# Read the full labelled training CSV, then carve out a held-out eval slice.
# The raw frame keeps its own name so `train_df` always means "training rows
# after the split" regardless of how often the cell is re-run.
all_train_df = pd.read_csv("ChexPert/train.csv")

# Pass random_state explicitly so the split does not depend on how much of
# the global NumPy RNG stream was consumed before this line.
# NOTE(review): rows are split at random; if train.csv holds several rows per
# patient, the same patient can land in both splits -- consider a grouped
# split if that matters for the eval numbers.
train_df, eval_df = train_test_split(
    all_train_df,
    train_size=TRAIN_SPLIT,
    random_state=SPLIT_SEED,
)

# Label-handling options shared by the train and eval datasets.
uncertainty_kwargs = dict(
    uncertainty_method="smooth",
    smoothing_lower_bound=0.55,
    smoothing_upper_bound=0.85,
)

# Only the training dataset receives the augmentation transforms.
train_dataset = ChexpertViTDataset(
    "ChexPert/data",
    train_df,
    feature_extractor,
    transforms=transforms,
    classes=COMPETITION_TASKS,
    **uncertainty_kwargs,
)
eval_dataset = ChexpertViTDataset(
    "ChexPert/data",
    eval_df,
    feature_extractor,
    classes=COMPETITION_TASKS,
    **uncertainty_kwargs,
)

In [7]:
# Per-task imbalance ratios forwarded to AUCTrainer (one entry per label).
# NOTE(review): presumably the positive-label fraction for each task, in the
# same order as COMPETITION_TASKS -- confirm against how these were computed.
imratio = [
    0.1497333982353369,
    0.12095831165270714,
    0.0663119436471569,
    0.2344081328924473,
    0.3860606162333339,
]

In [8]:
# Load the pretrained checkpoint into a ViT classification model with one
# output per dataset label. The checkpoint's decoder weights are not used by
# this model class (see the warning emitted below).
num_task_labels = len(train_dataset.labels)
model = ViTForImageClassification.from_pretrained(
    VIT_MODEL_NAME,
    num_labels=num_task_labels,
)
model = model.to("cuda")

You are using a model of type vit_mae to instantiate a model of type vit. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vit-mae-chexpert-fandl-pretrain/ were not used when initializing ViTForImageClassification: ['decoder.decoder_pred.weight', 'decoder.decoder_layers.2.attention.output.dense.weight', 'decoder.decoder_layers.4.attention.output.dense.weight', 'decoder.decoder_layers.1.intermediate.dense.bias', 'decoder.decoder_layers.4.attention.attention.query.weight', 'decoder.decoder_layers.3.output.dense.bias', 'decoder.decoder_layers.5.layernorm_before.bias', 'decoder.decoder_layers.3.layernorm_after.bias', 'decoder.decoder_layers.7.output.dense.weight', 'decoder.decoder_layers.3.attention.attention.key.bias', 'decoder.decoder_layers.6.attention.output.dense.weight', 'decoder.decoder_layers.6.attention.attention.query.weight', 'decoder.decoder_layers.4.layernorm_before.weight', 'decoder.decoder_layers.0.intermedi

In [11]:
# set up training arguments

# Maximum number of checkpoints kept in OUTPUT_DIR; older ones are deleted.
# Without a limit, a checkpoint is written every 500 steps and none are ever
# removed. The previous run of this notebook died with a serialization
# RuntimeError while saving, which is most commonly a symptom of the disk
# filling up.
SAVE_TOTAL_LIMIT = 2

training_args = TrainingArguments(
    # Where checkpoints and logs go, and how many checkpoints to retain.
    output_dir=OUTPUT_DIR,
    save_total_limit=SAVE_TOTAL_LIMIT,
    report_to="tensorboard",
    # Batching: effective train batch = BATCH_SIZE * GRAD_ACCUM_STEPS per device.
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM_STEPS,
    dataloader_num_workers=DATALOADER_NUM_WORKERS,
    # The datasets/collator are custom, so keep all columns they produce.
    remove_unused_columns=REMOVE_UNUSED_COLUMNS,
    # Optimisation schedule.
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    weight_decay=WEIGHT_DECAY,
    warmup_ratio=WARMUP_RATIO,
    fp16=FP16,
    # Evaluation cadence. eval_steps is only consulted when the strategy is
    # "steps"; it is kept so switching EVALUATION_STRATEGY needs no other edit.
    evaluation_strategy=EVALUATION_STRATEGY,
    eval_steps=EVAL_STEPS,
    # Logging cadence.
    logging_strategy=LOGGING_STRATEGY,
    logging_steps=LOGGING_STEPS,
)

In [12]:
# Settings specific to AUCTrainer's loss.
auc_loss_kwargs = {
    "imratio": imratio,
    "margin": MARGIN,
    "gamma": GAMMA,
}

# Model, arguments, data and metric hooks for the trainer.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "compute_metrics": compute_metrics,
    "data_collator": collate_fn,
    "train_dataset": train_dataset,
    "eval_dataset": eval_dataset,
}

trainer = AUCTrainer(**auc_loss_kwargs, **trainer_kwargs)

Using amp half precision backend


In [13]:
# Run the fine-tuning loop using the settings in `training_args`; evaluation
# metrics are reported once per epoch on `eval_dataset`.
trainer.train()

***** Running training *****
  Num examples = 152878
  Num Epochs = 15
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 240
  Gradient Accumulation steps = 5
  Total optimization steps = 9555


Epoch,Training Loss,Validation Loss,Accuracy,F1,Auc Atelectasis,Auc Cardiomegaly,Auc Consolidation,Auc Edema,Auc Pleural effusion,Average Auc
1,0.5794,0.5716,0.254161,0.294534,0.579493,0.585555,0.580639,0.724,0.671271,0.628192
2,0.4178,0.452843,0.19644,0.52455,0.597232,0.614923,0.622189,0.746396,0.752835,0.666715
3,0.3989,0.408223,0.100789,0.533448,0.605006,0.662061,0.629376,0.75106,0.782794,0.68606
4,0.365,0.368,0.098613,0.545097,0.606642,0.750744,0.633435,0.771743,0.801871,0.712887
5,0.3416,0.344932,0.082125,0.539775,0.615015,0.778127,0.638374,0.786769,0.812897,0.726237
6,0.3371,0.336182,0.106713,0.547382,0.617072,0.786406,0.638981,0.795993,0.816964,0.731083
7,0.2925,0.332017,0.111484,0.548983,0.616372,0.791806,0.637332,0.801679,0.81933,0.733304
8,0.3213,0.328121,0.089701,0.545831,0.621864,0.796869,0.639855,0.805032,0.823059,0.737335
9,0.348,0.327259,0.103358,0.541047,0.623173,0.796248,0.639191,0.806344,0.823748,0.737741
10,0.303,0.325687,0.106818,0.545543,0.622052,0.798303,0.638295,0.809131,0.82189,0.737934


Saving model checkpoint to ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-500
Configuration saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-500/config.json
Model weights saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 38149
  Batch size = 48
Saving model checkpoint to ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-1000
Configuration saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-1000/config.json
Model weights saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 38149
  Batch size = 48
Saving model checkpoint to ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-1500
Configuration saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-1500/config.json
Model weights saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/checkpoint-1500/pytorch_model.bin
***** Running Evaluation ***

RuntimeError: [enforce fail at inline_container.cc:300] . unexpected pos 40216896 vs 40216784

In [14]:
# Persist the model currently held by the trainer (weights + config) to the
# trainer's output directory.
trainer.save_model()

Saving model checkpoint to ./vit-mae-chexpert-auc-fine-tuned-fandl-1
Configuration saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/config.json
Model weights saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-1/pytorch_model.bin


In [18]:
# Build a dataset from the separate validation CSV, with the same label
# settings as the training data and with use_frontal switched off.
validation_df = pd.read_csv("ChexPert/valid.csv")

valid_dataset_kwargs = dict(
    use_frontal=False,
    classes=COMPETITION_TASKS,
    uncertainty_method="smooth",
    smoothing_lower_bound=0.55,
    smoothing_upper_bound=0.85,
)
valid_dataset = ChexpertViTDataset(
    "ChexPert/data",
    validation_df,
    feature_extractor,
    **valid_dataset_kwargs,
)

In [19]:
# Evaluate on the dataset built from valid.csv rather than the trainer's
# default eval_dataset (the held-out slice of train.csv).
trainer.evaluate(eval_dataset=valid_dataset)

***** Running Evaluation *****
  Num examples = 234
  Batch size = 48


{'eval_loss': 0.0041694967076182365,
 'eval_accuracy': 0.14957264957264957,
 'eval_f1': 0.515986769570011,
 'eval_AUC_Atelectasis': 0.8620535714285715,
 'eval_AUC_Cardiomegaly': 0.7739192062367115,
 'eval_AUC_Consolidation': 0.9019297452133272,
 'eval_AUC_Edema': 0.8767195767195767,
 'eval_AUC_Pleural Effusion': 0.8564661721333452,
 'eval_average_auc': 0.8542176543463065}

In [17]:
# Evaluate on the trainer's default eval_dataset (the held-out slice of
# train.csv).
# NOTE(review): this cell's execution count (17) is lower than the two cells
# before it (18, 19), i.e. it was run out of order -- re-run top to bottom
# before trusting the outputs as a consistent set.
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 38149
  Batch size = 48


{'eval_loss': 0.322690486907959,
 'eval_accuracy': 0.10815486644472988,
 'eval_f1': 0.5490435037351692,
 'eval_AUC_Atelectasis': 0.6269687181902108,
 'eval_AUC_Cardiomegaly': 0.801791253731213,
 'eval_AUC_Consolidation': 0.6394674828343314,
 'eval_AUC_Edema': 0.811238557111992,
 'eval_AUC_Pleural Effusion': 0.8265778366669982,
 'eval_average_auc': 0.741208769706949}