In [1]:
# imports
import numpy as np
import pandas as pd

%load_ext autoreload
%autoreload 2

import torch
print(torch.cuda.is_available())

from transformers import ViTFeatureExtractor, ViTForImageClassification, ViTMAEForPreTraining, ViTMAEConfig
from transformers import TrainingArguments, Trainer
from torchvision.transforms import RandomHorizontalFlip, RandomResizedCrop
from torchvision.transforms.functional import InterpolationMode

from sklearn.model_selection import train_test_split

True


In [2]:
from dataloader import *
from utils import *
from trainer import *

In [13]:
# Run configuration: every tunable value used by the cells below lives here.

# -- Pretrained assets and output location --
FEATURE_EXTRACTOR_NAME = 'facebook/vit-mae-base'      # preprocessor loaded via ViTFeatureExtractor
VIT_MODEL_NAME = 'vit-mae-chexpert-fandl-pretrain/'   # checkpoint loaded into ViTForImageClassification
OUTPUT_DIR = './vit-mae-chexpert-auc-fine-tuned-fandl-4'

# -- Data --
TRAIN_SPLIT = 0.8              # fraction of train.csv rows kept for training
DATALOADER_NUM_WORKERS = 4
REMOVE_UNUSED_COLUMNS = False

# -- Batching --
# Effective train batch per device is BATCH_SIZE * GRAD_ACCUM_STEPS (48 * 5 = 240).
BATCH_SIZE = 48
GRAD_ACCUM_STEPS = 5

# -- Optimisation --
# Only LOW_LEARNING_RATE and HIGH_WEIGHT_DECAY are passed to TrainingArguments in the
# visible cells; HIGH_LEARNING_RATE and LOW_WEIGHT_DECAY are not referenced there.
HIGH_LEARNING_RATE = 1.5e-2
LOW_LEARNING_RATE = 1.5e-4
HIGH_WEIGHT_DECAY = 0.05
LOW_WEIGHT_DECAY = 1e-5
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.05
FP16 = True
EPOCHS = 1

# -- Logging and evaluation cadence (in optimisation steps) --
LOGGING_STRATEGY = "steps"
LOGGING_STEPS = 10
EVALUATION_STRATEGY = "steps"
EVAL_STEPS = 100

# -- Arguments forwarded to AUCTrainer --
MARGIN = 1.0
GAMMA = 500

In [4]:
# Load the image preprocessor for FEATURE_EXTRACTOR_NAME, passing explicit
# per-channel normalisation mean/std instead of relying on the stored values.
feature_extractor = ViTFeatureExtractor.from_pretrained(
    FEATURE_EXTRACTOR_NAME,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
)

In [5]:
# Augmentations handed to the training dataset only: a random crop covering
# 20%-100% of the image, resized (bicubic) to the feature extractor's input
# size, followed by a random horizontal flip.
transforms = [
    RandomResizedCrop(
        size=feature_extractor.size,
        scale=(0.2, 1.0),
        interpolation=InterpolationMode.BICUBIC,
    ),
    RandomHorizontalFlip(),
]

In [6]:
# Seed NumPy's global RNG. train_test_split and DataFrame.sample are both called
# without random_state below, so they draw from this global generator.
np.random.seed(42)

# Upper bound on the number of held-out rows used for the periodic evaluations.
EVAL_SUBSET_SIZE = 20000

# Keep the raw frame under its own name so the split can be re-derived from it.
raw_df = pd.read_csv("ChexPert/train.csv")

# NOTE(review): this is a row-level split. If train.csv holds several rows per
# patient, the same patient can end up in both splits — confirm whether a
# grouped (per-patient) split is needed.
train_df, eval_df = train_test_split(raw_df, train_size=TRAIN_SPLIT)

# DataFrame.sample raises ValueError when asked for more rows than exist, so cap
# the request at the size of the held-out split (relevant when TRAIN_SPLIT is
# raised or a smaller CSV is used).
eval_df = eval_df.sample(min(EVAL_SUBSET_SIZE, len(eval_df)))

# Label handling shared by both datasets.
# presumably "smooth" maps uncertain labels into [0.55, 0.85] — verify in dataloader.
# NOTE(review): the eval dataset uses the same uncertainty handling as training;
# confirm that is intended for metric computation.
dataset_kwargs = dict(
    classes=COMPETITION_TASKS,
    uncertainty_method="smooth",
    smoothing_lower_bound=0.55,
    smoothing_upper_bound=0.85,
)

# Only the training dataset receives the augmentation transforms.
train_dataset = ChexpertViTDataset(
    "ChexPert/data", train_df, feature_extractor, transforms=transforms, **dataset_kwargs
)
eval_dataset = ChexpertViTDataset(
    "ChexPert/data", eval_df, feature_extractor, **dataset_kwargs
)

In [7]:
# Per-class imbalance ratios passed to AUCTrainer (five values, one per label).
# presumably the positive-sample fraction of each class, in the same order as
# the dataset labels — TODO confirm against how AUCTrainer consumes them.
imratio = [
    0.1497333982353369,
    0.12095831165270714,
    0.0663119436471569,
    0.2344081328924473,
    0.3860606162333339,
]

In [8]:
# Build a ViT classifier from the checkpoint at VIT_MODEL_NAME with one output
# per dataset label, then move it onto the GPU.
model = ViTForImageClassification.from_pretrained(
    VIT_MODEL_NAME, num_labels=len(train_dataset.labels)
)
model = model.to("cuda")

You are using a model of type vit_mae to instantiate a model of type vit. This is not supported for all configurations of models and can yield errors.
Some weights of the model checkpoint at vit-mae-chexpert-fandl-pretrain/ were not used when initializing ViTForImageClassification: ['decoder.decoder_layers.4.layernorm_before.bias', 'decoder.decoder_layers.5.attention.attention.value.weight', 'decoder.decoder_layers.3.layernorm_after.weight', 'decoder.decoder_layers.6.output.dense.bias', 'decoder.decoder_layers.0.intermediate.dense.weight', 'decoder.decoder_layers.1.layernorm_before.weight', 'decoder.decoder_norm.weight', 'decoder.decoder_layers.3.intermediate.dense.weight', 'decoder.decoder_layers.4.attention.attention.query.bias', 'decoder.decoder_layers.0.layernorm_after.weight', 'decoder.decoder_layers.3.intermediate.dense.bias', 'decoder.decoder_layers.5.intermediate.dense.weight', 'decoder.decoder_layers.7.attention.output.dense.bias', 'decoder.decoder_layers.4.attention.attention

In [14]:
# Hugging Face training configuration, assembled from the constants cell.
training_args = TrainingArguments(
    # where checkpoints go and where metrics are reported
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    # batching
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUM_STEPS,
    dataloader_num_workers=DATALOADER_NUM_WORKERS,
    remove_unused_columns=REMOVE_UNUSED_COLUMNS,
    # optimisation schedule
    num_train_epochs=EPOCHS,
    learning_rate=LOW_LEARNING_RATE,
    weight_decay=HIGH_WEIGHT_DECAY,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=WARMUP_RATIO,
    fp16=FP16,
    # logging and evaluation cadence
    logging_strategy=LOGGING_STRATEGY,
    logging_steps=LOGGING_STEPS,
    evaluation_strategy=EVALUATION_STRATEGY,
    eval_steps=EVAL_STEPS,
)

PyTorch: setting up devices


In [15]:
# Wire the model, data and metric function into the project's AUCTrainer.
trainer = AUCTrainer(
    # model and training configuration
    model=model,
    args=training_args,
    # data pipeline
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    # evaluation metrics
    compute_metrics=compute_metrics,
    # AUCTrainer-specific arguments
    imratio=imratio,
    margin=MARGIN,
    gamma=GAMMA,
)

Using amp half precision backend


In [16]:
# Run fine-tuning; `results` keeps whatever trainer.train() returns.
# NOTE(review): in the recorded run below, the average AUC stays at about 0.478
# across all six evaluations while the training loss rises from 0.3565 to 0.3721,
# so this configuration does not appear to be learning — investigate before reuse.
results = trainer.train()

***** Running training *****
  Num examples = 152878
  Num Epochs = 1
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 240
  Gradient Accumulation steps = 5
  Total optimization steps = 637


Step,Training Loss,Validation Loss,Accuracy,F1,Auc Atelectasis,Auc Cardiomegaly,Auc Consolidation,Auc Edema,Auc Pleural effusion,Average Auc
100,0.3565,0.361219,0.03497,0.294795,0.499232,0.56022,0.565276,0.35747,0.411718,0.478783
200,0.3628,0.366665,0.056621,0.23428,0.49923,0.559846,0.565194,0.359029,0.409537,0.478567
300,0.3668,0.370935,0.125682,0.169292,0.499168,0.559549,0.565057,0.360339,0.407715,0.478366
400,0.3713,0.373736,0.180015,0.089957,0.499062,0.559406,0.565004,0.361017,0.406807,0.478259
500,0.3721,0.375032,0.199613,0.052848,0.49905,0.5593,0.565023,0.361465,0.406292,0.478226
600,0.3721,0.37543,0.204483,0.044541,0.499015,0.559293,0.565034,0.361565,0.406145,0.47821


***** Running Evaluation *****
  Num examples = 17043
  Batch size = 48
***** Running Evaluation *****
  Num examples = 17043
  Batch size = 48
***** Running Evaluation *****
  Num examples = 17043
  Batch size = 48
***** Running Evaluation *****
  Num examples = 17043
  Batch size = 48
***** Running Evaluation *****
  Num examples = 17043
  Batch size = 48
Saving model checkpoint to ./vit-mae-chexpert-auc-fine-tuned-fandl-4/checkpoint-500
Configuration saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-4/checkpoint-500/config.json
Model weights saved in ./vit-mae-chexpert-auc-fine-tuned-fandl-4/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 17043
  Batch size = 48


Training completed. Do not forget to share your model on huggingface.co/models =)




In [17]:
# Persist the trainer state (the stock Hugging Face Trainer.save_state writes
# trainer_state.json into args.output_dir).
# NOTE(review): the recorded run of this cell raised
# "TypeError: log() missing 1 required positional argument: 'logs'". The cause is
# not visible in this notebook — check the AUCTrainer implementation.
# NOTE(review): the training log only shows a checkpoint written at step 500 of
# 637, and no visible cell saves the final weights — consider trainer.save_model()
# if they are needed.
trainer.save_state()

TypeError: log() missing 1 required positional argument: 'logs'