# Swin Transformer

BCE loss implemented correctly

Average AUC is calculated manually using the loss for each diagnosis.

In [None]:
# Mount into drive
from google.colab import drive
drive.mount("/content/drive")
!cp '/content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/CheXpert-v1.0-small.zip' /content/
!mkdir CheXpert
!unzip '/content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/CheXpert-v1.0-small.zip' -d /content/CheXpert
import sys
sys.path.append('/content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/tyler')
sys.path.append('/content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation')
sys.path.append('/content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/models')

In [None]:
# imports
!pip install transformers
import numpy as np
import pandas as pd

%load_ext autoreload
%autoreload 2

import torch
print(torch.cuda.is_available())

from transformers import AutoFeatureExtractor, SwinForImageClassification, SwinConfig
from transformers import TrainingArguments, Trainer
from torchvision.transforms import RandomHorizontalFlip, RandomResizedCrop
from torchvision.transforms.functional import InterpolationMode

from sklearn.model_selection import train_test_split

In [None]:
!pip install libauc
from dataloader import *
from utils import *
from trainer import *

In [None]:
# CONSTANTS

# --- Pretrained checkpoint -------------------------------------------------
# The same Hub id is used for the preprocessing config and the model weights.
# (The VIT_ prefix is historical: the checkpoint is a Swin model.)
FEATURE_EXTRACTOR_NAME = "microsoft/swin-base-patch4-window7-224"
VIT_MODEL_NAME = "microsoft/swin-base-patch4-window7-224"

# --- Data ------------------------------------------------------------------
TRAIN_SPLIT = 0.8                 # fraction of train.csv rows kept for training
DATALOADER_NUM_WORKERS = 4
REMOVE_UNUSED_COLUMNS = False

# --- Optimisation ----------------------------------------------------------
EPOCHS = 10
BATCH_SIZE = 48                   # per device, for both training and evaluation
GRAD_ACCUM_STEPS = 5              # 48 * 5 = 240 samples per optimizer step
LEARNING_RATE = 1.5e-3
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.05
WEIGHT_DECAY = 0.05
FP16 = True

# --- Logging / evaluation --------------------------------------------------
LOGGING_STRATEGY = "steps"
LOGGING_STEPS = 10
EVALUATION_STRATEGY = "epoch"
# Passed to TrainingArguments as eval_steps; presumably has no effect while the
# evaluation strategy is "epoch" (TODO confirm).
EVAL_STEPS = 200

# --- Output ----------------------------------------------------------------
OUTPUT_DIR = "/content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output"

In [None]:
# Preprocessing config for the checkpoint, with the per-channel normalisation
# statistics given explicitly (the usual ImageNet mean / std).
normalization_stats = {
    "image_mean": [0.485, 0.456, 0.406],
    "image_std": [0.229, 0.224, 0.225],
}
feature_extractor = AutoFeatureExtractor.from_pretrained(
    FEATURE_EXTRACTOR_NAME,
    **normalization_stats,
)

Downloading:   0%|          | 0.00/255 [00:00<?, ?B/s]

In [None]:
# set up our transforms (training-time augmentation only)

# Older transformers releases expose `feature_extractor.size` as a plain int,
# newer ones as a dict such as {"height": 224, "width": 224}. Normalise it to
# something RandomResizedCrop accepts so the cell works with either.
crop_size = feature_extractor.size
if isinstance(crop_size, dict):
    crop_size = (crop_size["height"], crop_size["width"])

transforms = [
    # Random crop covering 20%-100% of the image area, resized to the model input size.
    RandomResizedCrop(crop_size, scale=(0.2, 1.0), interpolation=InterpolationMode.BICUBIC),
    RandomHorizontalFlip(),
]

In [None]:
# Split train.csv into training / held-out evaluation frames and wrap both in
# the project dataset class.
SPLIT_SEED = 42

# Seed the global NumPy RNG as before (other code may draw from it), but also
# pass the seed to train_test_split explicitly so the split does not depend on
# how many random numbers were consumed before this cell ran.
np.random.seed(SPLIT_SEED)
full_train_df = pd.read_csv("/content/CheXpert/CheXpert-v1.0-small/train.csv")
train_df, eval_df = train_test_split(
    full_train_df,
    train_size=TRAIN_SPLIT,
    random_state=SPLIT_SEED,
)
# NOTE(review): this is a row-level split. If train.csv holds several rows for
# the same patient, that patient can end up in both frames — consider a
# patient-grouped split (TODO confirm against the CSV schema).

# Options shared by both datasets; only the training set gets augmentation.
dataset_options = dict(
    classes=COMPETITION_TASKS,
    uncertainty_method="smooth",
    smoothing_lower_bound=0.55,
    smoothing_upper_bound=0.85,
    use_frontal=False,
)
train_dataset = ChexpertViTDataset(
    "/content/CheXpert", train_df, feature_extractor, transforms=transforms, **dataset_options
)
eval_dataset = ChexpertViTDataset(
    "/content/CheXpert", eval_df, feature_extractor, **dataset_options
)

In [None]:
# One output per label exposed by the training dataset.
num_classes = len(train_dataset.labels)

# The checkpoint ships a 1000-way classifier; ignore_mismatched_sizes lets the
# head be re-created with `num_classes` outputs (see the warning printed below).
model = SwinForImageClassification.from_pretrained(
    VIT_MODEL_NAME,
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
)
model = model.to("cuda")

Downloading:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/335M [00:00<?, ?B/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([5, 1024]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# set up training arguments
# Every value comes from the CONSTANTS cell; only the reporting backend is
# fixed here.
training_options = {
    # where checkpoints and the final model are written
    "output_dir": OUTPUT_DIR,
    "report_to": "tensorboard",
    # batching
    "per_device_train_batch_size": BATCH_SIZE,
    "per_device_eval_batch_size": BATCH_SIZE,
    "gradient_accumulation_steps": GRAD_ACCUM_STEPS,
    "dataloader_num_workers": DATALOADER_NUM_WORKERS,
    "remove_unused_columns": REMOVE_UNUSED_COLUMNS,
    # optimisation schedule
    "num_train_epochs": EPOCHS,
    "learning_rate": LEARNING_RATE,
    "lr_scheduler_type": LR_SCHEDULER_TYPE,
    "warmup_ratio": WARMUP_RATIO,
    "weight_decay": WEIGHT_DECAY,
    "fp16": FP16,
    # evaluation and logging cadence
    "evaluation_strategy": EVALUATION_STRATEGY,
    # NOTE(review): presumably unused while the strategy is "epoch" — confirm.
    "eval_steps": EVAL_STEPS,
    "logging_strategy": LOGGING_STRATEGY,
    "logging_steps": LOGGING_STEPS,
}
training_args = TrainingArguments(**training_options)

In [None]:
# Wire the model, data and metric hooks into the project's trainer class.
trainer_inputs = dict(
    model=model,
    args=training_args,
    # data: augmented training set, un-augmented held-out split, batch builder
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    # metrics reported at each evaluation
    compute_metrics=compute_metrics,
)
trainer = MultiLabelTrainer(**trainer_inputs)

Using amp half precision backend


In [None]:
# Run fine-tuning. Per the log below: 10 epochs / 7440 optimization steps with
# an effective batch of 240, one evaluation row per epoch, and intermediate
# checkpoints (checkpoint-500, checkpoint-1000, ...) written under OUTPUT_DIR.
# The recorded run took about 40,850 s (train_runtime), so this cell is expensive
# to re-run.
trainer.train()

***** Running training *****
  Num examples = 178731
  Num Epochs = 10
  Instantaneous batch size per device = 48
  Total train batch size (w. parallel, distributed & accumulation) = 240
  Gradient Accumulation steps = 5
  Total optimization steps = 7440


Epoch,Training Loss,Validation Loss,Accuracy,F1,Auc Atelectasis,Auc Cardiomegaly,Auc Consolidation,Auc Edema,Auc Pleural effusion
0,0.4665,0.461647,0.314773,0.349568,0.675752,0.775204,0.649969,0.79612,0.829578
1,0.467,0.442931,0.344561,0.462309,0.679684,0.799272,0.659474,0.815262,0.844354
2,0.4529,0.447847,0.338003,0.413827,0.680502,0.791029,0.661756,0.820727,0.84484
3,0.4564,0.439406,0.347425,0.468146,0.687642,0.815315,0.668635,0.819412,0.850743
4,0.447,0.431632,0.349954,0.447965,0.690407,0.818786,0.673748,0.83106,0.856612
5,0.4412,0.429835,0.355303,0.506524,0.6974,0.822069,0.679805,0.837027,0.862678
6,0.4478,0.423867,0.363785,0.503155,0.699785,0.831796,0.681194,0.841611,0.866402
7,0.4362,0.418059,0.366985,0.506299,0.706208,0.837253,0.689498,0.844203,0.871248
8,0.4143,0.415012,0.373162,0.516476,0.709916,0.839145,0.690368,0.849718,0.873577


Saving model checkpoint to /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/checkpoint-500
Configuration saved in /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/checkpoint-500/config.json
Model weights saved in /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 44683
  Batch size = 48
Saving model checkpoint to /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/checkpoint-1000
Configuration saved in /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/checkpoint-1000/config.json
Model weights saved in /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tune

Epoch,Training Loss,Validation Loss,Accuracy,F1,Auc Atelectasis,Auc Cardiomegaly,Auc Consolidation,Auc Edema,Auc Pleural effusion
0,0.4665,0.461647,0.314773,0.349568,0.675752,0.775204,0.649969,0.79612,0.829578
1,0.467,0.442931,0.344561,0.462309,0.679684,0.799272,0.659474,0.815262,0.844354
2,0.4529,0.447847,0.338003,0.413827,0.680502,0.791029,0.661756,0.820727,0.84484
3,0.4564,0.439406,0.347425,0.468146,0.687642,0.815315,0.668635,0.819412,0.850743
4,0.447,0.431632,0.349954,0.447965,0.690407,0.818786,0.673748,0.83106,0.856612
5,0.4412,0.429835,0.355303,0.506524,0.6974,0.822069,0.679805,0.837027,0.862678
6,0.4478,0.423867,0.363785,0.503155,0.699785,0.831796,0.681194,0.841611,0.866402
7,0.4362,0.418059,0.366985,0.506299,0.706208,0.837253,0.689498,0.844203,0.871248
8,0.4143,0.415012,0.373162,0.516476,0.709916,0.839145,0.690368,0.849718,0.873577
9,0.4278,0.414367,0.373431,0.522517,0.710263,0.840343,0.691253,0.849733,0.874067


***** Running Evaluation *****
  Num examples = 44683
  Batch size = 48


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=7440, training_loss=0.4465117412228738, metrics={'train_runtime': 40848.546, 'train_samples_per_second': 43.755, 'train_steps_per_second': 0.182, 'total_flos': 1.4001936125630128e+20, 'train_loss': 0.4465117412228738, 'epoch': 10.0})

In [None]:
# Evaluate the trained model on the trainer's own eval_dataset (the held-out
# 20% of train.csv; 44683 examples in the log below).
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 44683
  Batch size = 48


{'epoch': 10.0,
 'eval_AUC_Atelectasis': 0.7102629943604462,
 'eval_AUC_Cardiomegaly': 0.840342841555816,
 'eval_AUC_Consolidation': 0.6912530769091636,
 'eval_AUC_Edema': 0.8497326658106499,
 'eval_AUC_Pleural Effusion': 0.874066771768063,
 'eval_accuracy': 0.373430611194414,
 'eval_f1': 0.5225168283664634,
 'eval_loss': 0.41436660289764404,
 'eval_runtime': 323.2158,
 'eval_samples_per_second': 138.245,
 'eval_steps_per_second': 2.88}

In [None]:
# Persist the trainer's state; presumably written under OUTPUT_DIR — TODO
# confirm for MultiLabelTrainer.
trainer.save_state()

In [None]:
# Write the final model to OUTPUT_DIR (config.json and pytorch_model.bin, per
# the log below).
trainer.save_model()

Saving model checkpoint to /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output
Configuration saved in /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/config.json
Model weights saved in /content/drive/MyDrive/Georgia Tech/CS 7643 Deep Learning/chestx_interpretation/swin-chexpert-fine-tuned-output/pytorch_model.bin


In [None]:
# Dataset built from valid.csv, with the same options as the
# held-out split and no augmentation.
validation_df = pd.read_csv("/content/CheXpert/CheXpert-v1.0-small/valid.csv")

valid_options = {
    "classes": COMPETITION_TASKS,
    "uncertainty_method": "smooth",
    "smoothing_lower_bound": 0.55,
    "smoothing_upper_bound": 0.85,
    "use_frontal": False,
}
valid_dataset = ChexpertViTDataset(
    "/content/CheXpert",
    validation_df,
    feature_extractor,
    **valid_options,
)

In [None]:
# Evaluate on the dataset built from valid.csv (234 examples in the log below)
# instead of the trainer's default held-out split.
trainer.evaluate(eval_dataset=valid_dataset)

***** Running Evaluation *****
  Num examples = 234
  Batch size = 48


{'epoch': 10.0,
 'eval_AUC_Atelectasis': 0.8227678571428573,
 'eval_AUC_Cardiomegaly': 0.8189670446491849,
 'eval_AUC_Consolidation': 0.9027589326096788,
 'eval_AUC_Edema': 0.9233392122281011,
 'eval_AUC_Pleural Effusion': 0.9306908570917867,
 'eval_accuracy': 0.4658119658119658,
 'eval_f1': 0.4759615384615385,
 'eval_loss': 0.4080140292644501,
 'eval_runtime': 2.4658,
 'eval_samples_per_second': 94.896,
 'eval_steps_per_second': 2.028}