In [10]:
#MURA dataset is located in my googe drive, can also mount from file system
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import torch
from torchvision.io import read_image
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torch import nn

# Load and Prep Data

## Import and create dataframes

In [11]:
train_img_paths = pd.read_csv('drive/MyDrive/MURA-v1.1/train_image_paths.csv', header = None, names = ['image_path'])
test_img_paths = pd.read_csv('drive/MyDrive/MURA-v1.1/valid_image_paths.csv', header = None, names = ['image_path'])

In [12]:
pd.set_option('max_colwidth', None)
train_img_paths.head()

Unnamed: 0,image_path
0,MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image1.png
1,MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image2.png
2,MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image3.png
3,MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image1.png
4,MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image2.png


In [13]:
#helper funtion to extract info from train and test dataframes
def split_img_paths(df):
    temp = df['image_path'].str.split('/', expand=True)
    new_df = pd.DataFrame(df['image_path'])
    new_df['patient_id'] = temp[3]
    new_df['study'] = temp[4].str.split('_', expand = True)[0].str[-1:].astype(int)
    new_df['region'] = temp[2].str.split('_', expand = True)[1]
    map = {'positive':'abnormal', 'negative':'normal'}
    new_df['target'] = temp[4].str.split('_', expand = True)[1]
    new_df['target'] = new_df['target'].map(map)
    #add google drive location to image path
    new_df['image_path'] = 'drive/MyDrive/' + new_df['image_path']
    
    return new_df

In [14]:
mura_train = split_img_paths(train_img_paths)
mura_test = split_img_paths(test_img_paths)
mura_train.head()

Unnamed: 0,image_path,patient_id,study,region,target
0,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image1.png,patient00001,1,SHOULDER,abnormal
1,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image2.png,patient00001,1,SHOULDER,abnormal
2,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image3.png,patient00001,1,SHOULDER,abnormal
3,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image1.png,patient00002,1,SHOULDER,abnormal
4,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image2.png,patient00002,1,SHOULDER,abnormal


In [15]:
def mura_to14(df):
    new_df = df.copy()
    new_df['target'] = df['target'] + '_' + df['region']
    return new_df

In [16]:
mura_train_14 = mura_to14(mura_train)
mura_test_14 = mura_to14(mura_test)
mura_train_14.head()

Unnamed: 0,image_path,patient_id,study,region,target
0,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image1.png,patient00001,1,SHOULDER,abnormal_SHOULDER
1,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image2.png,patient00001,1,SHOULDER,abnormal_SHOULDER
2,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image3.png,patient00001,1,SHOULDER,abnormal_SHOULDER
3,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image1.png,patient00002,1,SHOULDER,abnormal_SHOULDER
4,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image2.png,patient00002,1,SHOULDER,abnormal_SHOULDER


### Create seperate df for train and test for different regions

In [17]:
wrist_train = mura_train[mura_train['region']=='WRIST']
forearm_train = mura_train[mura_train['region']=='FOREARM']
hand_train = mura_train[mura_train['region']=='HAND']
humerus_train = mura_train[mura_train['region']=='HUMERUS']
elbow_train = mura_train[mura_train['region']=='ELBOW']
shoulder_train = mura_train[mura_train['region']=='SHOULDER']
finger_train = mura_train[mura_train['region']=='FINGER']

In [18]:
wrist_test = mura_train[mura_train['region']=='WRIST']
forearm_test = mura_train[mura_train['region']=='FOREARM']
hand_test = mura_train[mura_train['region']=='HAND']
humerus_test = mura_train[mura_train['region']=='HUMERUS']
elbow_test = mura_train[mura_train['region']=='ELBOW']
shoulder_test = mura_train[mura_train['region']=='SHOULDER']
finger_test = mura_train[mura_train['region']=='FINGER']

## Create custom Datasets and Dataloaders

In [19]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders
data_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((256,256)),
    transforms.ToTensor()
])

In [20]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders
def class_dict(two_targets):
    classes_2 = ['normal', 'abnormal']
    dict_2 = {'normal' : 0, 'abnormal': 1}
    classes_14 = ['abnormal_WRIST', 'normal_WRIST','abnormal_FOREARM', 'normal_FOREARM',
                      'abnormal_HAND', 'normal_HAND', 'abnormal_HUMERUS', 'normal_HUMERUS',
                      'abnormal_ELBOW', 'normal_ELBOW', 'abnormal_SHOULDER', 'normal_SHOULDER',
                      'abnormal_FINGER', 'normal_FINGER']
    dict_14 = {'abnormal_WRIST': 0, 'normal_WRIST': 1,'abnormal_FOREARM' : 2, 'normal_FOREARM' : 3,
                      'abnormal_HAND': 4, 'normal_HAND' : 5, 'abnormal_HUMERUS': 6, 'normal_HUMERUS' : 7,
                      'abnormal_ELBOW': 8, 'normal_ELBOW' : 9, 'abnormal_SHOULDER' : 10, 'normal_SHOULDER': 11,
                      'abnormal_FINGER': 12, 'normal_FINGER' : 13}
    if two_targets == True:
      return classes_2, dict_2
    else:
      return classes_14, dict_14

In [21]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders

class MuraDataset(Dataset):
    def __init__(self, mura_df, transform=data_transforms, two_targets=True):
        self.mura = mura_df
        self.transform = transform
        self.classes, self.class_to_idx = class_dict(two_targets=two_targets)

    def __len__(self):
        return len(self.mura)

    def get_image_class(self, index):
        image_path = self.mura.iloc[index]['image_path']
        class_name = self.mura.iloc[index]['target']
        #return read_image(image_path), class_name
        return Image.open(image_path), class_name  

    def __getitem__(self, idx):
        image, class_name = self.get_image_class(idx)
        label = self.class_to_idx[class_name]

        if self.transform:
          return self.transform(image), label
        else:
          return self.transform(image), label


In [22]:
#test on elbow train
elbow_train_dataset = MuraDataset(mura_df = elbow_train)

In [23]:
elbow_test_dataset = MuraDataset(mura_df = elbow_test)

In [24]:
len(elbow_train_dataset)

4931

In [25]:
elbow_train_dataset.classes

['normal', 'abnormal']

In [26]:
elbow_train_dataset.class_to_idx

{'normal': 0, 'abnormal': 1}

In [27]:
elbow_train_dataloader = DataLoader(elbow_train_dataset, batch_size=32, shuffle=True)

In [28]:
elbow_test_dataloader = DataLoader(elbow_test_dataset, batch_size=32, shuffle=True)

In [29]:
i, l = next(iter(elbow_train_dataloader))

In [30]:
i, l

(tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.3804, 0.3843, 0.3843],
           [0.0000, 0.0000, 0.0000,  ..., 0.4039, 0.4078, 0.4078],
           [0.0000, 0.0000, 0.0000,  ..., 0.3451, 0.3451, 0.3451],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.1804, 0.2039, 0.2235],
           [0.0000, 0.0000, 0.0000,  ..., 0.1843, 0.2039, 0.2235],
           [0.0000, 0.0000, 0.0000,  ..., 0.2157, 0.2314, 0.2510]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.1255, 0.1294, 0.1294],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000

In [31]:
print(i.shape, l.shape)

torch.Size([32, 1, 256, 256]) torch.Size([32])


In [32]:
#dict = {}
#for i in range(len(train_dataset)):
#    img, label = next(iter(train_dataloader))
#    shape = img.shape
#    dict[shape] = dict.get(shape, 0) + 1
     


In [33]:
#dict

In [34]:
x = Image.open(elbow_train.iloc[120]['image_path']).convert('RGB')

In [35]:
x = data_transforms(x)

In [36]:
x.shape

torch.Size([1, 256, 256])

## Create all Datasets and Dataloaders

# Base Model

In [37]:
device = "cuda" if torch.cuda.is_available() else "cpu"

Implement CNN from:

https://www.learnpytorch.io/03_pytorch_computer_vision/#7-model-2-building-a-convolutional-neural-network-cnn

In [38]:
# Create a convolutional neural network 
class base_model(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, 
                      out_channels=hidden_units, 
                      kernel_size=3, # how big is the square that's going over the image?
                      stride=1, # default
                      padding=1),# options = "valid" (no padding) or "same" (output has same shape as input) or int for specific number 
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, 
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2) # default stride value is same as kernel_size
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Where did this in_features shape come from? 
            # It's because each layer of our network compresses and changes the shape of our inputs data.
            nn.Linear(in_features=hidden_units*64*64, 
                      out_features=output_shape)
        )
    
    def forward(self, x: torch.Tensor):
        x = self.block_1(x)
        # print(x.shape)
        x = self.block_2(x)
        # print(x.shape)
        x = self.classifier(x)
        # print(x.shape)
        return x


In [39]:
torch.manual_seed(42)
elbow_base_model = base_model(input_shape=1, 
    hidden_units=10, 
    output_shape=len(elbow_test_dataset.classes)).to(device)
elbow_base_model

base_model(
  (block_1): Sequential(
    (0): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=40960, out_features=2, bias=True)
  )
)

In [40]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=elbow_base_model.parameters(), 
                             lr=0.1)

In [41]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100 
    return acc

In [42]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    train_loss, train_acc = 0, 0
    for batch, (X, y) in enumerate(data_loader):
        # Send data to GPU
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss
        train_acc += accuracy_fn(y_true=y,
                                 y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Calculate loss and accuracy per epoch and print out what's happening
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")

def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    test_loss, test_acc = 0, 0
    model.eval() # put model in eval mode
    # Turn on inference context manager
    with torch.inference_mode(): 
        for X, y in data_loader:
            # Send data to GPU
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            test_pred = model(X)
            
            # 2. Calculate loss and accuracy
            test_loss += loss_fn(test_pred, y)
            test_acc += accuracy_fn(y_true=y,
                y_pred=test_pred.argmax(dim=1) # Go from logits -> pred labels
            )
        
        # Adjust metrics and print out
        test_loss /= len(data_loader)
        test_acc /= len(data_loader)
        print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [None]:
torch.manual_seed(42)

# Measure time
from tqdm import tqdm
from timeit import default_timer as timer
train_time_start_elbow_base_model = timer()

# Train and test model 
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train_step(data_loader=elbow_train_dataloader, 
        model=elbow_base_model, 
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device
    )
    test_step(data_loader=elbow_test_dataloader,
        model=elbow_base_model,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device
    )

train_time_end_elbow_base_model = timer()


In [43]:
from timeit import default_timer as timer 
def print_train_time(start: float, end: float, device: torch.device = None):
    """Prints difference between start and end time.

    Args:
        start (float): Start time of computation (preferred in timeit format). 
        end (float): End time of computation.
        device ([type], optional): Device that compute is running on. Defaults to None.

    Returns:
        float: time between start and end in seconds (higher is longer).
    """
    total_time = end - start
    print(f"Train time on {device}: {total_time/60:.3f} minutes")
    return total_time

In [None]:
total_train_time_elbow_base_model = print_train_time(start=train_time_start_elbow_base_model,
                                           end=train_time_end_elbow_base_model,
                                           device=device)

**Elbow With batch size = 1**
 
  0%|          | 0/3 [00:00<?, ?it/s]Epoch: 0
---------
Train loss: 2927377804200474425950208.00000 | Train accuracy: 57.96%
 33%|███▎      | 1/3 [1:32:51<3:05:42, 5571.33s/it]Test loss: 0.68236 | Test accuracy: 59.32%

Epoch: 1
---------
Train loss: 0.68688 | Train accuracy: 57.41%
 67%|██████▋   | 2/3 [1:34:09<39:00, 2340.27s/it]  Test loss: 0.70821 | Test accuracy: 59.32%

Epoch: 2
---------
Train loss: 0.68935 | Train accuracy: 57.70%
100%|██████████| 3/3 [1:35:26<00:00, 1908.91s/it]Test loss: 0.71157 | Test accuracy: 40.68%



**Elbow With batch size = 32**

0%|          | 0/3 [00:00<?, ?it/s]Epoch: 0
---------
Train loss: 0.67527 | Train accuracy: 59.56%
 33%|███▎      | 1/3 [19:56<39:53, 1196.83s/it]Test loss: 0.67626 | Test accuracy: 59.36%

Epoch: 1
---------
Train loss: 0.67621 | Train accuracy: 59.36%
 67%|██████▋   | 2/3 [21:01<08:50, 530.74s/it] Test loss: 0.67643 | Test accuracy: 59.17%

Epoch: 2
---------
Train loss: 0.67608 | Train accuracy: 59.36%
100%|██████████| 3/3 [22:04<00:00, 441.58s/it]Test loss: 0.67713 | Test accuracy: 58.97%


Train time on cuda: 22.079 minutes


https://huggingface.co/blog/fine-tune-vit

In [None]:
pip install datasets transformers

In [139]:
from transformers import ViTFeatureExtractor

model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)



In [142]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

In [143]:
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [144]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

In [149]:
from transformers import ViTForImageClassification

labels = elbow_test_dataset.classes

model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [146]:
from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="./vit-base-beans",
  per_device_train_batch_size=16,
  evaluation_strategy="steps",
  num_train_epochs=4,
  fp16=True,
  save_steps=100,
  eval_steps=100,
  logging_steps=10,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
)

In [130]:
data_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

In [131]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders

class MuraDataset(Dataset):
    def __init__(self, mura_df, transform=data_transforms, two_targets=True):
        self.mura = mura_df
        self.transform = transform
        self.classes, self.class_to_idx = class_dict(two_targets=two_targets)

    def __len__(self):
        return len(self.mura)

    def get_image_class(self, index):
        image_path = self.mura.iloc[index]['image_path']
        class_name = self.mura.iloc[index]['target']
        #return read_image(image_path), class_name
        return Image.open(image_path), class_name  

    def __getitem__(self, idx):
        image, class_name = self.get_image_class(idx)
        label = self.class_to_idx[class_name]

        if self.transform:
          return self.transform(image), label
        else:
          return self.transform(image), label

In [132]:
elbow_train_dataset = MuraDataset(mura_df = elbow_train[0:10])
elbow_test_dataset = MuraDataset(mura_df = elbow_test[0:10])

In [138]:
elbow_train_dict = {}
elbow_test_dict = {}

for i in range(len(elbow_train_dataset)):
    elbow_train_dict[i] = {'image': elbow_train_dataset[i][0], 'labels': elbow_train_dataset[i][0]}
    elbow_test_dict[i] = {'image': elbow_test_dataset[i][0], 'labels': elbow_test_dataset[i][0]}


In [147]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=elbow_train_dict,
    eval_dataset=elbow_test_dict,
    tokenizer=feature_extractor,
)

In [148]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()



KeyError: ignored

In [None]:
metrics = trainer.evaluate(prepared_ds['validation'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)