In [1]:
# The MURA dataset is stored in my Google Drive; it can also be mounted from the local file system.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import torch
from torchvision.io import read_image
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from torch import nn
from tqdm.auto import tqdm

# Load and Prep Data

## Import and create dataframes

In [3]:
# Both CSVs are read with header=None, so their first line is treated as data,
# and the single column is given the name 'image_path'.
train_img_paths = pd.read_csv('drive/MyDrive/MURA-v1.1/train_image_paths.csv', header = None, names = ['image_path'])
# The "valid" image list is what this notebook uses as its test split.
test_img_paths = pd.read_csv('drive/MyDrive/MURA-v1.1/valid_image_paths.csv', header = None, names = ['image_path'])

In [4]:
# Helper function to extract patient, study, region and label info from the train and test dataframes
def split_img_paths(df, root='drive/MyDrive/'):
    """Derive per-image metadata columns from MURA relative image paths.

    The positional indexing below relies on every path having the layout
    ``MURA-v1.1/<split>/XR_<REGION>/<patient_id>/study<N>_<positive|negative>/<file>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``image_path`` column of '/'-separated relative paths.
    root : str, optional
        Prefix prepended to every path so it resolves from the working
        directory. Defaults to the Google Drive mount used by this notebook.

    Returns
    -------
    pandas.DataFrame
        A new frame (``df`` is left untouched) sharing ``df``'s index, with
        columns ``image_path`` (prefixed with ``root``), ``patient_id``,
        ``study`` (int), ``region`` and ``target`` ('abnormal' / 'normal').
    """
    parts = df['image_path'].str.split('/', expand=True)
    # The study folder is named '<study><N>_<positive|negative>'.
    study_dir = parts[4].str.split('_', expand=True)
    label_names = {'positive': 'abnormal', 'negative': 'normal'}

    return pd.DataFrame({
        'image_path': root + df['image_path'],
        'patient_id': parts[3],
        # Take *all* trailing digits: slicing only the last character would
        # turn 'study12' into 2.
        'study': study_dir[0].str.extract(r'(\d+)$', expand=False).astype(int),
        # Region folder is 'XR_<REGION>'; keep the part after the underscore.
        'region': parts[2].str.split('_', expand=True)[1],
        'target': study_dir[1].map(label_names),
    })

In [5]:
mura_train = split_img_paths(train_img_paths)
mura_test = split_img_paths(test_img_paths)
mura_train.head()

Unnamed: 0,image_path,patient_id,study,region,target
0,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00001,1,SHOULDER,abnormal
1,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00001,1,SHOULDER,abnormal
2,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00001,1,SHOULDER,abnormal
3,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00002,1,SHOULDER,abnormal
4,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00002,1,SHOULDER,abnormal


In [6]:
def mura_to14(df):
    """Return a copy of ``df`` with region-specific target labels.

    The ``target`` column of the result is ``<target>_<region>`` built from the
    input's ``target`` and ``region`` columns (e.g. 'abnormal_SHOULDER'); all
    other columns are carried over unchanged. The input frame is not modified.
    """
    region_specific_target = df['target'] + '_' + df['region']
    return df.assign(target=region_specific_target)

In [7]:
mura_train_14 = mura_to14(mura_train)
mura_test_14 = mura_to14(mura_test)
mura_train_14.head()

Unnamed: 0,image_path,patient_id,study,region,target
0,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00001,1,SHOULDER,abnormal_SHOULDER
1,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00001,1,SHOULDER,abnormal_SHOULDER
2,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00001,1,SHOULDER,abnormal_SHOULDER
3,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00002,1,SHOULDER,abnormal_SHOULDER
4,drive/MyDrive/MURA-v1.1/train/XR_SHOULDER/pati...,patient00002,1,SHOULDER,abnormal_SHOULDER


### Create separate train and test dataframes for each body region

In [8]:
# One training frame per body region, each selected with a boolean mask on `region`.
wrist_train = mura_train.loc[mura_train['region'].eq('WRIST')]
forearm_train = mura_train.loc[mura_train['region'].eq('FOREARM')]
hand_train = mura_train.loc[mura_train['region'].eq('HAND')]
humerus_train = mura_train.loc[mura_train['region'].eq('HUMERUS')]
elbow_train = mura_train.loc[mura_train['region'].eq('ELBOW')]
shoulder_train = mura_train.loc[mura_train['region'].eq('SHOULDER')]
finger_train = mura_train.loc[mura_train['region'].eq('FINGER')]

In [9]:
# One test frame per body region. These must be filtered from `mura_test`:
# selecting from `mura_train` would make every "test" set a copy of the
# corresponding training set and leak training data into evaluation.
wrist_test = mura_test[mura_test['region']=='WRIST']
forearm_test = mura_test[mura_test['region']=='FOREARM']
hand_test = mura_test[mura_test['region']=='HAND']
humerus_test = mura_test[mura_test['region']=='HUMERUS']
elbow_test = mura_test[mura_test['region']=='ELBOW']
shoulder_test = mura_test[mura_test['region']=='SHOULDER']
finger_test = mura_test[mura_test['region']=='FINGER']

## Create custom Datasets and Dataloaders

In [10]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders
# Preprocessing applied to every image, in order: collapse to a single
# grayscale channel, resize to a fixed 256x256, then convert to a tensor.
data_transforms = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
    ]
)

In [11]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders
def class_dict(two_targets):
    """Return the class names and the name -> index mapping for a label scheme.

    Parameters
    ----------
    two_targets : bool
        Truthy selects the binary scheme ('normal' / 'abnormal'); falsy selects
        the 14-class scheme with one 'abnormal_<REGION>' and one
        'normal_<REGION>' class per body region.

    Returns
    -------
    tuple[list[str], dict[str, int]]
        The ordered class names and a dict mapping each name to its position
        in that list. Fresh objects are built on every call.
    """
    if two_targets:
        classes = ['normal', 'abnormal']
    else:
        regions = ['WRIST', 'FOREARM', 'HAND', 'HUMERUS', 'ELBOW', 'SHOULDER', 'FINGER']
        # Region-major order with 'abnormal' before 'normal', so indices run
        # abnormal_WRIST=0, normal_WRIST=1, abnormal_FOREARM=2, ...
        classes = [f'{status}_{region}'
                   for region in regions
                   for status in ('abnormal', 'normal')]
    # Derive the mapping from the list so the two can never drift apart.
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx

In [12]:
# https://www.learnpytorch.io/04_pytorch_custom_datasets/#41-turn-loaded-images-into-dataloaders

class MuraDataset(Dataset):
    """Map-style dataset over a MURA metadata frame.

    Parameters
    ----------
    mura_df : pandas.DataFrame
        Frame with an ``image_path`` column (path openable by PIL) and a
        ``target`` column whose values are keys of the selected class mapping.
    transform : callable or None, optional
        Applied to the opened PIL image. Defaults to the notebook-level
        ``data_transforms``. With ``None`` the PIL image is returned as-is.
    two_targets : bool, optional
        Forwarded to ``class_dict``: True selects the binary label scheme,
        False the 14-class scheme (``target`` must then hold the
        region-specific names, e.g. 'abnormal_ELBOW').
    """

    def __init__(self, mura_df, transform=data_transforms, two_targets=True):
        self.mura = mura_df
        self.transform = transform
        self.classes, self.class_to_idx = class_dict(two_targets=two_targets)

    def __len__(self):
        """Number of images (rows of the metadata frame)."""
        return len(self.mura)

    def get_image_class(self, index):
        """Open the image at positional ``index`` and return it with its class name."""
        # Positional lookup, so a filtered frame with a non-contiguous index works.
        row = self.mura.iloc[index]
        return Image.open(row['image_path']), row['target']

    def __getitem__(self, idx):
        """Return ``(image, label)`` where ``label`` is the integer class index."""
        image, class_name = self.get_image_class(idx)
        label = self.class_to_idx[class_name]

        # Only call the transform when one was supplied; previously the
        # no-transform branch still called it and failed on transform=None.
        if self.transform is not None:
            image = self.transform(image)
        return image, label


In [14]:
#test on elbow train
elbow_train_dataset = MuraDataset(mura_df = elbow_train)

In [15]:
elbow_test_dataset = MuraDataset(mura_df = elbow_test)

In [16]:
len(elbow_train_dataset)

4931

In [17]:
elbow_train_dataset.classes

['normal', 'abnormal']

In [18]:
elbow_train_dataset.class_to_idx

{'normal': 0, 'abnormal': 1}

In [20]:
elbow_train_dataloader = DataLoader(elbow_train_dataset, batch_size=1, shuffle=True)

In [21]:
elbow_test_dataloader = DataLoader(elbow_test_dataset, batch_size=1, shuffle=True)

In [55]:
i, l = next(iter(elbow_train_dataloader))

In [56]:
i, l

(tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]]]), tensor([1]))

In [57]:
print(i.shape, l.shape)

torch.Size([1, 1, 256, 256]) torch.Size([1])


In [None]:
# Count how many images come out of the elbow training loader with each tensor shape.
# NOTE(review): the output recorded under the next cell appears to predate the
# Grayscale/Resize transform; with `data_transforms` applied, a single shape is
# expected here. Confirm before relying on that output.
shape_counts = {}  # named so the builtin `dict` is not shadowed
# Iterate the loader itself: calling next(iter(loader)) in a loop builds a new
# iterator each time and never walks the dataset.
for img, _ in tqdm(elbow_train_dataloader, desc='shape survey'):
    shape_counts[img.shape] = shape_counts.get(img.shape, 0) + 1
shape_counts
     


In [160]:
#dict

{torch.Size([1, 3, 512, 406]): 182,
 torch.Size([1, 1, 512, 234]): 1,
 torch.Size([1, 1, 512, 458]): 4,
 torch.Size([1, 1, 512, 512]): 29,
 torch.Size([1, 1, 361, 512]): 1,
 torch.Size([1, 1, 512, 407]): 6,
 torch.Size([1, 1, 399, 512]): 1,
 torch.Size([1, 1, 512, 468]): 4,
 torch.Size([1, 3, 407, 512]): 10,
 torch.Size([1, 1, 471, 512]): 1,
 torch.Size([1, 1, 420, 512]): 1,
 torch.Size([1, 1, 512, 280]): 4,
 torch.Size([1, 1, 512, 406]): 3,
 torch.Size([1, 1, 498, 512]): 1,
 torch.Size([1, 1, 512, 391]): 4,
 torch.Size([1, 1, 512, 443]): 2,
 torch.Size([1, 3, 420, 512]): 11,
 torch.Size([1, 1, 512, 501]): 3,
 torch.Size([1, 1, 512, 273]): 2,
 torch.Size([1, 3, 424, 512]): 13,
 torch.Size([1, 3, 512, 424]): 33,
 torch.Size([1, 1, 512, 341]): 7,
 torch.Size([1, 3, 512, 379]): 23,
 torch.Size([1, 1, 512, 289]): 2,
 torch.Size([1, 1, 512, 281]): 5,
 torch.Size([1, 3, 379, 512]): 10,
 torch.Size([1, 1, 512, 409]): 3,
 torch.Size([1, 1, 473, 512]): 1,
 torch.Size([1, 1, 512, 314]): 2,
 torc

In [26]:
x = Image.open(elbow_train.iloc[120]['image_path']).convert('RGB')

In [27]:
x = data_transforms(x)

In [28]:
x.shape

torch.Size([1, 256, 256])

## Create all Datasets and Dataloaders

# Base Model