In [26]:
import numpy as np 
import tensorflow as tf 
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from torchvision.models import vision_transformer
from torchvision.transforms import transforms

from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, accuracy_score

import os 
from PIL import Image

In [2]:
def load_train_data(): 
    training_data = ["../data/archive/training10_0/training10_0.tfrecords", 
        "../data/archive/training10_1/training10_1.tfrecords",
        "../data/archive/training10_2/training10_2.tfrecords",
        "../data/archive/training10_3/training10_3.tfrecords",
        "../data/archive/training10_4/training10_4.tfrecords"]

    images=[]
    labels=[]
    feature_dictionary = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'label_normal': tf.io.FixedLenFeature([], tf.int64),
        'image': tf.io.FixedLenFeature([], tf.string)
        }

    def _parse_function(example, feature_dictionary=feature_dictionary):
        parsed_example = tf.io.parse_example(example, feature_dictionary)
        return parsed_example

    def read_data(filename):
        full_dataset = tf.data.TFRecordDataset(filename,num_parallel_reads=tf.data.experimental.AUTOTUNE)
        full_dataset = full_dataset.shuffle(buffer_size=31000)
        full_dataset = full_dataset.cache()
        print("Size of Training Dataset: ", len(list(full_dataset)))
        
        feature_dictionary = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'label_normal': tf.io.FixedLenFeature([], tf.int64),
        'image': tf.io.FixedLenFeature([], tf.string)
        }   

        full_dataset = full_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        print(full_dataset)
        for image_features in full_dataset:
            image = image_features['image'].numpy()
            image = tf.io.decode_raw(image_features['image'], tf.uint8)
            image = tf.reshape(image, [299, 299])        
            image=image.numpy()
            #plt.imshow(image)
            images.append(image)
            labels.append(image_features['label_normal'].numpy())

    for file in training_data:
        read_data(file)

    return images, labels

def load_test_data():
    # Load .npy file
    test_data = np.load('../data/archive/test10_data/test10_data.npy')
    test_labels = np.load('../data/archive/test10_labels.npy')

    cv_data = np.load('../data/archive/cv10_data/cv10_data.npy')
    cv_labels = np.load('../data/archive/cv10_labels.npy')

    # combine test and cv into single test set
    test_data = np.concatenate((test_data, cv_data), axis=0)
    test_labels = np.concatenate((test_labels, cv_labels), axis=0)

    return test_data, test_labels

In [3]:
train_images, train_labels = load_train_data()
test_images, test_labels = load_test_data()

2023-04-25 15:02:22.309834: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>


In [8]:
# train_images = np.array(train_images)
# train_labels = np.array(train_labels)
# test_images = np.squeeze(test_images, axis=-1)
# test_labels = (test_labels>0).astype(int)

print(train_images.shape)
print(train_labels.shape)

print(test_images.shape)
print(test_labels.shape)

(55885, 299, 299)
(55885,)
(15364, 299, 299)
(15364,)


In [6]:
print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())

True
True


In [7]:
import gc
gc.collect()

758

In [21]:
device = torch.device("mps")

class NumpyImageDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform or ToTensor()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image, label = self.images[idx], self.labels[idx]
        image = np.stack((image,)*3, axis=-1)
        image = Image.fromarray(image)
        image = self.transform(image)
        return image, label

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225])
])

train_loader=DataLoader(
    NumpyImageDataset(train_images, train_labels, transform = transform), 
    batch_size=32, shuffle=False, pin_memory=True)
    
test_loader=DataLoader(
    NumpyImageDataset(test_images, test_labels, transform = transform),
    batch_size=32, shuffle=False, pin_memory=True)

In [10]:
# Define training function
def train(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    y_true = []
    y_scores = []
    y_pred = []

    for i, (inputs, labels) in enumerate(dataloader):
        for param in model.parameters():
            param.grad = None
            
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        preds = (outputs > 0.5).int()
        running_corrects += torch.sum(preds == labels.data)
        
        y_true += labels.data.cpu().tolist()
        y_scores += outputs.squeeze().data.cpu().tolist()
        y_pred += preds.cpu().tolist()

        if (i % 100 == 0):
            auc_roc = roc_auc_score(y_true, y_pred)
            precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary', zero_division=0)
            print(f"Loss: {running_loss/((i+1)*inputs.size(0)):.4f} AUC-ROC: {auc_roc:.4f} Precision: {precision:.4f} Recall: {recall:.4f} F1-score: {f1_score:.4f}")

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = running_corrects.float() / len(dataloader.dataset)
    epoch_auc_roc = roc_auc_score(y_true, y_pred)
    epoch_precision, epoch_recall, epoch_f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    print('Train Loss: {:.4f} Acc: {:.4f} AUC-ROC: {:.4f} Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f}'.format(epoch_loss, epoch_acc, epoch_auc_roc, epoch_precision, epoch_recall, epoch_f1_score))
    return epoch_loss, epoch_acc, epoch_auc_roc, epoch_precision, epoch_recall, epoch_f1_score

# Define evaluation function
def evaluate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    y_scores = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            outputs = outputs.reshape(-1)
            loss = criterion(outputs.squeeze(), labels.float())
            running_loss += loss.item() * inputs.size(0)
            preds = (outputs > 0.5).int()
            y_scores.append(preds)
    
    return y_scores

In [22]:
model = vision_transformer.vit_b_16(
    weights=vision_transformer.ViT_B_16_Weights.IMAGENET1K_V1)

for param in model.parameters(): 
    param.requires_grad = False

model.to(device)

## get and store embeddings
model.eval()
train_embeddings = []
for images, _ in train_loader:
    images = images.to(device)
    train_embeddings += model(images)


In [23]:
del train_loader
gc.collect()

6016

In [32]:
#train_embeddings = torch.stack(train_embeddings)
train_dataset = data.TensorDataset(train_embeddings, torch.tensor(train_labels))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(1000, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        return x

shallow_model = MLP().to(device)

optimizer = torch.optim.Adam(shallow_model.parameters(), lr=0.0001)
criterion = nn.BCEWithLogitsLoss()

In [30]:
train_embeddings.shape

torch.Size([55885, 1000])

In [33]:
for i in range(10): 
    train(shallow_model, train_loader, optimizer, criterion)

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [11]:
for i in range(5):
    train(model, train_loader, optimizer, criterion)

Loss: 0.6822 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 0.4525 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 0.4134 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 0.3934 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 0.3747 AUC-ROC: 0.5003 Precision: 1.0000 Recall: 0.0006 F1-score: 0.0012
Loss: 0.3622 AUC-ROC: 0.5007 Precision: 1.0000 Recall: 0.0015 F1-score: 0.0029
Loss: 0.3503 AUC-ROC: 0.5022 Precision: 0.9167 Recall: 0.0045 F1-score: 0.0089
Loss: 0.3412 AUC-ROC: 0.5053 Precision: 0.9394 Recall: 0.0107 F1-score: 0.0211
Loss: 0.3349 AUC-ROC: 0.5132 Precision: 0.9375 Recall: 0.0266 F1-score: 0.0517
Loss: 0.3273 AUC-ROC: 0.5184 Precision: 0.9161 Recall: 0.0373 F1-score: 0.0717
Loss: 0.3191 AUC-ROC: 0.5236 Precision: 0.9220 Recall: 0.0479 F1-score: 0.0910
Loss: 0.3134 AUC-ROC: 0.5277 Precision: 0.9024 Recall: 0.0563 F1-score: 0.1060
Loss: 0.3094 AUC-ROC: 0.5320 Precision: 0.8889 Recal

In [12]:
y_hat = evaluate(model, test_loader, criterion)
y_pred = []

for tens in y_hat: 
    tens = tens.to('cpu')
    y_pred += tens.numpy().flatten().tolist()

print(roc_auc_score(test_labels, y_pred))
print(precision_recall_fscore_support(test_labels, y_pred, average='binary'))
print(accuracy_score(test_labels, y_pred))

0.6858283433133733
(0.8208955223880597, 0.3842315369261477, 0.5234534330387492, None)
0.9087477219474095


In [74]:
print(model)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_a