In [1]:
import numpy as np 
import tensorflow as tf 
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor

from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, accuracy_score

import os 
from PIL import Image

In [3]:
def load_train_data(): 
    training_data = ["../data/archive/training10_0/training10_0.tfrecords", 
        "../data/archive/training10_1/training10_1.tfrecords",
        "../data/archive/training10_2/training10_2.tfrecords",
        "../data/archive/training10_3/training10_3.tfrecords",
        "../data/archive/training10_4/training10_4.tfrecords"]

    images=[]
    labels=[]
    feature_dictionary = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'label_normal': tf.io.FixedLenFeature([], tf.int64),
        'image': tf.io.FixedLenFeature([], tf.string)
        }

    def _parse_function(example, feature_dictionary=feature_dictionary):
        parsed_example = tf.io.parse_example(example, feature_dictionary)
        return parsed_example

    def read_data(filename):
        full_dataset = tf.data.TFRecordDataset(filename,num_parallel_reads=tf.data.experimental.AUTOTUNE)
        full_dataset = full_dataset.shuffle(buffer_size=31000)
        full_dataset = full_dataset.cache()
        print("Size of Training Dataset: ", len(list(full_dataset)))
        
        feature_dictionary = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'label_normal': tf.io.FixedLenFeature([], tf.int64),
        'image': tf.io.FixedLenFeature([], tf.string)
        }   

        full_dataset = full_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        print(full_dataset)
        for image_features in full_dataset:
            image = image_features['image'].numpy()
            image = tf.io.decode_raw(image_features['image'], tf.uint8)
            image = tf.reshape(image, [299, 299])        
            image=image.numpy()
            #plt.imshow(image)
            images.append(image)
            labels.append(image_features['label_normal'].numpy())

    for file in training_data:
        read_data(file)

    return images, labels

def load_test_data():
    # Load .npy file
    test_data = np.load('../data/archive/test10_data/test10_data.npy')
    test_labels = np.load('../data/archive/test10_labels.npy')

    cv_data = np.load('../data/archive/cv10_data/cv10_data.npy')
    cv_labels = np.load('../data/archive/cv10_labels.npy')

    # combine test and cv into single test set
    test_data = np.concatenate((test_data, cv_data), axis=0)
    test_labels = np.concatenate((test_labels, cv_labels), axis=0)

    return test_data, test_labels

In [4]:
train_images, train_labels = load_train_data()
test_images, test_labels = load_test_data()

2023-04-24 14:03:54.617402: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>


In [5]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)
test_images = np.squeeze(test_images, axis=-1)
test_labels = (test_labels>0).astype(int)

print(train_images.shape)
print(train_labels.shape)

print(test_images.shape)
print(test_labels.shape)

(55885, 299, 299)
(55885,)
(15364, 299, 299)
(15364,)


In [2]:
print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())


True
True


In [11]:
device = torch.device("mps")

class NumpyImageDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform or ToTensor()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image, label = self.images[idx], self.labels[idx]
        image = self.transform(image)
        return image, label

train_loader=DataLoader(NumpyImageDataset(train_images, train_labels), 
    batch_size=32, shuffle=True)
    
test_loader=DataLoader(NumpyImageDataset(test_images, test_labels),
    batch_size=32, shuffle=False)

In [12]:
# Define MLP model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(299*299, 500)
        self.fc2 = nn.Linear(500, 100)
        self.fc3 = nn.Linear(100, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(64 * 37 * 37, 256)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(256, 1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pool3(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x

class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(299*299, 1)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)
        out = self.linear(x)
        out = torch.sigmoid(out)
        return out

In [13]:
# Define training function
def train(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    y_true = []
    y_scores = []
    y_pred = []

    for i, (inputs, labels) in enumerate(dataloader):
        for param in model.parameters():
            param.grad = None
            
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        preds = (outputs > 0.5).int()
        running_corrects += torch.sum(preds == labels.data)
        
        y_true += labels.data.cpu().tolist()
        y_scores += outputs.squeeze().data.cpu().tolist()
        y_pred += preds.cpu().tolist()

        if (i % 100 == 0):
            auc_roc = roc_auc_score(y_true, y_pred)
            precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary', zero_division=0)
            print(f"Loss: {running_loss/((i+1)*inputs.size(0)):.4f} Acc: {running_corrects.float()/((i+1)*inputs.size(0)):.4f} AUC-ROC: {auc_roc:.4f} Precision: {precision:.4f} Recall: {recall:.4f} F1-score: {f1_score:.4f}")

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = running_corrects.float() / len(dataloader.dataset)
    epoch_auc_roc = roc_auc_score(y_true, y_pred)
    epoch_precision, epoch_recall, epoch_f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    print('Train Loss: {:.4f} Acc: {:.4f} AUC-ROC: {:.4f} Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f}'.format(epoch_loss, epoch_acc, epoch_auc_roc, epoch_precision, epoch_recall, epoch_f1_score))
    return epoch_loss, epoch_acc, epoch_auc_roc, epoch_precision, epoch_recall, epoch_f1_score

# Define evaluation function
def evaluate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    y_scores = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels.float())
            running_loss += loss.item() * inputs.size(0)
            preds = (outputs > 0.5).int()
            y_scores.append(preds)
    
    return y_scores


In [14]:
model = MLP().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(10):
    train(model, train_loader, optimizer, criterion)


Loss: 0.6846 Acc: 27.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 0.5690 Acc: 26.5507 AUC-ROC: 0.5209 Precision: 0.2360 Recall: 0.0860 F1-score: 0.1260
Loss: 0.4986 Acc: 26.8137 AUC-ROC: 0.5295 Precision: 0.3015 Recall: 0.0919 F1-score: 0.1408
Loss: 0.4663 Acc: 26.8721 AUC-ROC: 0.5437 Precision: 0.3844 Recall: 0.1157 F1-score: 0.1779


KeyboardInterrupt: 

In [33]:
y_hat = evaluate(model, test_loader, criterion)
y_pred = []
for tens in y_hat: 
    y_pred += tens.numpy().flatten().tolist()

print(roc_auc_score(test_labels, y_pred))
print(precision_recall_fscore_support(test_labels, y_pred, average='binary'))
print(accuracy_score(test_labels, y_pred))

0.6758857285429141
(0.7650103519668737, 0.3687624750499002, 0.49764309764309766, None)
0.9028898724290549


In [15]:
model = SimpleCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(5):
    train(model, train_loader, optimizer, criterion)

Loss: 0.7137 Acc: 4.0000 AUC-ROC: 0.5000 Precision: 0.1250 Recall: 1.0000 F1-score: 0.2222
Loss: 0.3544 Acc: 27.8144 AUC-ROC: 0.4999 Precision: 0.1212 Recall: 0.0101 F1-score: 0.0186
Loss: 0.3263 Acc: 27.7718 AUC-ROC: 0.5113 Precision: 0.3692 Recall: 0.0299 F1-score: 0.0554
Loss: 0.3212 Acc: 27.3698 AUC-ROC: 0.5445 Precision: 0.5319 Recall: 0.1020 F1-score: 0.1712
Loss: 0.3196 Acc: 27.0567 AUC-ROC: 0.5655 Precision: 0.5781 Recall: 0.1473 F1-score: 0.2347
Loss: 0.3170 Acc: 26.9508 AUC-ROC: 0.5761 Precision: 0.5812 Recall: 0.1706 F1-score: 0.2638
Loss: 0.3142 Acc: 26.8443 AUC-ROC: 0.5868 Precision: 0.6032 Recall: 0.1929 F1-score: 0.2923
Loss: 0.3108 Acc: 26.7824 AUC-ROC: 0.5937 Precision: 0.6036 Recall: 0.2081 F1-score: 0.3095
Loss: 0.3061 Acc: 26.7888 AUC-ROC: 0.5986 Precision: 0.6121 Recall: 0.2178 F1-score: 0.3213
Loss: 0.3039 Acc: 26.7807 AUC-ROC: 0.6003 Precision: 0.6187 Recall: 0.2209 F1-score: 0.3255
Loss: 0.3016 Acc: 26.7432 AUC-ROC: 0.6061 Precision: 0.6266 Recall: 0.2329 F1-sco

In [21]:
for epoch in range(5):
    train(model, train_loader, optimizer, criterion)

Loss: 0.1608 Acc: 23.5625 AUC-ROC: 0.8815 Precision: 0.8000 Recall: 0.8000 F1-score: 0.8000
Loss: 0.1781 Acc: 25.6572 AUC-ROC: 0.7691 Precision: 0.7888 Recall: 0.5610 F1-score: 0.6557
Loss: 0.1797 Acc: 25.7484 AUC-ROC: 0.7670 Precision: 0.8003 Recall: 0.5550 F1-score: 0.6555
Loss: 0.1822 Acc: 25.6773 AUC-ROC: 0.7664 Precision: 0.7875 Recall: 0.5557 F1-score: 0.6516
Loss: 0.1840 Acc: 25.5525 AUC-ROC: 0.7712 Precision: 0.7817 Recall: 0.5669 F1-score: 0.6572
Loss: 0.1821 Acc: 25.5947 AUC-ROC: 0.7720 Precision: 0.7823 Recall: 0.5683 F1-score: 0.6583
Loss: 0.1811 Acc: 25.6585 AUC-ROC: 0.7721 Precision: 0.7889 Recall: 0.5672 F1-score: 0.6599
Loss: 0.1811 Acc: 25.6864 AUC-ROC: 0.7731 Precision: 0.7939 Recall: 0.5685 F1-score: 0.6626
Loss: 0.1820 Acc: 25.6616 AUC-ROC: 0.7736 Precision: 0.7945 Recall: 0.5697 F1-score: 0.6636
Loss: 0.1803 Acc: 25.6887 AUC-ROC: 0.7746 Precision: 0.7932 Recall: 0.5718 F1-score: 0.6645
Loss: 0.1796 Acc: 25.7241 AUC-ROC: 0.7740 Precision: 0.7937 Recall: 0.5703 F1-sc

In [23]:
y_hat = evaluate(model, test_loader, criterion)
y_pred = []
for tens in y_hat: 
    tens = tens.to('cpu')
    y_pred += tens.numpy().flatten().tolist()

print(roc_auc_score(test_labels, y_pred))
print(precision_recall_fscore_support(test_labels, y_pred, average='binary'))
print(accuracy_score(test_labels, y_pred))

0.7493138722554891
(0.90625, 0.5064870259481038, 0.6498079385403329, None)
0.9287945847435564


In [36]:
model = LogisticRegression().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(1):
    train(model, train_loader, optimizer, criterion)

Loss: 0.7355 Acc: 7.2500 AUC-ROC: 0.6000 Precision: 0.0769 Recall: 1.0000 F1-score: 0.1429
Loss: 13.1086 Acc: 27.5173 AUC-ROC: 0.4980 Precision: 0.0769 Recall: 0.0047 F1-score: 0.0088
Loss: 12.7673 Acc: 27.7674 AUC-ROC: 0.4991 Precision: 0.0769 Recall: 0.0024 F1-score: 0.0047
Loss: 12.7527 Acc: 27.8181 AUC-ROC: 0.4994 Precision: 0.0769 Recall: 0.0016 F1-score: 0.0032
Loss: 12.7119 Acc: 27.8560 AUC-ROC: 0.4995 Precision: 0.0769 Recall: 0.0012 F1-score: 0.0024
Loss: 12.8468 Acc: 27.8268 AUC-ROC: 0.4996 Precision: 0.0769 Recall: 0.0010 F1-score: 0.0019
Loss: 12.9041 Acc: 27.8157 AUC-ROC: 0.4997 Precision: 0.0769 Recall: 0.0008 F1-score: 0.0016
Loss: 12.8597 Acc: 27.8363 AUC-ROC: 0.4997 Precision: 0.0769 Recall: 0.0007 F1-score: 0.0014
Loss: 12.8963 Acc: 27.8293 AUC-ROC: 0.4998 Precision: 0.0769 Recall: 0.0006 F1-score: 0.0012
Loss: 12.9882 Acc: 27.8038 AUC-ROC: 0.4998 Precision: 0.0769 Recall: 0.0005 F1-score: 0.0011
Loss: 13.0089 Acc: 27.7994 AUC-ROC: 0.4998 Precision: 0.0769 Recall: 0.0

In [38]:
y_hat = evaluate(model, test_loader, criterion)
y_pred = []
for tens in y_hat: 
    y_pred += tens.numpy().flatten().tolist()

print(roc_auc_score(test_labels, y_pred))
print(precision_recall_fscore_support(test_labels, y_pred, average='binary'))
print(accuracy_score(test_labels, y_pred))

0.5
(0.0, 0.0, 0.0, None)
0.8695652173913043


  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
import torch
import torch.nn as nn
%pip install einops
from einops.layers.torch import Rearrange

class VisionTransformer(nn.Module):
    def __init__(self, image_size=299, patch_size=16, num_classes=2, dim=768, depth=12, heads=12, mlp_dim=3072):
        super().__init__()
        assert image_size % patch_size == 0, "image size must be divisible by patch size"
        num_patches = (image_size // patch_size) ** 2
        patch_dim = 3 * patch_size ** 2  # assuming 3-channel RGB images
        
        self.patch_size = patch_size
        self.num_patches = num_patches
        
        # input projection
        self.projection = nn.Sequential(
            nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size),
            Rearrange('b c h w -> b (h w) c')
        )
        
        # transformer encoder
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=mlp_dim)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=depth)
        
        # output projection
        self.fc = nn.Linear(dim, num_classes)

    def forward(self, x):
        # pad the input image to make it evenly divisible by patch_size
        _, _, h, w = x.size()
        ph = (self.patch_size - h % self.patch_size) % self.patch_size
        pw = (self.patch_size - w % self.patch_size) % self.patch_size
        padding = nn.ZeroPad2d((0, pw, 0, ph))
        x = padding(x)
        
        x = self.projection(x)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        x = self.fc(x)
        return x

Collecting einops
  Using cached einops-0.6.1-py3-none-any.whl (42 kB)
Installing collected packages: einops
Successfully installed einops-0.6.1


In [28]:
model = VisionTransformer(image_size=320, patch_size=16).to(device)

# Step 3: Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Step 4: Write a training loop
num_epochs = 10
total_steps = 0
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        total_steps += 1

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass and update the parameters
        loss.backward()
        optimizer.step()
        
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                    .format(epoch+1, 10, i+1, total_steps, loss.item()))

Epoch [1/10], Step [1/1], Loss: 0.6198


RuntimeError: MPS backend out of memory (MPS allocated: 15.21 GB, other allocations: 2.92 GB, max allowed: 18.13 GB). Tried to allocate 33.84 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).