In [80]:
import numpy as np 
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support

import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor

from load_data import load_train_data, load_test_data

In [81]:
train_images, train_labels = load_train_data()
test_images, test_labels = load_test_data()

Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>
Size of Training Dataset:  11177
<ParallelMapDataset element_spec={'image': TensorSpec(shape=(), dtype=tf.string, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None), 'label_normal': TensorSpec(shape=(), dtype=tf.int64, name=None)}>


In [82]:
train_images = np.array(train_images)
train_labels = np.array(train_labels)
test_images = np.squeeze(test_images, axis=-1)
test_labels = (test_labels>0).astype(int)

print(train_images.shape)
print(train_labels.shape)

print(test_images.shape)
print(test_labels.shape)

(55885, 299, 299)
(55885,)
(15364, 299, 299)
(15364,)


In [83]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class NumpyImageDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform or ToTensor()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image, label = self.images[idx], self.labels[idx]
        image = self.transform(image)
        return image, label

train_loader=DataLoader(NumpyImageDataset(train_images, train_labels), 
    batch_size=32, shuffle=True)
    
test_loader=DataLoader(NumpyImageDataset(test_images, test_labels),
    batch_size=32, shuffle=False)

In [100]:
# Define MLP model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(299*299, 500)
        self.fc2 = nn.Linear(500, 100)
        self.fc3 = nn.Linear(100, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

# Define training function
def train(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    y_true = []
    y_scores = []
    y_pred = []

    for i, (inputs, labels) in enumerate(dataloader):
        optimizer.zero_grad()
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        preds = (outputs > 0.5).int()
        running_corrects += torch.sum(preds == labels.data)
        
        y_true += labels.data.cpu().tolist()
        y_scores += outputs.squeeze().data.cpu().tolist()
        y_pred += preds.cpu().tolist()

        if i+1 % 100 == 0:
            auc_roc = roc_auc_score(y_true, y_pred)
            precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
            print(f"Loss: {running_loss/((i+1)*inputs.size(0)):.4f} Acc: {running_corrects.float()/((i+1)*inputs.size(0)):.4f} AUC-ROC: {auc_roc:.4f} Precision: {precision:.4f} Recall: {recall:.4f} F1-score: {f1_score:.4f}")

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = running_corrects.float() / len(dataloader.dataset)
    epoch_auc_roc = roc_auc_score(y_true, y_pred)
    epoch_precision, epoch_recall, epoch_f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    print('Train Loss: {:.4f} Acc: {:.4f} AUC-ROC: {:.4f} Precision: {:.4f} Recall: {:.4f} F1-score: {:.4f}'.format(epoch_loss, epoch_acc, epoch_auc_roc, epoch_precision, epoch_recall, epoch_f1_score))
    return epoch_loss, epoch_acc, epoch_auc_roc, epoch_precision, epoch_recall, epoch_f1_score

# Define evaluation function
def evaluate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    y_scores = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels.float())
            running_loss += loss.item() * inputs.size(0)
            preds = (outputs > 0.5).int()
            y_scores.append(preds)
    
    return y_scores


In [85]:
model = MLP().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(10):
    train(model, train_loader, optimizer, criterion)


Loss: 0.7317 Acc: 5.0000 AUC-ROC: 0.5000 Precision: 0.1562 Recall: 1.0000 F1-score: 0.2703
Loss: 0.4771 Acc: 26.9682 AUC-ROC: 0.5409 Precision: 0.3579 Recall: 0.1111 F1-score: 0.1696
Loss: 0.4278 Acc: 26.8944 AUC-ROC: 0.5635 Precision: 0.4784 Recall: 0.1517 F1-score: 0.2304
Loss: 0.4050 Acc: 26.9208 AUC-ROC: 0.5700 Precision: 0.5228 Recall: 0.1621 F1-score: 0.2475
Train Loss: 0.3996 Acc: 26.8863 AUC-ROC: 0.5740 Precision: 0.5417 Recall: 0.1694 F1-score: 0.2581
Loss: 0.3597 Acc: 28.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))


Loss: 0.3347 Acc: 27.0186 AUC-ROC: 0.5964 Precision: 0.6493 Recall: 0.2092 F1-score: 0.3164
Loss: 0.3331 Acc: 26.8362 AUC-ROC: 0.6012 Precision: 0.6495 Recall: 0.2203 F1-score: 0.3290
Loss: 0.3283 Acc: 26.7853 AUC-ROC: 0.6055 Precision: 0.6544 Recall: 0.2292 F1-score: 0.3395
Train Loss: 0.3227 Acc: 26.7551 AUC-ROC: 0.6104 Precision: 0.6596 Recall: 0.2393 F1-score: 0.3512
Loss: 0.2647 Acc: 29.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))


Loss: 0.2908 Acc: 26.6286 AUC-ROC: 0.6313 Precision: 0.6759 Recall: 0.2828 F1-score: 0.3988
Loss: 0.2926 Acc: 26.5321 AUC-ROC: 0.6296 Precision: 0.6482 Recall: 0.2823 F1-score: 0.3933
Loss: 0.2874 Acc: 26.4981 AUC-ROC: 0.6358 Precision: 0.6552 Recall: 0.2949 F1-score: 0.4067
Train Loss: 0.2850 Acc: 26.4885 AUC-ROC: 0.6382 Precision: 0.6626 Recall: 0.2994 F1-score: 0.4124
Loss: 0.3317 Acc: 24.1250 AUC-ROC: 0.7500 Precision: 1.0000 Recall: 0.5000 F1-score: 0.6667
Loss: 0.2733 Acc: 26.4178 AUC-ROC: 0.6529 Precision: 0.6823 Recall: 0.3287 F1-score: 0.4437
Loss: 0.2726 Acc: 26.3384 AUC-ROC: 0.6553 Precision: 0.6834 Recall: 0.3340 F1-score: 0.4487
Loss: 0.2736 Acc: 26.3582 AUC-ROC: 0.6512 Precision: 0.6735 Recall: 0.3263 F1-score: 0.4396
Train Loss: 0.2725 Acc: 26.3934 AUC-ROC: 0.6508 Precision: 0.6737 Recall: 0.3251 F1-score: 0.4386
Loss: 0.2705 Acc: 25.7500 AUC-ROC: 0.6149 Precision: 0.2500 Recall: 0.3333 F1-score: 0.2857
Loss: 0.2677 Acc: 26.4824 AUC-ROC: 0.6451 Precision: 0.6496 Recall: 

Training:   0%|          | 0/1747 [1:49:49<?, ?it/s]


KeyboardInterrupt: 

In [86]:
y_hat = evaluate(model, test_loader, criterion)

In [87]:
y_pred = []
for tens in y_hat: 
    y_pred += tens.numpy().flatten().tolist()

roc_auc_score(test_labels, y_pred)
precision_recall_fscore_support(test_labels, y_pred, average='binary')

(0.6971935007385525, 0.47105788423153694, 0.5622394282310901, None)

In [101]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(64 * 37 * 37, 256)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(256, 1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pool3(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu4(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)
        return x


In [102]:
model = SimpleCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(10):
    train(model, train_loader, optimizer, criterion)

Loss: 0.7050 Acc: 3.0000 AUC-ROC: 0.5000 Precision: 0.0938 Recall: 1.0000 F1-score: 0.1714


KeyboardInterrupt: 

In [None]:
y_pred = []
for tens in y_hat: 
    y_pred += tens.numpy().flatten().tolist()

roc_auc_score(test_labels, y_pred)
precision_recall_fscore_support(test_labels, y_pred, average='binary')

In [105]:
class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(299*299, 1)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)
        out = self.linear(x)
        out = torch.sigmoid(out)
        return out

In [106]:
model = LogisticRegression().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

for epoch in range(10):
    train(model, train_loader, optimizer, criterion)

Loss: 0.6624 Acc: 22.8750 AUC-ROC: 0.4038 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Train Loss: 12.9906 Acc: 27.8211 AUC-ROC: 0.4999 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 6.2500 Acc: 30.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 13.0229 Acc: 27.8229 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 21.8751 Acc: 25.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 13.0343 Acc: 27.8225 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 15.6250 Acc: 27.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 13.0400 Acc: 27.8219 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 9.3750 Acc: 29.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 13.0421 Acc: 27.8222 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 6.2500 Acc: 30.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 13.0424 Acc: 27.8222 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000
Loss: 6.2500 Acc: 30.0000 AUC-ROC: 0.5000 Precision: 0.0000 Recall: 0.0000 F1-score: 0.0000


KeyboardInterrupt: 