In [1]:
import gc
import torch
import wandb
import torch.nn.functional as F

from torch import nn
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader

%matplotlib inline
%env "WANDB_NOTEBOOK_NAME" "experiments.ipynb"

env: "WANDB_NOTEBOOK_NAME"="experiments.ipynb"


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
IMG_SIZE = 224
BATCH_SIZE = 32
IMG_SHOW_NUM = 6
EPOCHS = 20
LEARNING_RATE = 0.001

In [4]:
wandb.init(
    project="skin-cancer",

    config={
    "learning_rate": LEARNING_RATE,
    "architecture": "CUSTOM CNN",
    "dataset": "SKIN-CANCER",
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE
    }
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfilnow[0m ([33mfilnow42[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
transformer = transforms.Compose([
    transforms.Resize(size = (IMG_SIZE, IMG_SIZE), antialias = True),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
])

testTransformer = transforms.Compose([
    transforms.Resize(size = (IMG_SIZE, IMG_SIZE), antialias = True),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
])

In [6]:
trainData = datasets.ImageFolder(root = "./train", transform = transformer)
testSet = datasets.ImageFolder(root = "./test", transform = testTransformer)

trainSet, valSet = torch.utils.data.random_split(trainData, [int(0.8 * len(trainData)), len(trainData) - int(0.8 * len(trainData))])

In [7]:
trainLoader = DataLoader(trainSet, batch_size = BATCH_SIZE, shuffle=True)
valLoader = DataLoader(valSet, batch_size=BATCH_SIZE, shuffle=True)
testLoader = DataLoader(testSet, batch_size = BATCH_SIZE, shuffle = False)

In [8]:
print('Train: ', len(trainSet))
print('Validation: ', len(valSet))
print('Test: ', len(testSet))

Train:  2109
Validation:  528
Test:  660


In [9]:
def to_device(data, device):
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

In [10]:
class DeviceDataLoader():
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        return len(self.dl)

In [11]:
train_loader = DeviceDataLoader(trainLoader, device)
val_loader = DeviceDataLoader(valLoader, device)
test_loader = DeviceDataLoader(testLoader, device)

In [13]:
patience = 5
minDelta = 0.01
currentPatience = 0
bestLoss = float("inf")

In [14]:
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

wandb.watch(model, log_freq=100)

to_device(model, device)

criterion= nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, threshold=0.01, factor=0.1, patience=3, min_lr=1e-5, verbose=True)

for epoch in range(EPOCHS): 
    print()
    print(f'EPOCH {epoch+1}') 
    print()

    model.train(True)

    running_loss, last_loss, avg_loss = 0., 0., 0.
    train_correct, train_total = 0, 0
    
    for i, data in enumerate(train_loader):
        input, label = data
        optimizer.zero_grad()

        outputs = model(input)
        loss = criterion(outputs, label)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        avg_loss += loss.item()

        if i % 10 == 0:
            last_loss = running_loss / 10
            print(f'Batch {i} Loss train: {last_loss:.3f}')
            running_loss = 0.
        
        _, predicted = torch.max(outputs.data, 1)
        train_total += label.size(0)
        train_correct += (predicted == label).sum().item()

        torch.cuda.empty_cache()
        _ = gc.collect()
        

    avg_loss /= int(len(trainSet)/BATCH_SIZE)
    
    running_vloss = 0.
    
    model.eval()

    val_correct, val_total = 0, 0

    with torch.no_grad():
        for i, val_data in enumerate(val_loader):
            val_input, val_label = val_data
            val_output = model(val_input)
            val_loss = criterion(val_output, val_label)
            running_vloss += val_loss.item()

            _, vpredicted = torch.max(val_output.data, 1)
            val_total += val_label.size(0)
            val_correct += (vpredicted == val_label).sum().item()

    avg_vloss = running_vloss / (i + 1)

    train_accuracy = train_correct / train_total
    val_accuracy = val_correct / val_total

    wandb.log({"train/loss": avg_loss, "train/acc": train_accuracy,
               "val/loss": avg_vloss, "val/acc": val_accuracy})

    print(f'Loss train {avg_loss:.3f}, loss valid {avg_vloss:.3f}')
    print(f'Accuracy train {train_accuracy:.2%}, accuracy valid {val_accuracy:.2%}')

    if avg_vloss < bestLoss - minDelta:
        bestLoss = avg_vloss
        currentPatience = 0
    else:
        currentPatience += 1
        if currentPatience >= patience:
            print("EARLY STOPPING")
            break
    
    scheduler.step(avg_vloss)

wandb.finish()


EPOCH 1

Batch 0 Loss train: 0.067
Batch 10 Loss train: 0.454
Batch 20 Loss train: 0.367
Batch 30 Loss train: 0.345
Batch 40 Loss train: 0.257
Batch 50 Loss train: 0.298
Batch 60 Loss train: 0.331
Loss train 0.348, loss valid 0.332
Accuracy train 83.78%, accuracy valid 83.14%

EPOCH 2

Batch 0 Loss train: 0.045
Batch 10 Loss train: 0.208
Batch 20 Loss train: 0.263
Batch 30 Loss train: 0.237
Batch 40 Loss train: 0.201
Batch 50 Loss train: 0.214
Batch 60 Loss train: 0.297
Loss train 0.240, loss valid 0.321
Accuracy train 89.76%, accuracy valid 86.17%

EPOCH 3

Batch 0 Loss train: 0.017
Batch 10 Loss train: 0.137
Batch 20 Loss train: 0.207
Batch 30 Loss train: 0.191
Batch 40 Loss train: 0.289
Batch 50 Loss train: 0.205
Batch 60 Loss train: 0.250
Loss train 0.220, loss valid 0.307
Accuracy train 91.42%, accuracy valid 86.93%

EPOCH 4

Batch 0 Loss train: 0.012
Batch 10 Loss train: 0.151
Batch 20 Loss train: 0.181
Batch 30 Loss train: 0.141
Batch 40 Loss train: 0.160
Batch 50 Loss train: 0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/acc,▁▄▅▆▆▆▇█
train/loss,█▅▅▄▃▃▂▁
val/acc,▁▅▆▅▆▆▆█
val/loss,▂▁▁▄▂▂█▂

0,1
train/acc,0.9853
train/loss,0.04908
val/acc,0.88068
val/loss,0.36271


In [15]:
model.eval()

test_correct, test_total = 0, 0

with torch.no_grad():
    for i, data in enumerate(test_loader):
        inputs, labels = data
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_accuracy = test_correct / test_total

print(f'Accuracy test {test_accuracy:.2%}')

Accuracy test 91.67%
