Import necessary libraries

In [46]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

In [47]:
# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Hyperparameters

In [48]:
image_size = 224
rotation = 10
normalization_mean = [0.5, 0.5, 0.5]
normalization_std = [0.5, 0.5, 0.5]

learning_rate = 0.03
batch_size = 32
epochs = 40

Image processing steps

In [49]:
train_transform = transforms.Compose([
    # Resize the images 
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),  # Flip the images horizontally at random
    # Rotate the images
    transforms.RandomRotation(rotation),
    transforms.ToTensor(),  # Convert the images to PyTorch tensors
    # Normalize the pixel values to [-1, 1]
    transforms.Normalize(normalization_mean, normalization_std)
])
test_transform = transforms.Compose([
    # Resize the images to 224x224 pixels
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),  # Convert the images to PyTorch tensors
    # Normalize the pixel values to [-1, 1]
    transforms.Normalize(normalization_mean, normalization_std)
])

In [50]:
# Load the dataset
train_dataset = datasets.ImageFolder('data/train', transform=train_transform)
test_dataset = datasets.ImageFolder('data/test', transform=test_transform)

In [51]:

# Define the CNN model
class CancerCNN(nn.Module):
    def __init__(self):
        super(CancerCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(256 * 28 * 28, 512)
        self.fc2 = nn.Linear(512, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(-1, 256 * 28 * 28)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


Define the loss function and optimizer

In [52]:
model = CancerCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [53]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size, shuffle=False)

Train The model

In [54]:
for epoch in range(epochs):
    train_loss = 0
    train_correct = 0
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        train_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)
        train_correct += pred.eq(target.view_as(pred)).sum().item()
        loss.backward()
        optimizer.step()

    train_loss /= len(train_loader.dataset)
    train_acc = 100. * train_correct / len(train_loader.dataset)
    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        train_loss, train_correct, len(train_loader.dataset), train_acc))

    # Test the model
    model.eval()
    test_loss = 0
    test_correct = 0
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            test_correct += pred.eq(target.view_as(pred)).sum().item()
            all_preds.extend(pred.cpu().numpy())
            all_targets.extend(target.cpu().numpy())

    test_loss /= len(test_loader.dataset)
    test_acc = 100. * test_correct / len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, test_correct, len(test_loader.dataset), test_acc))

    # Calculate F1 score
    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)
    f1_score = metrics.f1_score(all_targets, all_preds, average='macro')
    print('F1 score: {:.4f}'.format(f1_score))

    print('------------------------------------------------------')


Train set: Average loss: 0.0193, Accuracy: 1693/2637 (64%)
Test set: Average loss: 0.0172, Accuracy: 454/660 (69%)

F1 score: 0.6820
------------------------------------------------------
Train set: Average loss: 0.0160, Accuracy: 1964/2637 (74%)
Test set: Average loss: 0.0175, Accuracy: 468/660 (71%)

F1 score: 0.6932
------------------------------------------------------
Train set: Average loss: 0.0140, Accuracy: 2047/2637 (78%)
Test set: Average loss: 0.0123, Accuracy: 530/660 (80%)

F1 score: 0.8028
------------------------------------------------------
Train set: Average loss: 0.0131, Accuracy: 2103/2637 (80%)
Test set: Average loss: 0.0145, Accuracy: 509/660 (77%)

F1 score: 0.7688
------------------------------------------------------
Train set: Average loss: 0.0124, Accuracy: 2140/2637 (81%)
Test set: Average loss: 0.0120, Accuracy: 526/660 (80%)

F1 score: 0.7968
------------------------------------------------------
Train set: Average loss: 0.0125, Accuracy: 2141/2637 (81%)
T

### Save the model

In [55]:
torch.save(model.state_dict(), 'cancer_cnn.pth')