In [None]:
pip install torch torchvision

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
import torch.optim as optim
import numpy as np
import copy
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from PIL import Image

In [2]:
print("PyTorch Version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
print("CUDA Version:", torch.version.cuda)
if torch.cuda.is_available():
    print("CUDA Device Name:", torch.cuda.get_device_name(0))
    print("Current CUDA Device ID:", torch.cuda.current_device())


PyTorch Version: 2.2.0+cpu
CUDA Available: False
CUDA Version: None


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from zipfile import ZipFile

# Specify the zip file path
zip_ref = ZipFile("/content/drive/MyDrive/CHM/breast.zip", 'r')

# Extract all files to the current directory (you can specify a different path)
zip_ref.extractall()

zip_ref.close()

In [5]:
import os
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, SubsetRandomSampler
import numpy as np
import torch

# Set the directory for the data
data_dir = '/content/breast/classes/'

# Define transformations
transformations = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Initialize the dataset using ImageFolder
dataset = datasets.ImageFolder(root=data_dir, transform=transformations)

# Print out class names to verify
print("Classes:", dataset.classes)

# Splitting data indices for training, validation, and testing
np.random.seed(42)
indices = np.random.permutation(len(dataset))
split_train = int(0.7 * len(indices))
split_val = int(0.85 * len(indices))
train_indices, val_indices, test_indices = indices[:split_train], indices[split_train:split_val], indices[split_val:]

# Creating samplers for each set
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

# Creating DataLoader for each set
train_loader = DataLoader(dataset, batch_size=16, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=16, sampler=val_sampler)
test_loader = DataLoader(dataset, batch_size=16, sampler=test_sampler)

# Print out the number of samples in each set
print("Number of training samples:", len(train_indices))
print("Number of validation samples:", len(val_indices))
print("Number of test samples:", len(test_indices))

Classes: ['Adenosis', 'Ductal_Carcinoma', 'Fibroadenoma', 'Lobular_Carcinoma', 'Mucinous_Carcinoma', 'Papillary_Carcinoma', 'Phyllodes_Tumor', 'Tubular_Adenoma']
Number of training samples: 1456
Number of validation samples: 312
Number of test samples: 313


In [6]:
import torch
from torchvision import models

# Load a pretrained VGG-16 model
model = models.vgg16(pretrained=True)

# Modify the classifier layer to match the number of classes in your dataset
num_features = model.classifier[6].in_features  # Access the in_features of the last layer in the classifier
num_classes = len(dataset.classes)
model.classifier[6] = torch.nn.Linear(num_features, num_classes)  # Replace the last layer

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Confirm model is on GPU
print("Model on CUDA:", next(model.parameters()).is_cuda)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 193MB/s]


Model on CUDA: False


In [7]:
# Loss function
criterion = torch.nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [8]:
import time

def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=25, device='cuda'):
    model.to(device)
    best_acc = 0.0
    for epoch in range(num_epochs):
        start_time = time.time()  # Start time of the epoch
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                loader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                loader = val_loader

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward and optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(loader.sampler)
            epoch_acc = running_corrects.double() / len(loader.sampler)

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        end_time = time.time()  # End time of the epoch
        print(f'Epoch duration: {end_time - start_time:.2f} seconds')
        print()

    print('Best val Acc: {:4f}'.format(best_acc))

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
trained_model = train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=100, device=device)

Epoch 1/100
----------
Train Loss: 2.2121 Acc: 0.3990
Val Loss: 1.7273 Acc: 0.4519
Epoch duration: 173.54 seconds

Epoch 2/100
----------
Train Loss: 1.7615 Acc: 0.4279
Val Loss: 1.6376 Acc: 0.5128
Epoch duration: 169.59 seconds

Epoch 3/100
----------
Train Loss: 1.6308 Acc: 0.4766
Val Loss: 1.6182 Acc: 0.4712
Epoch duration: 174.69 seconds

Epoch 4/100
----------
Train Loss: 1.5796 Acc: 0.4924
Val Loss: 1.4762 Acc: 0.5353
Epoch duration: 171.03 seconds

Epoch 5/100
----------
Train Loss: 1.5313 Acc: 0.4883
Val Loss: 1.3970 Acc: 0.5417
Epoch duration: 163.73 seconds

Epoch 6/100
----------
Train Loss: 1.5329 Acc: 0.4863
Val Loss: 1.6362 Acc: 0.5192
Epoch duration: 165.35 seconds

Epoch 7/100
----------
Train Loss: 1.7865 Acc: 0.4684
Val Loss: 1.7740 Acc: 0.4519
Epoch duration: 161.99 seconds

Epoch 8/100
----------
Train Loss: 2.8418 Acc: 0.4087
Val Loss: 1.8584 Acc: 0.4519
Epoch duration: 171.85 seconds

Epoch 9/100
----------
Train Loss: 9.0354 Acc: 0.4341
Val Loss: 1.7660 Acc: 0.45

In [None]:
# Save the entire model
torch.save(model, 'O:/Sem3/CHM/Breast_Cancer_Classification/breast/breast/resnet50_breakhis_complete_model.pth')


In [None]:
# Load the entire model
complete_model_path = 'O:/Sem3/CHM/Breast_Cancer_Classification/breast/breast/resnet50_breakhis_complete_model.pth'
model = torch.load(complete_model_path)
model.to('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()


### Evaluation:

In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import torch


In [None]:
def evaluate_model(model, test_loader, device):
    model.eval()  # Set the model to evaluation mode
    true_labels = []
    pred_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(preds.cpu().numpy())

    accuracy = np.sum(np.array(true_labels) == np.array(pred_labels)) / len(true_labels)
    print(f'Accuracy: {accuracy:.4f}')
    print("Classification Report:")
    print(classification_report(true_labels, pred_labels, target_names=test_loader.dataset.classes))
    print("Confusion Matrix:")
    print(confusion_matrix(true_labels, pred_labels))


In [None]:
# Evaluate the trained model
evaluate_model(trained_model, test_loader, device)


Accuracy: 0.9553
Classification Report:
              precision    recall  f1-score   support

      benign       0.96      0.89      0.92       345
   malignant       0.96      0.98      0.97       842

    accuracy                           0.96      1187
   macro avg       0.96      0.94      0.94      1187
weighted avg       0.96      0.96      0.95      1187

Confusion Matrix:
[[306  39]
 [ 14 828]]
