<a href="https://colab.research.google.com/github/Yashmaini30/Breast-Cancer-Detection/blob/main/ResNet50_with_ML_classifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install lazypredict
from google.colab import drive
drive.mount('/content/drive')

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import torchvision
from torchvision import models, transforms
from PIL import Image, ImageChops
from torch.utils.data import DataLoader, Dataset
import os
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl.metadata (12 kB)
Collecting nvidia-nccl-cu12 (from xgboost->lazypredict)
  Downloading nvidia_nccl_cu12-2.22.3-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)
Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Downloading nvidia_nccl_cu12-2.22.3-py3-none-manylinux2014_x86_64.whl (190.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.9/190.9 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nvidia-nccl-cu12, lazypredict
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.3.1+cu121 requires nvidia-cublas-cu12==12.1.3.1; platform_system == "Linux" and platform_machine == "x86_64", which is not installed.
torch 2.3.1+cu121 requires nvidia-cuda-cupti-cu12==12.1.105; platform_system == "Linux" and platform_machine =

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [3]:
# Define the labels and corresponding directories
labels = ['benign', 'malignant', 'normal']
data_dir = '/content/drive/MyDrive/Dataset_BUSI_with_GT'


In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [5]:
class BUSIDataset(Dataset):
    def __init__(self, data_dir, labels, transform=None):
        self.data_dir = data_dir
        self.labels = labels
        self.transform = transform
        self.image_paths = []
        self.mask_paths = []
        self.image_labels = []

        for label in labels:
            label_dir = os.path.join(data_dir, label)
            image_files = [file for file in os.listdir(label_dir) if file.endswith('.png') and 'mask' not in file]
            self.image_paths.extend([os.path.join(label_dir, img) for img in image_files])
            self.mask_paths.extend([os.path.join(label_dir, img.replace('.png', '_mask.png')) for img in image_files])
            self.image_labels.extend([label] * len(image_files))

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        image = Image.open(image_path).convert("RGB")
        mask = Image.open(mask_path).convert("RGB")

        # Overlay image and mask
        overlay = ImageChops.add(image, mask, scale=2.0, offset=0)

        label = self.image_labels[idx]
        if self.transform:
            overlay = self.transform(overlay)
        return overlay, label

In [6]:
dataset = BUSIDataset(data_dir, labels, transform=transform)

In [7]:
# Initialize a pre-trained ResNet50 model for feature extraction
model = models.resnet50(pretrained=True)
model = nn.Sequential(*list(model.children())[:-1])
model.eval()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 97.7MB/s]


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [8]:
# Extract features and labels
features = []
image_labels = []

with torch.no_grad():
    for img, label in DataLoader(dataset, batch_size=32):
        img = img
        output = model(img)
        output = output.view(output.size(0), -1)  # Flatten the output
        features.extend(output.numpy())
        image_labels.extend(label)

In [9]:
# Convert to DataFrame for LazyPredict
features_df = pd.DataFrame(features)
image_labels_df = pd.Series(image_labels)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(features_df, image_labels_df, test_size=0.2, random_state=42)

In [14]:
# Apply LazyClassifier
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

 97%|█████████▋| 28/29 [00:28<00:00,  4.22it/s]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018386 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 426744
[LightGBM] [Info] Number of data points in the train set: 624, number of used features: 2048
[LightGBM] [Info] Start training from score -0.569682
[LightGBM] [Info] Start training from score -1.318157
[LightGBM] [Info] Start training from score -1.791759


100%|██████████| 29/29 [01:16<00:00,  2.65s/it]


In [17]:

print(models.columns)

# Access the columns using the correct names
print(models[['Accuracy', 'Time Taken']])

Index(['Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score', 'Time Taken'], dtype='object')
                               Accuracy  Time Taken
Model                                              
RidgeClassifierCV                  0.99        0.30
SVC                                0.99        0.35
RidgeClassifier                    0.99        0.18
LinearDiscriminantAnalysis         0.98        0.48
Perceptron                         0.98        0.15
ExtraTreesClassifier               0.98        0.36
RandomForestClassifier             0.98        1.88
LogisticRegression                 0.98        0.50
CalibratedClassifierCV             0.98        3.56
SGDClassifier                      0.97        0.17
PassiveAggressiveClassifier        0.97        0.25
LinearSVC                          0.97        0.78
KNeighborsClassifier               0.97        0.09
BernoulliNB                        0.96        0.18
NearestCentroid                    0.96        0.11
GaussianNB            