<a href="https://colab.research.google.com/github/Yashmaini30/Breast-Cancer-Detection/blob/main/ResNet50_with_RF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

from __future__ import absolute_import, print_function, division, unicode_literals
import os
import glob
import shutil
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dropout, Dense, Activation, GlobalAveragePooling2D
from keras import regularizers
import numpy as np
import matplotlib.pyplot as plt
import pathlib
from sklearn.metrics import precision_score, recall_score, accuracy_score, classification_report, confusion_matrix

!pip install imagehash
import imagehash

Mounted at /content/drive
Collecting imagehash
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting PyWavelets (from imagehash)
  Downloading pywavelets-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Downloading ImageHash-4.3.1-py2.py3-none-any.whl (296 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.5/296.5 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pywavelets-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyWavelets, imagehash
Successfully installed PyWavelets-1.6.0 imagehash-4.3.1


In [2]:
data_dir = "/content/drive/MyDrive/archive"
data_dir = pathlib.Path(data_dir)
train_path = data_dir / 'train'
test_path = data_dir / 'test'

In [3]:
BATCH_SIZE = 8
IMG_HEIGHT = 224
IMG_WIDTH = 224

In [4]:
image_train_gen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.70,
    rotation_range=60,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.20,
    height_shift_range=0.20
)
train_data_gen = image_train_gen.flow_from_directory(
    train_path,
    shuffle=True,
    batch_size=BATCH_SIZE,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='categorical'
)

Found 1075 images belonging to 3 classes.


In [5]:
img_val_gen = ImageDataGenerator(rescale=1./255)
val_data_gen = img_val_gen.flow_from_directory(
    test_path,
    batch_size=BATCH_SIZE,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='categorical'
)

Found 281 images belonging to 3 classes.


In [6]:
from tensorflow.keras.applications import ResNet50

base_model = ResNet50(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                      include_top=False,
                      weights='imagenet')

# Set layers to non-trainable
base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
feature_extractor = Model(inputs=base_model.input, outputs=GlobalAveragePooling2D()(base_model.output))

In [8]:
def extract_features_and_save(data_gen, output_file):
    features = []
    labels = []

    for inputs_batch, labels_batch in data_gen:
        features_batch = feature_extractor.predict(inputs_batch)
        features.append(features_batch)
        labels.append(labels_batch)
        if len(features) * BATCH_SIZE >= data_gen.samples:
            break

    features = np.vstack(features)
    labels = np.vstack(labels)
    labels = np.argmax(labels, axis=1)  # Convert one-hot encoding to class indices

    feature_df = pd.DataFrame(features)
    feature_df['label'] = labels
    feature_df.to_csv(output_file, index=False)

extract_features_and_save(train_data_gen, 'train_features.csv')
extract_features_and_save(val_data_gen, 'val_features.csv')

# Load the features from CSV files
train_features = pd.read_csv('train_features.csv')
val_features = pd.read_csv('val_features.csv')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━

In [9]:
X_train = train_features.drop('label', axis=1)
y_train = train_features['label']
X_val = val_features.drop('label', axis=1)
y_val = val_features['label']

In [10]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate the classifier
y_pred = rf.predict(X_val)

In [12]:
class_names = ['normal', 'malignant', 'benign']

accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred, average='weighted')
recall = recall_score(y_val, y_pred, average='weighted')

print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)

# Classification Report
print(classification_report(y_val, y_pred, target_names=class_names))

# Confusion Matrix
conf_matrix = confusion_matrix(y_val, y_pred)
print("Confusion Matrix: \n", conf_matrix)

Accuracy:  0.7722419928825622
Precision:  0.7338561032826388
Recall:  0.7722419928825622
              precision    recall  f1-score   support

      normal       0.75      0.96      0.84       175
   malignant       0.88      0.58      0.70        85
      benign       0.00      0.00      0.00        21

    accuracy                           0.77       281
   macro avg       0.54      0.51      0.51       281
weighted avg       0.73      0.77      0.74       281

Confusion Matrix: 
 [[168   5   2]
 [ 36  49   0]
 [ 19   2   0]]
