<a href="https://colab.research.google.com/github/Vishwagna-Aligety/Breast_Cancer_classification-model/blob/main/Breast_Cancer_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Breast Cancer Classification Using CNN (CancerNet)
# **Objective**: Build a Convolutional Neural Network (CNN) named *CancerNet* that classifies breast cancer histology images (benign or malignant) using the IDC dataset.

In [None]:
# ------------------ STEP 1: Setup Environment ------------------
# Colab-only cell: asks for the Kaggle API token, copies it into ~/.kaggle,
# then downloads the IDC dataset archive and unpacks it under /content.
from google.colab import files
files.upload()  # Upload your kaggle.json API key

# Copy the uploaded token to ~/.kaggle/kaggle.json and restrict it to
# owner read/write (mode 600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download IDC dataset from Kaggle
!kaggle datasets download -d paultimothymooney/breast-histopathology-images

# Extract dataset
# The archive is expected in the current working directory under the name
# used below; everything is unpacked into /content/IDC_dataset.
import zipfile
with zipfile.ZipFile("breast-histopathology-images.zip","r") as zip_ref:
    zip_ref.extractall("/content/IDC_dataset")

In [None]:
# ------------------ STEP 2: Organize Dataset ------------------
# Creates train/val/test folder trees (one sub-folder per class label, "0" and
# "1") and partitions every PNG found under `dataset_root` into three lists.
import os, shutil, random

# Fixed seed so the split is identical on every run. The split folders are
# never cleared, so without a fixed seed a re-run would reshuffle and copy the
# new split next to the old one, and the same image could end up in both the
# training and the test folder.
SPLIT_SEED = 42

base_dir = "/content/IDC_dataset"
dataset_root = os.path.join(base_dir, "IDC_regular_ps50_idx5")

train_dir = os.path.join(base_dir, "train")
val_dir   = os.path.join(base_dir, "val")
test_dir  = os.path.join(base_dir, "test")

for d in [train_dir, val_dir, test_dir]:
    os.makedirs(os.path.join(d,"0"), exist_ok=True)
    os.makedirs(os.path.join(d,"1"), exist_ok=True)

# Split dataset manually (70% train, 20% val, 10% test)
# The comprehension keeps its loop variables local, so the `files` module
# imported from google.colab earlier in the notebook is not rebound.
# Sorting first makes the seeded shuffle independent of the order in which
# os.walk happens to list the directory entries.
all_images = sorted(
    os.path.join(root, filename)
    for root, _dirs, filenames in os.walk(dataset_root)
    for filename in filenames
    if filename.endswith(".png")
)

# A private Random instance keeps the global `random` state untouched.
random.Random(SPLIT_SEED).shuffle(all_images)
train_split = int(0.7*len(all_images))
val_split   = int(0.9*len(all_images))

train_files = all_images[:train_split]
val_files   = all_images[train_split:val_split]
test_files  = all_images[val_split:]

def _label_from_path(path):
    """Return the class label ("0" or "1") for a single image path.

    Only the file name and its immediate parent folder are inspected, so
    unrelated path components (for example a folder called "raw_1") cannot
    change the label.
    """
    name = os.path.basename(path)
    if "class1" in name:
        return "1"
    if "class0" in name:
        return "0"
    stem = os.path.splitext(name)[0]
    if stem.endswith("_1"):
        return "1"
    # No label in the file name: use the parent folder when it is the
    # positive-class folder, otherwise keep the historical default of "0".
    parent = os.path.basename(os.path.dirname(path))
    return "1" if parent == "1" else "0"


def copy_files(file_list, target_dir):
    """Copy every image in `file_list` into `target_dir/<label>/`.

    Args:
        file_list: iterable of source image paths.
        target_dir: split folder; its "0" and "1" sub-folders are created
            if they do not exist yet.

    Files keep their base name; a file with the same name that is already
    present in the destination is overwritten by `shutil.copy`.
    """
    for label in ("0", "1"):
        os.makedirs(os.path.join(target_dir, label), exist_ok=True)
    for f in file_list:
        label = _label_from_path(f)
        shutil.copy(f, os.path.join(target_dir, label, os.path.basename(f)))

# Write each split to disk, in train -> val -> test order.
for split_files, split_dir in (
    (train_files, train_dir),
    (val_files, val_dir),
    (test_files, test_dir),
):
    copy_files(split_files, split_dir)

In [None]:
# ------------------ STEP 3: Data Generators with Augmentation ------------------
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (50,50)
batch = 64

# Random perturbations applied to training images only.
augmentation = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Both generators scale pixel values by 1/255; only the training one augments.
datagen_train = ImageDataGenerator(rescale=1./255, **augmentation)
datagen_val   = ImageDataGenerator(rescale=1./255)

# Options shared by all three directory iterators.
flow_options = dict(target_size=img_size, batch_size=batch, class_mode="binary")

train_gen = datagen_train.flow_from_directory(train_dir, **flow_options)
val_gen   = datagen_val.flow_from_directory(val_dir, **flow_options)
# The test iterator is not shuffled so its order matches `test_gen.classes`.
test_gen  = datagen_val.flow_from_directory(test_dir, shuffle=False, **flow_options)

In [None]:
# ------------------ STEP 4: Define CNN (Improved to Reduce Overfitting) ------------------
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Two conv -> pool -> dropout stages operating on 50x50 RGB inputs.
feature_extractor = [
    Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(50,50,3)),
    MaxPooling2D(pool_size=2, strides=2),
    Dropout(0.25),

    Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
    Dropout(0.25),
]

# Dense head ending in a single sigmoid unit (binary output).
classifier_head = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]

model = Sequential(feature_extractor + classifier_head)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# ------------------ STEP 5: Train Model with Early Stopping ------------------
# Stop once validation loss has not improved for 3 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    callbacks=[early_stop],
)

In [None]:
# ------------------ STEP 6: Evaluate Model ------------------
# evaluate() yields the loss followed by the compiled metric (accuracy).
loss, acc = model.evaluate(test_gen)
accuracy_pct = acc * 100
print(f"Final Test Accuracy: {accuracy_pct:.2f}%")

In [None]:
import matplotlib.pyplot as plt


def _plot_history(metric_key, metric_name):
    """Draw the training and validation curves of one metric from `history`.

    Args:
        metric_key: key of the training series in `history.history`; the
            validation series is read from the same key prefixed with "val_".
        metric_name: human-readable name used in the title, legend and y-axis.
    """
    plt.figure(figsize=(8,5))
    plt.plot(history.history[metric_key], label=f'Training {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.title(f'Training vs Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()


# Plot Accuracy, then Loss
for metric_key, metric_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    _plot_history(metric_key, metric_name)

In [None]:
# ------------------ STEP 7: Confusion Matrix ------------------
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# The network ends in a single sigmoid unit, so predict() gives one
# probability per image in a column; flatten it to a 1-D vector so it has the
# same shape as `test_gen.classes`.
y_prob = np.asarray(model.predict(test_gen)).ravel()
y_pred = (y_prob > 0.5).astype("int32")

# Pin the label order so the matrix is always 2x2 and lines up with the two
# display labels, even if one class is missing from the data and predictions.
cm = confusion_matrix(test_gen.classes, y_pred, labels=[0, 1])
ConfusionMatrixDisplay(cm, display_labels=["Benign","Malignant"]).plot()

In [None]:
# ------------------ STEP 8: Save Model ------------------
# Write the trained model to a .h5 file in the current working directory.
model_path = "CancerNet_Improved.h5"
model.save(model_path)