<a href="https://colab.research.google.com/github/Zahra1725-dot/ML-Projects/blob/main/Finetune_Breast_cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!git config --global user.email "zahrakarami778@gmail.com"
!git config --global user.name "Zahra1725-dot"

In [4]:
!git clone https://github.com/Zahra1725-dot/Deep-_learning_Finetune.projects.git

Cloning into 'Deep-_learning_Finetune.projects'...


In [1]:
!mv "Finetune_Breast_cancer.ipynb" "https://github.com/Zahra1725-dot/Deep-_learning_Finetune.projects.git"

mv: cannot stat 'Finetune_Breast_cancer.ipynb': No such file or directory


In [2]:
!mv "Finetune_Breast_cancer.ipynb" "Deep-_learning_Finetune.projects/"

mv: cannot stat 'Finetune_Breast_cancer.ipynb': No such file or directory


In [4]:
!unzip breast-histopathology-images.zip -d /content/breast_cancer_dataset

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1551_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1601_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1651_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1701_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1751_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1801_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1851_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/0/9346_idx5_x2351_y1901_class0.png  
  inflating: /content/breast_cancer_dataset/IDC_regular_ps50_idx5/9346/

In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models, optimizers
import matplotlib.pyplot as plt

In [6]:
data_dir = '/content/breast_cancer_dataset'

In [7]:
# Define image dimensions and batch size
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

In [8]:
# Use ImageDataGenerator to load and preprocess the data
# We'll rescale images and split them into training and validation sets
datagen = ImageDataGenerator(
    rescale=1./255,                  # Normalize pixel values to [0, 1]
    validation_split=0.2               # 20% of data for validation
)

In [9]:
# Load the training dataset
train_ds = datagen.flow_from_directory(
    data_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    seed=123
)
# Load the validation dataset
val_ds = datagen.flow_from_directory(
    data_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    seed=123
)

Found 444147 images belonging to 280 classes.
Found 110901 images belonging to 280 classes.


In [None]:
print("Class names:", train_ds.class_indices)

In [11]:
# Load the pre-trained VGG16 model
base_model = VGG16(
    weights='imagenet', # Load weights trained on ImageNet
    include_top=False,  # Exclude the classifier layers at the top
    input_shape=(224, 224, 3) # Define the input shape
)

# Freeze the layers of the base model
# This is crucial for feature extraction
for layer in base_model.layers:
    layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
# Build the new model on top of the base model
x = base_model.output
x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
output = layers.Dense(1, activation='sigmoid')(x) # Single neuron for binary classification

In [13]:
# Create the final model
model = models.Model(inputs=base_model.input, outputs=output)

In [None]:
# Print a summary of the model architecture
model.summary()

In [15]:
# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Train the model
# We'll use a relatively small number of epochs for a quick demo
EPOCHS = 10
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds
)

In [None]:
# Plotting the training and validation accuracy and loss
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(EPOCHS)

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend(loc='lower right')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend(loc='upper right')

plt.show()

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Get all the true labels and predicted labels from the validation dataset
val_labels = []
val_predictions = []

# Assuming val_ds is a tf.data.Dataset or a generator
# Get the total number of batches in the validation set
num_batches = len(val_ds)

# Iterate through the validation dataset and get predictions
for i in range(num_batches):
    images, labels = next(val_ds) # Get a batch of images and labels
    predictions = model.predict(images) # Make predictions for the batch

    val_labels.extend(labels) # Store the true labels
    val_predictions.extend(predictions) # Store the predicted probabilities

# Convert predictions from probabilities to binary classes (0 or 1)
# We use a threshold of 0.5
val_predictions = np.array(val_predictions)
val_predictions_binary = (val_predictions > 0.5).astype(int)

# Assuming val_labels are one-hot encoded (e.g., [1, 0] for benign, [0, 1] for malignant)
# We need to convert them to single integers (0 or 1)
# You should get the class indices from your data generator
# For example: {'benign': 0, 'malignant': 1}
val_labels_binary = np.argmax(val_labels, axis=1) # If labels are one-hot

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import itertools

# Get all the true labels from the validation dataset
# ImageDataGenerator's labels are already in the correct order
val_labels = val_ds.classes

# Reset the validation generator to ensure it starts from the beginning
val_ds.reset()

# Get all predictions from the model on the validation dataset
val_predictions_proba = model.predict(val_ds)

# Convert predicted probabilities to binary classes (0 or 1)
# We use a threshold of 0.5
val_predictions_binary = (val_predictions_proba > 0.5).astype(int)

# In ImageDataGenerator, the class indices are assigned automatically
# The `val_ds.class_indices` will show the mapping, e.g., {'benign': 0, 'malignant': 1}
# Our `val_labels` and `val_predictions_binary` are now ready for the confusion matrix.

In [None]:
# Create the Confusion Matrix
conf_matrix = confusion_matrix(val_labels, val_predictions_binary)

# Get the class names from the generator for better readability in the report
class_names = list(val_ds.class_indices.keys())

# Print the Confusion Matrix
print("Confusion Matrix:")
print(conf_matrix)

# Plot the Confusion Matrix for better visualization
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

plt.figure()
plot_confusion_matrix(conf_matrix, classes=class_names,
                      title='Confusion Matrix')
plt.show()

# Print a detailed classification report
print("\nClassification Report:")
print(classification_report(val_labels, val_predictions_binary, target_names=class_names))