<a href="https://colab.research.google.com/github/asheta66/CNN/blob/main/Breast_Cancer_AlexNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from google.colab import drive
import os
from collections import defaultdict

# 1. Connect to Google Drive
drive.mount('/content/drive')
data_dir = '/content/drive/My Drive/Data2Original'

# 2. Check the number of folders and define the number of classes
# Sorted so that the order of `folders` matches the alphanumeric class indices
# assigned by flow_from_directory; `folders` is reused later as axis labels.
folders = sorted(
    f for f in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, f))
)
num_classes = len(folders)
print(f"Number of classes: {num_classes}")

# 3. Check and display the image resolutions
# Only files with an image extension are opened; a stray non-image file
# (e.g. a .txt or .csv) would otherwise make load_img raise and abort the scan.
# NOTE(review): list mirrors the formats flow_from_directory accepts by
# default -- confirm against the installed Keras version.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm', '.tif', '.tiff')
image_resolutions = set()
for folder in folders:
    folder_path = os.path.join(data_dir, folder)
    for img_name in os.listdir(folder_path):
        if not img_name.lower().endswith(image_extensions):
            continue
        img_path = os.path.join(folder_path, img_name)
        img = tf.keras.preprocessing.image.load_img(img_path)
        img_size = img.size  # PIL reports (width, height)
        image_resolutions.add(img_size)

print(f"Image resolutions: {image_resolutions}")


# Function to display class names and the number of images in each class
def display_class_distribution(data_dir):
    """Count the image files in each class sub-folder of ``data_dir``.

    Every immediate sub-directory of ``data_dir`` is treated as one class.
    A file counts as an image when its name ends (case-insensitively) with
    ``.png``, ``.jpg`` or ``.jpeg``. Plain files directly inside ``data_dir``
    are ignored.

    Args:
        data_dir: Path of the directory that holds one sub-folder per class.

    Returns:
        A ``defaultdict(int)`` mapping class folder name to its image count,
        filled in sorted folder-name order.
    """
    # The leading dot matters: a bare 'jpg' suffix would also match names
    # such as "readmejpg" that are not image files.
    image_extensions = ('.png', '.jpg', '.jpeg')
    class_counts = defaultdict(int)

    # Sorted for a deterministic report; os.listdir order is arbitrary.
    for folder in sorted(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        class_counts[folder] = sum(
            1
            for name in os.listdir(folder_path)
            if name.lower().endswith(image_extensions)
        )

    return class_counts

# Report how many images each class folder contains
class_distribution = display_class_distribution(data_dir)
for class_name in class_distribution:
    count = class_distribution[class_name]
    print(f"Class: {class_name}, Number of images: {count}")


# Spatial size every image is resized to before it is fed to the network
# (matches the 227x227 input the model below is built for)
target_size = (227, 227)

# 4. Build an AlexNet-style CNN (trained from scratch here; no pretrained weights are loaded)
def build_alexnet_model(num_classes, input_shape=(227, 227, 3)):
    """Build an uncompiled AlexNet-style Sequential classifier.

    The network is five convolution layers (with max-pooling after the
    first, second and fifth), followed by two 4096-unit dense layers with
    50% dropout and a softmax output. No pretrained weights are loaded, so
    the returned model has freshly initialised parameters.

    Args:
        num_classes: Number of output classes (units in the softmax layer).
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to the 227x227 RGB input this notebook uses.

    Returns:
        A ``tf.keras.models.Sequential`` model; the caller must compile it.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        # An explicit Input layer replaces the `input_shape=` argument on the
        # first Conv2D, which Keras 3 warns about for Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(96, (11, 11), strides=(4, 4), activation='relu'),
        layers.MaxPooling2D((3, 3), strides=(2, 2)),
        layers.Conv2D(256, (5, 5), activation='relu', padding='same'),
        layers.MaxPooling2D((3, 3), strides=(2, 2)),
        layers.Conv2D(384, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(384, (3, 3), activation='relu', padding='same'),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((3, 3), strides=(2, 2)),
        layers.Flatten(),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])
    return model

model = build_alexnet_model(num_classes)
model.summary()

# 5. Data Preprocessing
# Pixel values are scaled to [0, 1]; 20% of the files in each class folder are
# held out as the validation subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=32,
    class_mode='categorical',
    subset='training'
)
# shuffle=False keeps the batches in directory order, so that
# validation_generator.classes lines up row-for-row with
# model.predict(validation_generator). With the default shuffle=True the
# reports and confusion matrix computed below compare predictions against the
# wrong labels (which is why the report showed ~0.43 accuracy while
# val_accuracy during training was ~0.65-0.70).
validation_generator = datagen.flow_from_directory(
    data_dir,
    target_size=target_size,
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# 6. Compile and train the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)

# Save the trained model (a single HDF5 file on Drive)
model.save('/content/drive/My Drive/alexnet_model.h5')

# 7. Display and save accuracy, precision, recall, and F1 scores for training and testing cases

# Calculate metrics for the validation set
def calculate_metrics(generator, model):
    """Return a scikit-learn classification report for ``model`` on ``generator``.

    Labels and predictions are gathered batch by batch from the same batch,
    so they stay paired even when the generator shuffles its samples.
    Reading ``generator.classes`` (directory order) next to
    ``model.predict(generator)`` (iteration order) is only valid for
    non-shuffled generators and silently produces wrong metrics otherwise.

    Args:
        generator: An indexable Keras directory iterator exposing
            ``class_indices`` and yielding ``(images, labels)`` batches.
        model: A trained Keras model producing one score per class.

    Returns:
        The text report (precision, recall, F1, support per class) as a string.
    """
    class_indices = generator.class_indices
    # Order the names by their integer index so row i of the report is class i.
    class_names = sorted(class_indices, key=class_indices.get)

    true_batches = []
    pred_batches = []
    for batch_index in range(len(generator)):
        images, labels = generator[batch_index]
        labels = np.asarray(labels)
        # One-hot labels (class_mode='categorical') are reduced to class ids;
        # already-integer labels are used as they are.
        if labels.ndim > 1:
            labels = np.argmax(labels, axis=1)
        true_batches.append(labels.astype(int))
        pred_batches.append(np.argmax(model.predict_on_batch(images), axis=1))

    y_true = np.concatenate(true_batches)
    y_pred_classes = np.concatenate(pred_batches)

    # Generate classification report; `labels` pins the full class list so the
    # report still works when a class is missing from this subset.
    report = classification_report(
        y_true,
        y_pred_classes,
        labels=list(range(len(class_names))),
        target_names=class_names,
    )
    return report

# Evaluation uses dedicated non-shuffled generators. flow_from_directory
# shuffles by default, and `.classes` (directory order) only lines up with
# model.predict() output when batches are yielded in that same order.
# NOTE(review): the training/validation split is expected to be identical to
# the one used for fitting because it is derived from file position, not from
# random sampling -- confirm against the installed Keras version.
eval_kwargs = dict(
    target_size=target_size,
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
)
val_eval_generator = datagen.flow_from_directory(
    data_dir, subset='validation', **eval_kwargs
)
train_eval_generator = datagen.flow_from_directory(
    data_dir, subset='training', **eval_kwargs
)

# Class names ordered by the integer index Keras assigned to them, so labels
# on reports and plots always refer to the right class.
class_names = sorted(
    val_eval_generator.class_indices, key=val_eval_generator.class_indices.get
)
class_ids = list(range(len(class_names)))

# Calculate metrics for the validation set
validation_report = calculate_metrics(val_eval_generator, model)
print("Validation Set Metrics:\n", validation_report)

# Save metrics to a file
with open('/content/drive/My Drive/validation_metrics.txt', 'w', encoding='utf-8') as f:
    f.write("Validation Set Metrics:\n")
    f.write(validation_report)

# Training metrics. The labels come from `.classes` rather than from looping
# over the generator: Keras directory iterators never stop iterating, so
# `for x, y in train_generator` would run forever, and the labels it yields
# are one-hot rather than the class ids that argmax produces.
train_labels = train_eval_generator.classes
train_predictions = model.predict(train_eval_generator)
train_predictions_classes = np.argmax(train_predictions, axis=1)

# Generate classification report for training set
train_report = classification_report(
    train_labels,
    train_predictions_classes,
    labels=class_ids,
    target_names=class_names,
)
print("Training Set Metrics:\n", train_report)

# Save training metrics to a file
with open('/content/drive/My Drive/training_metrics.txt', 'w', encoding='utf-8') as f:
    f.write("Training Set Metrics:\n")
    f.write(train_report)

# Plot and save confusion matrix and convergence curves
y_true = val_eval_generator.classes
y_pred = model.predict(val_eval_generator)
y_pred_classes = np.argmax(y_pred, axis=1)

# `labels` keeps the matrix square even if a class is absent from the subset;
# display labels use the Keras class order instead of os.listdir order.
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
ConfusionMatrixDisplay(conf_matrix, display_labels=class_names).plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.savefig('/content/drive/My Drive/confusion_matrix.png')
plt.show()

# Each curve gets its own figure so nothing is drawn on top of a previous plot
# when the code runs outside a notebook.
plt.figure()
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Convergence Curve - Accuracy')
plt.savefig('/content/drive/My Drive/convergence_curve_accuracy.png')
plt.show()

plt.figure()
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Convergence Curve - Loss')
plt.savefig('/content/drive/My Drive/convergence_curve_loss.png')
plt.show()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of classes: 3
Image resolutions: {(224, 224)}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Found 1263 images belonging to 3 classes.
Found 315 images belonging to 3 classes.
Epoch 1/10


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m286s[0m 7s/step - accuracy: 0.4873 - loss: 1.1841 - val_accuracy: 0.6476 - val_loss: 0.7725
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 7s/step - accuracy: 0.6622 - loss: 0.7360 - val_accuracy: 0.6794 - val_loss: 0.6800
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 7s/step - accuracy: 0.6930 - loss: 0.6852 - val_accuracy: 0.6857 - val_loss: 0.6767
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 7s/step - accuracy: 0.7084 - loss: 0.6584 - val_accuracy: 0.6984 - val_loss: 0.6860
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 7s/step - accuracy: 0.6920 - loss: 0.6343 - val_accuracy: 0.6571 - val_loss: 1.0044
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 7s/step - accuracy: 0.6701 - loss: 0.6977 - val_accuracy: 0.6508 - val_loss: 0.7359
Epoch 7/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━



[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step
Validation Set Metrics:
               precision    recall  f1-score   support

      benign       0.56      0.62      0.59       178
   malignant       0.24      0.24      0.24        84
      normal       0.12      0.08      0.09        53

    accuracy                           0.43       315
   macro avg       0.31      0.31      0.31       315
weighted avg       0.40      0.43      0.41       315

