## Importing all the necessary Libraries

In [None]:
import cv2
import matplotlib.pyplot as plt
import os
import pandas as pd
import shutil
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import l2
from sklearn.metrics import precision_score
from sklearn.metrics import precision_recall_curve

In [None]:
# Load the training labels CSV; later cells read its 'id' and 'label' columns.
labels_df = pd.read_csv('C:/Users/swapn/OneDrive/Desktop/7th Sem/Aira_project/histopathologic-cancer-detection/train_labels.csv')

In [None]:
# Folder containing the raw training .tif files, followed by the two
# accumulators that the image-loading loop appends to.
image_dir = 'C:/Users/swapn/OneDrive/Desktop/7th Sem/Aira_project/histopathologic-cancer-detection/train/'
images, labels = [], []

In [None]:
# Load every image listed in the labels table into memory as an RGB array.
# `images` and `labels` are filled in lockstep, in the row order of labels_df.
# Columns are iterated directly; iterrows() would build a Series per row.
for image_id, label in zip(labels_df['id'], labels_df['label']):
    img_path = os.path.join(image_dir, image_id + '.tif')
    image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)

    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising; fail here with the offending path instead of with
    # an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')

    # OpenCV decodes colour images in BGR channel order; convert to RGB so
    # matplotlib displays them correctly.
    images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    labels.append(label)

### Displaying some images from the Dataset

In [None]:
# Show the first five loaded images, one figure each, titled with the label
# found at the same row position in the labels table.
preview_count = 5
for position in range(preview_count):
    plt.imshow(images[position])
    label_value = labels_df["label"].iloc[position]
    plt.title(f'Label: {label_value}')
    plt.axis('off')
    plt.show()

In [None]:
# Report the array shape of the first loaded image.
print(f'Image shape: {images[0].shape}')

In [None]:
# Count how many rows carry each label value (class balance check).
label_counts = labels_df['label'].value_counts()
print(label_counts)

In [None]:
# Bar chart of the per-label row counts computed above.
distribution_ax = label_counts.plot(kind='bar')
distribution_ax.set_title('Label Distribution')
distribution_ax.set_xlabel('Classes')
distribution_ax.set_ylabel('Number of Samples')
plt.show()

In [None]:
# Summarise how many images were loaded and how many distinct labels exist.
print(f'Total images: {len(images)}')
print(f'Total classes: {labels_df["label"].nunique()}')

In [None]:
# Preview the first rows of the labels table.
labels_df.head()

In [None]:
# Add an 'image_path' column holding the full path of each id's .tif file.
labels_df['image_path'] = [
    os.path.join(image_dir, f"{image_id}.tif") for image_id in labels_df['id']
]

In [None]:
# Preview again to check the newly added 'image_path' column.
labels_df.head()

### Train-Validation Split

In [None]:
# Hold out 20% of the rows for validation. Stratifying on 'label' keeps the
# class proportions the same in both splits; the fixed random_state makes the
# split reproducible.
train_df, val_df = train_test_split(labels_df, test_size=0.2, random_state=42, stratify=labels_df['label'])

In [None]:
# Report the number of rows in each split.
print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")

In [None]:
# Root folder for the validation image copies; created if it does not exist.
val_images_dir = 'C:/Users/swapn/OneDrive/Desktop/7th Sem/Aira_project/val_images'
os.makedirs(val_images_dir, exist_ok=True)

In [None]:
def move_images(df, target_dir):
    """Copy each image listed in ``df`` into ``target_dir/class_<label>/``.

    Despite the name, files are copied with ``shutil.copy`` and not moved:
    the source files stay where they are.

    Args:
        df: DataFrame with ``id``, ``label`` and ``image_path`` columns.
        target_dir: Root directory that receives one ``class_<label>``
            sub-directory per label encountered; created on demand.
    """
    # Remember which class folders already exist so makedirs runs once per
    # class instead of once per image.
    created_dirs = set()

    # Iterate the three columns directly; iterrows() builds a Series per row.
    for image_id, label, source_path in zip(df['id'], df['label'], df['image_path']):
        class_dir = os.path.join(target_dir, f'class_{label}')
        if class_dir not in created_dirs:
            os.makedirs(class_dir, exist_ok=True)
            created_dirs.add(class_dir)
        destination_path = os.path.join(class_dir, f"{image_id}.tif")
        shutil.copy(source_path, destination_path)


In [None]:
# Copy the validation split into val_images_dir/class_<label>/
# (move_images copies with shutil.copy; the originals stay in place).
move_images(val_df, val_images_dir)

In [None]:
# Root folder for the training image copies; created if it does not exist.
train_images_dir = 'C:/Users/swapn/OneDrive/Desktop/7th Sem/Aira_project/train_images'
os.makedirs(train_images_dir, exist_ok=True)

In [None]:
# Copy the training split into train_images_dir/class_<label>/.
move_images(train_df, train_images_dir)
# NOTE: the files are copied, not moved, despite the wording of this message.
print("Images moved successfully.")

In [None]:
# List every entry in the raw training image directory.
image_files = os.listdir(image_dir)

In [None]:
# Look up each file's label by its image id (the file name without extension).
# The files in `image_dir` are named '<id>.tif', so testing for 'class_0' in
# the file name never matches and would label every image as 1.
label_by_id = dict(zip(labels_df['id'], labels_df['label']))
labelled_files = [
    (file, label_by_id[os.path.splitext(file)[0]])
    for file in image_files
    # Skip directory entries that have no row in the labels CSV.
    if os.path.splitext(file)[0] in label_by_id
]
labels = [label for _, label in labelled_files]
data = pd.DataFrame({'filename': [file for file, _ in labelled_files], 'label': labels})

### Data Augmentation

In [None]:
# Random transformations applied to training images on the fly.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',  # pixels exposed by a transform copy the nearest edge value
)

# NOTE(review): Keras' EfficientNet models include their own input rescaling
# and expect pixels in [0, 255] — confirm that rescale=1/255 is really wanted
# (the validation generator must stay consistent with whatever is chosen).
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)


In [None]:
# Validation images are only rescaled; no augmentation is configured here.
# NOTE(review): Keras' EfficientNet models include their own input rescaling
# and expect pixels in [0, 255] — confirm that rescale=1/255 is really wanted
# (the training generator must stay consistent with whatever is chosen).
val_datagen = ImageDataGenerator(
    rescale=1./255  # Normalize pixel values to [0, 1]
)

In [None]:
# Stream augmented training batches from the per-class folders on disk.
# class_mode='binary' yields one 0/1 label per image, matching the single
# sigmoid output of the model.
train_generator = train_datagen.flow_from_directory(
    directory='C:/Users/swapn/OneDrive/Desktop/7th Sem/Aira_project/train_images',
    class_mode='binary',
    batch_size=32,
    target_size=(96, 96),
)

In [None]:
# Stream validation batches from the per-class folders on disk.
val_generator = val_datagen.flow_from_directory(
    'C:/Users/swapn/OneDrive/Desktop/7th Sem/Aira_project/val_images',  # Path to validation data directory
    target_size=(96, 96),  # Resize images to 96x96
    batch_size=32,
    class_mode='binary',  # Use 'categorical' for multi-class
    # flow_from_directory shuffles by default. Later cells compare
    # model.predict(val_generator) against val_generator.classes, which is
    # only valid when batches come out in the fixed directory order.
    shuffle=False
)

### Defining the Base Model

In [None]:
# ImageNet-pretrained EfficientNetB3 without its classification top,
# configured for 96x96 three-channel inputs.
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(96, 96, 3))

In [None]:
# Freeze the whole backbone so the first training run only updates the
# layers added on top of it.
base_model.trainable = False

In [None]:
# Classification head on top of the backbone: pool the feature maps to a
# vector, pass it through a 512-unit ReLU layer, apply 50% dropout, and end
# in a single sigmoid unit for the binary prediction.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(512, activation='relu')(pooled)
x = Dropout(0.5)(hidden)
predictions = Dense(1, activation='sigmoid')(x)

In [None]:
# Assemble the full network: backbone input through to the sigmoid output.
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print a summary of the model's layers.
model.summary()

In [None]:
# Number of epochs for the first training run.
num_epochs = 20

### Training the model

In [None]:
# First training run: the backbone is frozen, so only the head is updated.
# One epoch covers every batch of each generator exactly once.
train_steps = len(train_generator)
val_steps = len(val_generator)

history = model.fit(
    train_generator,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    verbose=1,
)

#### Loss plot

In [None]:
# Left panel of a 1x2 grid: training vs. validation loss per epoch.
loss_ax = plt.subplot(1, 2, 1)
loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Model Loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(loc='upper right')

#### Accuracy plot

In [None]:
# Right panel of a 1x2 grid: training vs. validation accuracy per epoch.
accuracy_ax = plt.subplot(1, 2, 2)
accuracy_ax.plot(history.history['accuracy'], label='Train Accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Model Accuracy')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.legend(loc='lower right')

# Tidy the spacing between panels, then render the figure.
plt.tight_layout()
plt.show()

### FINE-TUNING

In [None]:
# Halve the learning rate when validation accuracy has not improved for
# 3 epochs, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3, min_lr=1e-6)

In [None]:
# Build a replacement head on the output of the 512-unit Dense layer
# (third layer from the end, i.e. the one just before the old Dropout):
# 40% dropout, then batch normalization, then a fresh sigmoid unit.
dense_features = model.layers[-3].output
x = BatchNormalization()(Dropout(0.4)(dense_features))
new_predictions = Dense(1, activation='sigmoid')(x)

In [None]:
# Unfreeze the backbone first. `base_model.trainable = False` (set before the
# first training run) froze every layer, so re-freezing a prefix on its own
# leaves nothing in the backbone trainable and the fine-tuning run would only
# update the classification head.
base_model.trainable = True

# Keep the first 150 layers frozen; only the later layers are fine-tuned.
for layer in base_model.layers[:150]:
    layer.trainable = False

# Keep BatchNormalization layers frozen in the unfrozen tail as well, as the
# Keras fine-tuning guidance recommends, so their ImageNet statistics are not
# disturbed.
for layer in base_model.layers[150:]:
    if isinstance(layer, BatchNormalization):
        layer.trainable = False

# NOTE(review): fine-tuning unfrozen pretrained layers usually calls for a
# much lower learning rate than the optimizer default — confirm when compiling.

In [None]:
# New model: same input as before, ending in the replacement head.
new_model = Model(inputs=model.input, outputs=new_predictions)

In [None]:
# Same loss/metric setup as the first model.
# NOTE(review): 'adam' uses the optimizer's default learning rate; if backbone
# layers are trainable during fine-tuning, a much smaller rate is usually
# recommended — confirm.
new_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Stop training when validation loss has not improved for 5 epochs and
# restore the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

### Training on Fine-Tuned Model

In [None]:
# Second training run on the rebuilt model, with early stopping and
# learning-rate reduction active. Overwrites `history` from the first run.
fine_tune_callbacks = [early_stopping, reduce_lr]

history = new_model.fit(
    train_generator,
    epochs=20,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator),
    callbacks=fine_tune_callbacks,
    verbose=1,
)

### Checking for Losses, Accuracy, Precision and Recall

In [None]:
# Evaluate the fine-tuned network. `model` is the earlier graph whose output
# layer was not part of the last training run, so scoring it would not
# reflect the fine-tuning results.
validation_loss, validation_accuracy = new_model.evaluate(val_generator, verbose=0)

In [None]:
# Show the validation accuracy with four decimals.
print(f'Validation Accuracy: {validation_accuracy:.4f}')

In [None]:
# Predict with the fine-tuned network (`new_model`), not the earlier `model`
# whose output layer was not part of the last training run.
validation_predictions = new_model.predict(val_generator)

In [None]:
# Turn the sigmoid outputs into 0/1 class predictions: anything strictly
# above the decision threshold counts as class 1.
threshold = 0.4
predicted_classes = np.greater(validation_predictions, threshold).astype(int)

In [None]:
# Ground-truth class index of every validation image. The generator is named
# `val_generator`; `validation_generator` is never defined in this notebook.
true_classes = val_generator.classes

In [None]:
# recall_score is not among the names imported at the top of the notebook,
# so bring it into scope here before it is used.
from sklearn.metrics import recall_score

# Precision and recall of the thresholded predictions against the true labels.
precision = precision_score(true_classes, predicted_classes)
recall = recall_score(true_classes, predicted_classes)

In [None]:
# Show both scores with four decimals.
print(f'Validation Precision: {precision:.4f}')
print(f'Validation Recall: {recall:.4f}')

### Precision, Recall Plot

In [None]:
# Precision/recall pairs across score thresholds, computed from the raw
# model outputs rather than from the thresholded classes.
precisions, recalls, thresholds = precision_recall_curve(true_classes, validation_predictions)

In [None]:
# Draw precision (y) against recall (x) on the current axes.
pr_ax = plt.gca()
pr_ax.plot(recalls, precisions, marker='.')
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve')
plt.show()