# Histopathologic Cancer Detection using EfficientNetB0

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


## Configuration

In [None]:
# Side length, in pixels, that every image is resized to; used both as the
# generators' target_size and as the spatial size of the model's input_shape.
IMG_SIZE: int = 96
# Number of images per batch produced by each data generator.
BATCH_SIZE: int = 32
# Root directory of the competition data; the cells below read
# train_labels.csv, train/ and test/ beneath it.
DATA_DIR: str = '/kaggle/input/histopathologic-cancer-detection'


## Load and Prepare Data

In [None]:
# Read the label table and derive, for every image id, the path of its .tif
# file inside the training directory.
df = pd.read_csv(os.path.join(DATA_DIR, 'train_labels.csv'))
# Labels are cast to strings because the generators below are created with
# class_mode='binary', for which Keras requires string-valued label columns.
df['label'] = df['label'].astype(str)
df['path'] = df['id'].apply(
    lambda image_id: os.path.join(DATA_DIR, 'train', f'{image_id}.tif')
)

# Work on a reproducible random subset to keep training time short. The sample
# size is capped at the number of available rows so that a smaller label file
# (e.g. a debugging subset) does not make DataFrame.sample raise ValueError.
SAMPLE_SIZE = 8000
df = df.sample(min(SAMPLE_SIZE, len(df)), random_state=42)

# 80/20 train/validation split, stratified so that both parts keep the same
# class balance as the sampled data.
train_df, val_df = train_test_split(
    df, test_size=0.2, stratify=df['label'], random_state=42
)


## Data Augmentation and Generators

In [None]:
# Training images are augmented on the fly (random horizontal flips, rotations
# within 15 degrees and a zoom range of 0.2); validation images are only
# rescaled so that validation metrics are computed on unaugmented inputs.
# NOTE(review): both generators divide pixel values by 255 — confirm that the
# model consuming them expects inputs in the [0, 1] range.
_augmentation = dict(horizontal_flip=True, rotation_range=15, zoom_range=0.2)
train_gen = ImageDataGenerator(rescale=1./255, **_augmentation)
val_gen = ImageDataGenerator(rescale=1./255)

# Options common to both dataframe-backed iterators: where to find the file
# path and label columns, the output image size, label encoding and batch size.
_flow_options = dict(
    x_col='path',
    y_col='label',
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
)

# Only the training stream is shuffled; the validation stream keeps dataframe
# order so predictions can later be matched against its `classes` attribute.
train_generator = train_gen.flow_from_dataframe(
    train_df, shuffle=True, **_flow_options
)
val_generator = val_gen.flow_from_dataframe(
    val_df, shuffle=False, **_flow_options
)


## Build the Model

In [None]:
# Binary classifier: EfficientNetB0 backbone -> global average pooling ->
# dropout -> single sigmoid unit giving the probability of the positive class.
#
# Keras' EfficientNet models contain their own input rescaling and expect raw
# pixel values in the [0, 255] range, whereas every data generator in this
# notebook already divides pixels by 255. Multiplying by 255 at the model
# entrance undoes that division, so the backbone is not fed doubly-rescaled
# (near-zero) inputs, and train/validation/test data stay consistent.
inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
rescaled = tf.keras.layers.Rescaling(255.0)(inputs)

# weights=None: the backbone is randomly initialised and trained from scratch
# (no pretrained weights are downloaded).
base = EfficientNetB0(weights=None, include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = base(rescaled)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
output = Dense(1, activation='sigmoid')(x)
model = Model(inputs=inputs, outputs=output)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


## Train the Model

In [None]:
# Both callbacks watch the validation loss (the Keras default, spelled out
# here for clarity):
#   * stop training after 3 epochs without improvement and roll the model back
#     to the best weights seen;
#   * halve the learning rate after 2 epochs without improvement.
stop_when_stalled = EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)
halve_lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2
)
callbacks = [stop_when_stalled, halve_lr_on_plateau]

# Train for at most 10 epochs, evaluating on the validation generator after
# each one; `history` keeps the per-epoch metrics returned by fit().
history = model.fit(
    x=train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=callbacks,
)


## Evaluate Model

In [None]:
# Rewind the validation iterator before predicting; it was created with
# shuffle=False, so the predictions are compared position by position with
# `val_generator.classes`.
val_generator.reset()
y_pred = model.predict(val_generator).ravel()
y_true = val_generator.classes

# Turn predicted probabilities into hard 0/1 labels with a 0.5 cut-off.
decision_threshold = 0.5
is_positive = y_pred > decision_threshold
y_pred_labels = is_positive.astype(int)

# Per-class precision/recall/F1 on the hard labels, and ROC AUC on the raw
# probabilities.
report = classification_report(y_true, y_pred_labels)
print(report)
auc_score = roc_auc_score(y_true, y_pred)
print("ROC AUC Score:", auc_score)


## Predict on Full Test Set and Create Submission

In [None]:
# Collect every test image, predict its probability of being positive and
# write the result to submission.csv with columns `id` and `label`.
test_paths = sorted(glob(os.path.join(DATA_DIR, 'test', '*.tif')))
if not test_paths:
    # Fail early with a clear message instead of an obscure error from
    # predict() on an empty generator.
    raise FileNotFoundError(
        f"No .tif images found in {os.path.join(DATA_DIR, 'test')}"
    )

# The image id is the file name without its extension. splitext strips only
# the trailing extension, whereas str.replace would also remove a '.tif'
# occurring elsewhere in the name.
test_ids = [os.path.splitext(os.path.basename(p))[0] for p in test_paths]

test_df = pd.DataFrame({'id': test_ids, 'path': test_paths})

# Same preprocessing as the validation data; no labels, and no shuffling so
# that predictions keep the order of the generator's file list.
test_gen = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
    test_df, x_col='path', y_col=None, class_mode=None,
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE, shuffle=False
)

preds = model.predict(test_gen, verbose=1).ravel()

# flow_from_dataframe drops rows whose file fails validation, so take the ids
# from the files the generator actually served; this keeps ids and predictions
# aligned one-to-one even if some images were skipped.
test_ids = [
    os.path.splitext(os.path.basename(p))[0] for p in test_gen.filenames
]
submission = pd.DataFrame({'id': test_ids, 'label': preds})
submission.to_csv("submission.csv", index=False)
submission.head()
