In [None]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.losses import BinaryFocalCrossentropy
from sklearn.metrics import confusion_matrix, roc_curve, auc, classification_report, RocCurveDisplay

In [None]:
# Single source of truth for every random seed used in this notebook.
SEED = 42

# Environment flags are set before the RNGs are seeded.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes — confirm.
os.environ.update({
    'PYTHONHASHSEED': str(SEED),
    'TF_DETERMINISTIC_OPS': '1',
})

# Seed Python's, NumPy's and TensorFlow's global generators with the same value.
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)

In [None]:
# Dataset layout: <base_dir>/{train,val,test}/<class_name>/<image files>
base_dir = "/kaggle/input/brain-tumor-detection"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")
test_dir = os.path.join(base_dir, "test")

# Shared input settings for all three splits.
IMG_SIZE = (224, 224)
BATCH_SIZE = 16

# The ImageNet MobileNetV2 weights were trained on pixels scaled to [-1, 1].
# Using the model's own preprocess_input (instead of rescale=1/255, which
# yields [0, 1]) keeps the inputs in the range the pretrained filters expect.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Training images get random geometric / brightness augmentation; the
# preprocessing function runs after augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True,
    brightness_range=[0.7,1.3],
    zoom_range=0.15,
    shear_range=0.08,
    fill_mode='nearest'
)

# Validation / test images are only preprocessed, never augmented.
val_test_datagen = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)


def _flow_from(datagen, directory, shuffle):
    """Build a binary-label directory iterator with the notebook's shared settings.

    Args:
        datagen: ImageDataGenerator that supplies augmentation / preprocessing.
        directory: Split directory containing one sub-folder per class.
        shuffle: Whether the iterator shuffles the sample order.

    Returns:
        The iterator returned by ``datagen.flow_from_directory``.
    """
    return datagen.flow_from_directory(
        directory,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
        shuffle=shuffle,
        seed=SEED
    )


train_generator = _flow_from(train_datagen, train_dir, shuffle=True)

# shuffle=False keeps predictions aligned with `.classes` / `.filepaths`,
# which the evaluation cells rely on.
val_generator = _flow_from(val_test_datagen, val_dir, shuffle=False)
test_generator = _flow_from(val_test_datagen, test_dir, shuffle=False)

In [None]:
def build_model(pretrained=True, input_shape=(224,224,3), dropout=0.5):
    """Assemble a MobileNetV2 backbone with a small binary-classification head.

    Args:
        pretrained: If True the backbone is created with ``weights='imagenet'``,
            otherwise with ``weights=None``.
        input_shape: Input shape passed to the backbone.
        dropout: Rate of the Dropout layer placed before the output unit.

    Returns:
        A ``(model, backbone)`` tuple: the full model ending in a single
        sigmoid unit, and the MobileNetV2 backbone it was built on.
    """
    backbone = MobileNetV2(
        weights='imagenet' if pretrained else None,
        include_top=False,
        input_shape=input_shape,
    )

    # Head: global average pooling -> Dense(128, relu) -> Dropout -> sigmoid.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(128, activation='relu')(pooled)
    regularised = Dropout(dropout)(hidden)
    probability = Dense(1, activation='sigmoid')(regularised)

    classifier = Model(inputs=backbone.input, outputs=probability)
    return classifier, backbone

model, base_model = build_model(pretrained=True)

In [None]:
# Stage 1: train only the new classification head.
# The backbone must be frozen here; otherwise the randomly initialised head
# back-propagates large gradients through every pretrained layer at lr=1e-4,
# and the later "unfreeze" step of the fine-tuning stage would be a no-op.
# `trainable` has to be set before compile() for it to take effect.
base_model.trainable = False

# All callbacks watch the validation loss.
early_stop_1 = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint_1 = ModelCheckpoint("mobilenet_initial.h5", monitor='val_loss', save_best_only=True)
reduce_lr_1  = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

model.compile(optimizer=Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    callbacks=[early_stop_1, checkpoint_1, reduce_lr_1]
)

In [None]:
from sklearn.utils import class_weight

# Integer class index of every training sample, as assigned by the generator.
train_labels = train_generator.classes
classes = np.unique(train_labels)

# 'balanced' weights are inversely proportional to class frequency.
cw = class_weight.compute_class_weight('balanced', classes=classes, y=train_labels)

# Key each weight by its actual class label rather than by its position in
# `cw`, so the mapping stays correct even if a class index is absent from the
# training split. Plain int/float keep the dict (and its printout) free of
# NumPy scalar types.
class_weights = {int(label): float(weight) for label, weight in zip(classes, cw)}
print("Class weights:", class_weights)

In [None]:
# Stage 2: fine-tune the top of the backbone together with the head.
# Unfreeze everything, then re-freeze all but the last 50 backbone layers.
base_model.trainable = True
for layer in base_model.layers[:-50]:
    layer.trainable = False

# Keep BatchNormalization layers frozen inside the unfrozen slice as well.
# Re-estimating their statistics from batches of 16 images during fine-tuning
# can wipe out what the pretrained backbone has learned.
for layer in base_model.layers[-50:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# gamma is tunable; 2.0 is a reasonable starting point.
# NOTE(review): focal loss and class_weight both compensate for class
# imbalance — confirm that applying both is intended.
loss_fn = BinaryFocalCrossentropy(gamma=2.0)
# Re-compile so the changed `trainable` flags take effect; lower learning rate
# than stage 1 to avoid large updates to pretrained weights.
model.compile(optimizer=Adam(1e-5), loss=loss_fn, metrics=['accuracy'])

early_stop_2 = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)
checkpoint_2 = ModelCheckpoint("mobilenet_finetuned.h5", monitor='val_loss', save_best_only=True)
reduce_lr_2  = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=25,
    class_weight=class_weights,
    callbacks=[early_stop_2, checkpoint_2, reduce_lr_2]
)

In [None]:
# Side-by-side accuracy and loss curves for both training stages.
# Each stage's curves are drawn against that stage's own epoch index.
plt.figure(figsize=(14,6))

# (history key, panel title / y-label, short name used in legend labels)
curve_panels = (
    ('accuracy', 'Accuracy', 'Acc'),
    ('loss', 'Loss', 'Loss'),
)
# (fit result, stage name shown in the legend, marker style)
training_runs = (
    (history, 'Initial', 'o'),
    (history_finetune, 'Fine-tune', 'x'),
)

for panel_no, (metric_key, panel_title, short_name) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_no)
    for run, stage, marker_style in training_runs:
        # .get(..., []) draws nothing if a metric was not recorded.
        plt.plot(run.history.get(metric_key, []),
                 label=f'Train {short_name} ({stage})', marker=marker_style)
        plt.plot(run.history.get(f'val_{metric_key}', []),
                 label=f'Val {short_name} ({stage})', marker=marker_style)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(panel_title)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# --- Test-set ROC / AUC (reporting only) ------------------------------------
# The test generator is not shuffled, so predictions line up with `.classes`.
y_true = test_generator.classes
y_prob = model.predict(test_generator, verbose=0).ravel()

fpr, tpr, thresholds = roc_curve(y_true, y_prob)
roc_auc = auc(fpr, tpr)
print("AUC:", roc_auc)

# Draw on an explicit Axes: RocCurveDisplay.plot() without `ax` opens its own
# figure, which would leave a blank 6x6 figure behind.
fig, ax = plt.subplots(figsize=(6,6))
RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc).plot(ax=ax)
ax.set_title("ROC Curve")
plt.show()

# --- Operating-point selection (validation set) ------------------------------
# The threshold is tuned on the validation split, not on the test split:
# picking it on the same data it is later evaluated on would make the
# test-set metrics at that threshold optimistically biased.
y_val_true = val_generator.classes
y_val_prob = model.predict(val_generator, verbose=0).ravel()
val_fpr, val_tpr, val_thresholds = roc_curve(y_val_true, y_val_prob)

target_sens = 0.95
# roc_curve returns thresholds in decreasing order, so the first index that
# meets the target is the highest threshold achieving that sensitivity.
idx = np.where(val_tpr >= target_sens)[0]
if len(idx) > 0:
    thresh_for_target = val_thresholds[idx[0]]
    print(f"Threshold to reach sensitivity >= {target_sens}: {thresh_for_target:.4f}")
else:
    # Fallback: maximise Youden's J statistic (TPR - FPR).
    J = val_tpr - val_fpr
    best_idx = np.argmax(J)
    thresh_for_target = val_thresholds[best_idx]
    print(f"Could not reach sensitivity {target_sens}. Using Youden threshold: {thresh_for_target:.4f}")

best_thresh = float(thresh_for_target)
print("Selected threshold:", best_thresh)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# NOTE(review): the display names assume class index 0 is the normal class and
# index 1 the tumor class — confirm against test_generator.class_indices.

# Default threshold 0.5
y_pred_05 = (y_prob > 0.5).astype(int)
print("=== Results at threshold 0.5 ===")
print(classification_report(y_true, y_pred_05, target_names=['Normal','Tumor']))
cm05 = confusion_matrix(y_true, y_pred_05)
disp05 = ConfusionMatrixDisplay(cm05, display_labels=['Normal','Tumor'])
disp05.plot(cmap='Blues')
plt.title("Confusion Matrix (threshold=0.5)")
plt.show()

# Selected threshold (best_thresh)
# Use >= here: roc_curve defines each operating point as "score >= threshold",
# so a strict > would drop the sample sitting exactly on the chosen threshold
# and fall short of the sensitivity the threshold was selected for.
y_pred_best = (y_prob >= best_thresh).astype(int)
print(f"=== Results at selected threshold {best_thresh:.3f} ===")
print(classification_report(y_true, y_pred_best, target_names=['Normal','Tumor']))
cmbest = confusion_matrix(y_true, y_pred_best)
dispbest = ConfusionMatrixDisplay(cmbest, display_labels=['Normal','Tumor'])
dispbest.plot(cmap='Blues')
plt.title(f"Confusion Matrix (threshold={best_thresh:.3f})")
plt.show()

In [None]:
import shutil

# Output folders for the misclassified test images.
for _out_dir in ("misclassified",
                 "misclassified_default_thresh",
                 "misclassified_selected_thresh"):
    os.makedirs(_out_dir, exist_ok=True)

# The copy step needs test_generator.filepaths; fail loudly if it is missing.
filepaths = getattr(test_generator, 'filepaths', None)
if filepaths is None:
    raise RuntimeError("test_generator.filepaths not found. Ensure test_generator was created with flow_from_directory.")

# One pass per decision threshold: (destination folder, predicted labels).
copy_jobs = (
    ("misclassified_default_thresh", y_pred_05),
    ("misclassified_selected_thresh", y_pred_best),
)
for out_dir, predicted in copy_jobs:
    for i, fp in enumerate(filepaths):
        if predicted[i] == y_true[i]:
            continue
        # Prefix with the sample index so equal basenames from different
        # class folders do not overwrite each other.
        shutil.copy(fp, os.path.join(out_dir, f"{i}_{os.path.basename(fp)}"))

In [None]:
# Loss of the final model on the test set, using the loss the model was last
# compiled with. The previous version printed the last-epoch *validation* loss
# from the fine-tuning history here, which is neither a test-set number nor
# necessarily the loss of the weights being evaluated.
test_loss = model.evaluate(test_generator, verbose=0, return_dict=True)['loss']

# Metrics at threshold 0.5
print("\n=== THRESHOLD 0.5 METRICS ===")
print(f"Accuracy:  {accuracy_score(y_true, y_pred_05):.4f}")
print(f"Precision: {precision_score(y_true, y_pred_05):.4f}")
print(f"Recall:    {recall_score(y_true, y_pred_05):.4f}")
print(f"Loss:      {test_loss:.4f}")

# Metrics at the selected threshold
print(f"\n=== THRESHOLD {best_thresh:.3f} METRICS ===")
print(f"Accuracy:  {accuracy_score(y_true, y_pred_best):.4f}")
print(f"Precision: {precision_score(y_true, y_pred_best):.4f}")
print(f"Recall:    {recall_score(y_true, y_pred_best):.4f}")
print(f"AUC:       {roc_auc:.4f}")

In [None]:
def tta_predict_single(model, img_path, tta_steps=6, target_size=(224,224), datagen=None):
    """Predict one image with test-time augmentation (TTA).

    The image is loaded, randomly augmented ``tta_steps`` times, and the
    model's outputs for the augmented copies are averaged.

    Args:
        model: Model whose ``predict`` returns an array of shape (batch, 1).
        img_path: Path of the image file to load.
        tta_steps: Number of augmented copies to average over (must be >= 1).
        target_size: (height, width) the image is resized to on load.
        datagen: Generator providing ``random_transform`` and ``standardize``.
            Defaults to the notebook-level ``train_datagen`` so the same
            augmentation and preprocessing as in training are applied.

    Returns:
        The mean predicted probability over the augmented copies.

    Raises:
        ValueError: If ``tta_steps`` is smaller than 1.
    """
    if tta_steps < 1:
        # Averaging zero predictions would silently produce NaN.
        raise ValueError(f"tta_steps must be >= 1, got {tta_steps}")
    if datagen is None:
        datagen = train_datagen

    img = load_img(img_path, target_size=target_size)
    arr = img_to_array(img)

    # Build all augmented copies first and run a single predict() call:
    # one call per copy is much slower and prints a progress bar every time.
    batch = np.stack([
        datagen.standardize(datagen.random_transform(arr.copy()))
        for _ in range(tta_steps)
    ])
    preds = model.predict(batch, verbose=0)
    return np.mean(preds[:, 0])


# Opt-in switch: when enabled, every test image is re-scored with 8-step TTA.
use_tta = False
if use_tta:
    y_prob_tta = np.array(
        [tta_predict_single(model, path, tta_steps=8) for path in filepaths]
    )