## Hücre 1: Ortam Hazırlığı

In [None]:
import os, random, warnings

# TensorFlow's native logger caches this level the first time it logs, which
# normally happens during `import tensorflow`, so it must be set before the
# import to have any effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import mixed_precision
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')

# Prefer bfloat16 mixed precision and fall back to float16 if the policy cannot
# be set. Only real errors are caught, so an interrupt still stops the cell.
try:
    policy = mixed_precision.Policy('mixed_bfloat16')
    mixed_precision.set_global_policy(policy)
    print("Policy: mixed_bfloat16 (Ampere/Hopper architecture optimized)")
except Exception as exc:
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)
    print(f"Policy: mixed_float16 (mixed_bfloat16 unavailable: {exc})")

# Seed every random number generator used by the notebook.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

print(f"TensorFlow: {tf.__version__}")
print(f"GPU Availability: {tf.config.list_physical_devices('GPU')}")

## Hücre 2: Kaggle Dataset İndirme

In [None]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded:
    os.makedirs('/root/.kaggle', exist_ok=True)
    os.replace(fn, f'/root/.kaggle/{fn}')
    os.chmod(f'/root/.kaggle/{fn}', 0o600)

os.makedirs('/content/datasets', exist_ok=True)

print("[INFO] Dataset indiriliyor...")
!kaggle datasets download -d andrewmvd/lung-and-colon-cancer-histopathological-images \
    -p /content/datasets --unzip -q

print("[OK] Dataset hazır: /content/datasets/lung_colon_image_set")

## Hücre 3: Veri Hazırlama ve Split

In [None]:
!pip install imagehash

import os
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
import imagehash
from sklearn.model_selection import GroupShuffleSplit
from tqdm.notebook import tqdm

# Root folder of the lung image sets and the class-folder -> integer-label lookup.
LUNG_ROOT = Path('/content/datasets') / 'lung_colon_image_set' / 'lung_image_sets'
mapping = dict(lung_n=0, lung_aca=1, lung_scc=2)

print("[ANALİZ] Görüntü Hash'leri hesaplanıyor (Flip + Rotation Duyarlı)...")

def calculate_phash(image_path):
    """Return a flip/rotation-insensitive perceptual hash key for one image.

    The perceptual hash is computed for eight variants of the image (the
    original and its left-right mirror, each rotated by 0, 90, 180 and 270
    degrees) and the lexicographically smallest hash string is returned. The
    intent is that flipped or rotated copies of a picture map to the same key.

    Args:
        image_path: Path of the image file to hash.

    Returns:
        The smallest hash string, or None when the file could not be read or
        hashed (the error is printed).
    """
    try:
        # The context manager guarantees the file handle is released even when
        # decoding or hashing raises.
        with Image.open(image_path) as img:
            variants = (img, img.transpose(Image.FLIP_LEFT_RIGHT))
            return min(
                str(imagehash.phash(variant.rotate(angle, expand=True)))
                for variant in variants
                for angle in (0, 90, 180, 270)
            )
    except Exception as e:
        print(f"Hata: {e}")
        return None

# Collect one record per readable image: file path, integer label, hash key.
rows = []
for cls_folder, label in mapping.items():
    folder = LUNG_ROOT / cls_folder
    # Named `image_files` so it does not shadow the `files` module imported
    # from google.colab in the download cell. Sorted so the row order does not
    # depend on filesystem enumeration order.
    image_files = sorted(list(folder.rglob('*.jpeg')) + list(folder.rglob('*.jpg')))

    print(f"   Klasör taranıyor: {cls_folder} ({len(image_files)} dosya)")
    for img_path in tqdm(image_files, desc=f"{cls_folder}", leave=False):
        h = calculate_phash(img_path)
        if h:
            rows.append({'filename': str(img_path), 'label': label, 'img_hash': h})

# Fail with a clear message instead of a KeyError on the missing 'img_hash'
# column when nothing was found (e.g. the dataset was not downloaded).
if not rows:
    raise RuntimeError(f"No readable images found under {LUNG_ROOT}")

df = pd.DataFrame(rows)

print("[İŞLEM] Benzer görüntüler gruplanıyor...")
# Images sharing the same hash key get the same group id.
df['group_id'] = df.groupby('img_hash').ngroup()

unique_groups = df['group_id'].nunique()
print(f"\n[SONUÇ] Toplam Görüntü: {len(df)}")
print(f"[SONUÇ] Tespit Edilen Benzersiz Grup Sayısı: {unique_groups}")
print(f"        (Beklenen: ~750 - 2000 arası olmalı)")

# One seed for both group-aware splits.
SPLIT_SEED = 42

# 80% train / 20% held out, split by group so a group never straddles the two.
gss = GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=SPLIT_SEED)
train_idx, test_idx = next(gss.split(df, groups=df['group_id']))

train_df = df.iloc[train_idx]
temp_df = df.iloc[test_idx]

# Split the held-out part in half (by group again) into validation and test.
gss_val = GroupShuffleSplit(n_splits=1, test_size=0.50, random_state=SPLIT_SEED)
val_idx, test_final_idx = next(gss_val.split(temp_df, groups=temp_df['group_id']))

val_df = temp_df.iloc[val_idx]
test_df = temp_df.iloc[test_final_idx]

# Verify the leakage claim printed below instead of only asserting it in text.
train_groups = set(train_df['group_id'])
val_groups = set(val_df['group_id'])
test_groups = set(test_df['group_id'])
assert train_groups.isdisjoint(val_groups), "group leakage between train and val"
assert train_groups.isdisjoint(test_groups), "group leakage between train and test"
assert val_groups.isdisjoint(test_groups), "group leakage between val and test"
assert len(train_df) + len(val_df) + len(test_df) == len(df)

# Persist the three splits.
os.makedirs('/content/data_csv_leakage_proof', exist_ok=True)
train_df.to_csv('/content/data_csv_leakage_proof/train.csv', index=False)
val_df.to_csv('/content/data_csv_leakage_proof/val.csv', index=False)
test_df.to_csv('/content/data_csv_leakage_proof/test.csv', index=False)

print("\n[BİLGİ] Yeni dağılım:")
print(f"Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)}")
print("[ONAY] Train setindeki hiçbir görüntünün kopyası (döndürülmüş veya ters çevrilmiş hali) Test setinde YOK.")

## Hücre 4: tf.data Pipeline

In [None]:
AUTOTUNE = tf.data.AUTOTUNE
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

def create_dataset_paper_impl(df, batch_size, is_training=False):
    """Build the tf.data input pipeline for one data split.

    Per the original note, the preprocessing follows the strategy of Tummala
    et al. (2023) and the in-memory cache is sized for a host with 56 GB RAM.

    Args:
        df: DataFrame with a 'filename' column (image paths) and a 'label'
            column (integer class ids).
        batch_size: Number of samples per batch.
        is_training: When True, the cached samples are shuffled and randomly
            flipped/rotated; otherwise they are emitted in `df` order.

    Returns:
        A batched, prefetched tf.data.Dataset of (image, label) pairs where
        each image is float32 with spatial size IMG_SIZE.
    """

    def load_and_crop(path, label):
        """Decode a JPEG, keep its central 75% and resize it to IMG_SIZE."""
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)

        img = tf.image.central_crop(img, central_fraction=0.75)
        img = tf.image.resize(img, IMG_SIZE)

        img = tf.cast(img, tf.float32)
        return img, label

    def augment(img, label):
        """Apply random flips and a random multiple-of-90-degree rotation."""
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)

        # k is drawn from {0, 1, 2, 3} quarter turns.
        k = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
        img = tf.image.rot90(img, k)

        return img, label

    paths = df['filename'].values
    # Class ids are integers; the sparse categorical loss expects integer labels.
    labels = df['label'].astype('int32').values

    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    ds = ds.map(load_and_crop, num_parallel_calls=AUTOTUNE)

    # Cache the decoded images so later epochs skip file reads and decoding.
    ds = ds.cache()

    if is_training:
        # The shuffle buffer must cover the whole split. The split indices are
        # sorted, so rows arrive grouped by class, and a smaller buffer (the
        # previous fixed 10000) would make early batches of every epoch lack
        # the last class. The buffer holds decoded images, so it costs about
        # as much RAM as the cache itself.
        ds = ds.shuffle(max(len(df), 1), seed=SEED)
        ds = ds.map(augment, num_parallel_calls=AUTOTUNE)

    ds = ds.batch(batch_size)
    ds = ds.prefetch(AUTOTUNE)
    return ds

print("[INFO] Pipeline oluşturuluyor (RAM Cache devrede)...")
# Build the splits in train / val / test order; only the training split is
# created with is_training=True.
train_ds, val_ds, test_ds = (
    create_dataset_paper_impl(split_df, BATCH_SIZE, is_training=training_flag)
    for split_df, training_flag in (
        (train_df, True),
        (val_df, False),
        (test_df, False),
    )
)

print(f"[OK] Pipeline hazır. Batch Size: {BATCH_SIZE} (Makale uyumlu)")

## Hücre 5: Model Oluşturma

In [None]:
from tensorflow.keras.applications import EfficientNetV2S
from tensorflow.keras import layers, models, optimizers

print("[INFO] Model: EfficientNetV2-S (Small) hazırlanıyor...")

# ImageNet-pretrained backbone without its classification head; input
# preprocessing is kept inside the model.
base_model = EfficientNetV2S(
    include_top=False,
    weights='imagenet',
    include_preprocessing=True,
    input_shape=(224, 224, 3),
)

total_layers = len(base_model.layers)
split_point = int(total_layers * 0.5)

print(f"[INFO] Toplam Parametre: ~21M (Large modelin 1/6'sı)")
print(f"[INFO] Toplam Katman: {total_layers}. Fine-tuning: {split_point}. katmandan itibaren.")

# Fine-tuning plan: layers before split_point are frozen; from split_point on,
# every layer except BatchNormalization stays trainable.
base_model.trainable = True
for layer_index, layer in enumerate(base_model.layers):
    in_frozen_half = layer_index < split_point
    is_batch_norm = isinstance(layer, layers.BatchNormalization)
    layer.trainable = not (in_frozen_half or is_batch_norm)

# Classification head: global average pooling -> dropout -> 3-way softmax.
# The output layer is pinned to float32 regardless of the global dtype policy.
inputs = layers.Input(shape=(224, 224, 3))
x = base_model(inputs)
x = layers.Dropout(0.2)(layers.GlobalAveragePooling2D()(x))
outputs = layers.Dense(3, activation='softmax', dtype='float32')(x)

model = models.Model(inputs, outputs, name='EffNetV2S_Ablation_Study')

optimizer = optimizers.Adadelta(learning_rate=0.1)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

model.summary()

## Hücre 6: Callbacks

In [None]:
from tensorflow.keras.callbacks import (
    EarlyStopping, ModelCheckpoint, ReduceLROnPlateau,
    CSVLogger, LearningRateScheduler
)

# Peak learning rate reached at the end of warm-up. `scaled_lr` was referenced
# by lr_schedule but never assigned, so the schedule raised NameError on its
# first call. 0.1 mirrors the Adadelta learning rate used when the model is
# compiled; change both together.
scaled_lr = 0.1
WARMUP_EPOCHS = 3

def lr_schedule(epoch, lr):
    """Linear warm-up over the first WARMUP_EPOCHS epochs, then hold.

    Args:
        epoch: Zero-based epoch index supplied by LearningRateScheduler.
        lr: Learning rate currently set on the optimizer.

    Returns:
        scaled_lr * (epoch + 1) / WARMUP_EPOCHS during warm-up; afterwards the
        current `lr` unchanged. Returning `lr` (rather than resetting to
        scaled_lr every epoch) lets reductions made by ReduceLROnPlateau persist.
    """
    if epoch < WARMUP_EPOCHS:
        return scaled_lr * (epoch + 1) / WARMUP_EPOCHS
    return lr

# The checkpoint was previously written to '/content/resnet50_best.keras',
# a leftover name: the model is EfficientNetV2-S and the evaluation cells load
# '/content/effnetv2s_best.keras'.
PHASE_CHECKPOINT_PATH = '/content/effnetv2s_best.keras'
PHASE_LOG_PATH = '/content/training_log.csv'


def _phase_callbacks(append_log):
    """Return fresh instances of the callbacks shared by both training phases.

    Args:
        append_log: Passed to CSVLogger; True appends to the existing log file
            instead of overwriting it.
    """
    return [
        EarlyStopping(
            monitor='val_loss',
            patience=8,
            restore_best_weights=True,
            verbose=0
        ),
        ModelCheckpoint(
            PHASE_CHECKPOINT_PATH,
            monitor='val_loss',
            save_best_only=True,
            verbose=0
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=4,
            min_lr=1e-7,
            verbose=0
        ),
        CSVLogger(PHASE_LOG_PATH, append=append_log)
    ]


# Phase 1: learning-rate warm-up plus the shared callbacks, starting a new log.
callbacks_phase1 = [
    LearningRateScheduler(lr_schedule, verbose=0),
    *_phase_callbacks(append_log=False)
]

# Phase 2: shared callbacks only, appending to the phase-1 log.
callbacks_phase2 = _phase_callbacks(append_log=True)

print("[OK] Callbacks hazır")

## Hücre 7: Train

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau

# Callbacks for the training run; the three monitoring callbacks watch val_loss.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
)
best_checkpoint = ModelCheckpoint(
    '/content/effnetv2s_best.keras', monitor='val_loss', save_best_only=True, verbose=1
)
epoch_logger = CSVLogger('/content/training_log_paper.csv')
plateau_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, verbose=1)

callbacks = [early_stopping, best_checkpoint, epoch_logger, plateau_lr]

# Per-class loss weights keyed by label id: class 0 at 1.0, classes 1 and 2 at 1.1.
class_weights = {0: 1.0, **dict.fromkeys((1, 2), 1.1)}

print("[TRAINING] Başlıyor...")
# 40 is an upper bound on epochs; EarlyStopping is expected to end the run sooner.
fit_options = dict(
    validation_data=val_ds,
    epochs=40,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1,
)
history = model.fit(train_ds, **fit_options)

## Hücre 8: Test Evaluation

In [None]:
from sklearn.metrics import (
    classification_report, confusion_matrix,
    precision_score, recall_score, roc_curve, auc
)
import numpy as np
import tensorflow as tf

class_names = ['Normal', 'Adenocarcinoma', 'Squamous']
# Explicit label ids keep the report and the confusion matrix at
# len(class_names) rows even when a class is missing from this split; without
# them `cm[i, i]` and `target_names` break as soon as one class is absent.
class_ids = list(range(len(class_names)))

print("[INFO] Model yükleniyor...")
model = tf.keras.models.load_model('/content/effnetv2s_best.keras')

print("[INFO] Test seti üzerinde ölçüm yapılıyor...")
test_loss, test_acc = model.evaluate(test_ds, verbose=0)

print("[INFO] Tahminler üretiliyor...")
y_pred_probs = model.predict(test_ds, verbose=0)
# test_ds is built without shuffling, so predictions line up with test_df rows.
y_true = test_df['label'].values.astype(int)
y_pred = np.argmax(y_pred_probs, axis=1)

print("="*60)
print("TEST SONUÇLARI - 3 SINIF")
print("="*60)
print(f"Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)")
print(f"Loss: {test_loss:.4f}")
print(f"\nSınıflar: {class_names}")

print("\nClassification Report:")
print(classification_report(
    y_true, y_pred, labels=class_ids, target_names=class_names, zero_division=0
))

cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\nConfusion Matrix:")
print(cm)

# Per-class accuracy: diagonal entry over the row total (true count).
print(f"\nSınıf Başına Doğruluk:")
for i, name in enumerate(class_names):
    correct = cm[i, i]
    total = cm[i, :].sum()
    if total > 0:
        print(f"  {name}: {correct}/{total} ({correct/total*100:.1f}%)")
    else:
        print(f"  {name}: 0/0 (Veri yok)")

## Hücre 9: Analysis

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

print("[INFO] En iyi model yükleniyor...")
model_path = '/content/effnetv2s_best.keras'
model = tf.keras.models.load_model(model_path)

print("[INFO] Test seti üzerinde tahmin yapılıyor...")
y_pred_probs = model.predict(test_ds, verbose=1)
y_pred = np.argmax(y_pred_probs, axis=1)

# test_ds is built without shuffling, so predictions line up with test_df rows.
y_true = test_df['label'].values.astype(int)

class_names = ['Normal', 'Adenocarcinoma', 'Squamous']
# Explicit label ids keep every metric and the confusion matrix aligned with
# class_names even when a class is missing from the test split.
class_ids = list(range(len(class_names)))

print("\n" + "="*60)
print("TEST SONUÇLARI (EfficientNetV2-S)")
print("="*60)

test_acc = np.sum(y_pred == y_true) / len(y_true)
print(f"Genel Doğruluk (Accuracy): {test_acc:.4f} ({test_acc*100:.2f}%)")

print("\nSınıflandırma Raporu:")
print(classification_report(
    y_true, y_pred, labels=class_ids, target_names=class_names, zero_division=0
))

cm = confusion_matrix(y_true, y_pred, labels=class_ids)

print("\n" + "="*60)
print("SINIF BAZLI DETAYLI ANALİZ")
print("="*60)

# Per-class metrics are computed once for all classes instead of three
# separate sklearn calls per loop iteration.
precisions = precision_score(y_true, y_pred, labels=class_ids, average=None, zero_division=0)
recalls = recall_score(y_true, y_pred, labels=class_ids, average=None, zero_division=0)
f1_scores = f1_score(y_true, y_pred, labels=class_ids, average=None, zero_division=0)

for i, name in enumerate(class_names):
    mask = (y_true == i)
    correct = (y_pred[mask] == i).sum()
    total = mask.sum()

    precision = precisions[i]
    recall = recalls[i]
    f1 = f1_scores[i]

    print(f"\n{name}:")
    # Guard against an empty class so the ratio is never 0/0.
    if total > 0:
        print(f"  Doğru/Toplam: {correct}/{total} ({correct/total*100:.1f}%)")
    else:
        print(f"  Doğru/Toplam: 0/0 (Veri yok)")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall:    {recall:.4f}")
    print(f"  F1-Score:  {f1:.4f}")

# Raw-count confusion matrix.
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Tahmin Edilen (Predicted)', fontsize=12)
plt.ylabel('Gerçek (True)', fontsize=12)
plt.title('Confusion Matrix (EfficientNetV2-S)', fontsize=14)
plt.show()

# Row-normalised confusion matrix; rows with no true samples stay at 0
# instead of becoming NaN through a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals > 0,
)
plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='Greens',
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel('Tahmin Edilen', fontsize=12)
plt.ylabel('Gerçek', fontsize=12)
plt.title('Normalized Confusion Matrix', fontsize=14)
plt.show()

## Hücre 10: Grafik

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

def _draw_metric_panel(ax, epochs, train_values, val_values, *, metric, title,
                       pick_best, value_format, label_offset, legend_loc, palette):
    """Draw one train-vs-validation panel and mark the best validation epoch."""
    color_train, color_val, color_best = palette

    ax.plot(epochs, train_values, label=f'Training {metric}', color=color_train, linewidth=2.5, alpha=0.9)
    ax.plot(epochs, val_values, label=f'Validation {metric}', color=color_val, linewidth=2.5, alpha=0.9)

    # Shade the gap between the training and validation curves.
    ax.fill_between(epochs, train_values, val_values, color='gray', alpha=0.1)

    # pick_best is np.argmax for accuracy and np.argmin for loss.
    best_index = int(pick_best(val_values))
    best_epoch = best_index + 1
    best_value = val_values[best_index]

    ax.axvline(x=best_epoch, color=color_best, linestyle='--', alpha=0.7, linewidth=1.5)
    ax.scatter(best_epoch, best_value, s=150, c=color_best, edgecolors='white', zorder=5)

    ax.annotate(
        f'Best {metric}\n{best_value:{value_format}}\n(Epoch {best_epoch})',
        xy=(best_epoch, best_value),
        xytext=label_offset, textcoords='offset points',
        bbox=dict(boxstyle="round,pad=0.5", fc="white", ec=color_best, alpha=0.9),
        arrowprops=dict(arrowstyle="->", color=color_best, connectionstyle="arc3,rad=.2"),
        ha='left', fontsize=11, color='#333333'
    )

    ax.set_title(title, weight='bold', pad=15)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric)
    ax.legend(loc=legend_loc, frameon=True, framealpha=0.9, shadow=True)
    ax.grid(True, linestyle='--', alpha=0.4)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)


def plot_training_history_hd(history=None, log_path='/content/training_log_paper.csv',
                             save_path='/content/training_results_hd'):
    """Plot publication-quality accuracy and loss curves for a training run.

    The curves come from the in-memory `history` object when it is given,
    otherwise from the CSV log at `log_path`. The figure is saved next to
    `save_path` as PNG (300 DPI), PDF and SVG, and then shown.

    Args:
        history: Object with a `.history` mapping that has the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss', or None.
        log_path: CSV file with the same four columns; used when `history`
            is None.
        save_path: Output path without extension.

    Returns:
        None. When no data source is available, the data cannot be read, or
        the history is empty, a message is printed and nothing is drawn.
    """

    try:
        if history is not None:
            records = history.history
            source = "Memory"
        elif os.path.exists(log_path):
            records = pd.read_csv(log_path)
            source = "CSV Log"
        else:
            print("HATA: Veri kaynağı bulunamadı.")
            return

        acc = np.asarray(records['accuracy'], dtype=float)
        val_acc = np.asarray(records['val_accuracy'], dtype=float)
        loss = np.asarray(records['loss'], dtype=float)
        val_loss = np.asarray(records['val_loss'], dtype=float)
    except Exception as e:
        print(f"Veri okuma hatası: {e}")
        return

    # np.argmax / np.argmin raise on empty input, so stop before plotting.
    if len(acc) == 0 or len(val_acc) == 0 or len(val_loss) == 0:
        print("HATA: Eğitim geçmişi boş (kayıtlı epoch yok).")
        return

    epochs = np.arange(1, len(acc) + 1)

    style = {
        'font.family': 'sans-serif',
        'font.sans-serif': ['Arial', 'DejaVu Sans'],
        'font.size': 12,
        'axes.titlesize': 16,
        'axes.labelsize': 14,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'legend.fontsize': 12,
        'figure.dpi': 300,
        'axes.linewidth': 1.5,
        'grid.alpha': 0.3
    }

    # (train, validation, best-epoch marker) colours.
    palette = ('#0056b3', '#d62728', '#2ca02c')

    # rc_context restores the previous rcParams on exit, so the style does not
    # leak into figures drawn by other cells.
    with plt.rc_context(style):
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7), constrained_layout=True)
        fig.suptitle(f'Model Training Performance (Source: {source})', fontsize=20, weight='bold', y=1.05)

        _draw_metric_panel(
            ax1, epochs, acc, val_acc,
            metric='Accuracy', title='Accuracy Evolution',
            pick_best=np.argmax, value_format='.4f',
            label_offset=(10, -40), legend_loc='lower right', palette=palette,
        )
        _draw_metric_panel(
            ax2, epochs, loss, val_loss,
            metric='Loss', title='Loss Convergence',
            pick_best=np.argmin, value_format='.5f',
            label_offset=(10, 40), legend_loc='upper right', palette=palette,
        )

        fig.savefig(f'{save_path}.png', dpi=300, bbox_inches='tight')
        fig.savefig(f'{save_path}.pdf', bbox_inches='tight')
        fig.savefig(f'{save_path}.svg', bbox_inches='tight')

        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"[INFO] Grafikler kaydedildi:")
    print(f"   - PNG (300 DPI): {save_path}.png")
    print(f"   - PDF (Vector):  {save_path}.pdf (Rapor için bunu kullanın)")
    print(f"   - SVG (Vector):  {save_path}.svg")

# Run: use the in-memory `history` from the training cell when it exists and
# fall back to the CSV log otherwise (passing None selects the CSV path).
# The lookup is done up front instead of wrapping the call in
# `except NameError`, which would also catch a NameError raised inside the
# plotting function and silently plot a second time from the CSV.
plot_training_history_hd(globals().get('history'))