In [1]:
!pip install gdown
!pip install -q albumentations

# --- 1. ADIM: VERİ SETİNİ KAGGLE ORTAMINA İNDİRME VE AÇMA ---
import os

# İndirilecek dosyanın Google Drive ID'si ve hedef dosya adı
# Link: https://drive.google.com/file/d/1bPtSXPnMYP5z3PUUVoZPk7NLfm8R_mzM/
file_id = "1bPtSXPnMYP5z3PUUVoZPk7NLfm8R_mzM"
zip_dosya_yolu = "MURA-v1.1.zip"
hedef_klasor = "unziped_mura/" # Dosyaların çıkarılacağı klasör

# gdown ile dosyayı indiriyoruz
print("Veri seti indiriliyor...")
!gdown --id {file_id} -O {zip_dosya_yolu}
print("\nİndirme tamamlandı.")

# Hedef klasörü hazırlama ve zip dosyasını açma
print(f"\n'{hedef_klasor}' hazırlanıyor...")
!rm -rf "{hedef_klasor}" # Eğer klasör varsa temizle
!mkdir -p "{hedef_klasor}"
!unzip -q -n "{zip_dosya_yolu}" -d "{hedef_klasor}" # -q (quiet) modu logları azaltır, -n (no overwrite)
print("\nZip dosyasından çıkarma işlemi tamamlandı.")

Veri seti indiriliyor...
Downloading...
From (original): https://drive.google.com/uc?id=1bPtSXPnMYP5z3PUUVoZPk7NLfm8R_mzM
From (redirected): https://drive.google.com/uc?id=1bPtSXPnMYP5z3PUUVoZPk7NLfm8R_mzM&confirm=t&uuid=30aa529c-e09c-41f2-8d30-3db85f37a0dd
To: /kaggle/working/MURA-v1.1.zip
100%|██████████████████████████████████████| 3.38G/3.38G [00:40<00:00, 83.7MB/s]

İndirme tamamlandı.

'unziped_mura/' hazırlanıyor...

Zip dosyasından çıkarma işlemi tamamlandı.


In [2]:
# --- 2. ADIM: VERİLERİ YÜKLEME VE HAZIRLAMA ---
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.utils.class_weight import compute_class_weight
import albumentations as A


main_dir = hedef_klasor # Artık ana dizinimiz burası
train_csv_path = os.path.join(main_dir, "MURA-v1.1/train_image_paths.csv")
valid_csv_path = os.path.join(main_dir, "MURA-v1.1/valid_image_paths.csv")

# Veri çerçevelerini oluşturma
train_df = pd.read_csv(train_csv_path, header=None, names=['image_path'])
train_df['full_path'] = train_df['image_path'].apply(lambda path: os.path.join(main_dir, path))
train_df['label'] = train_df['image_path'].apply(lambda path: 1 if 'positive' in path else 0)
train_df['label'] = train_df['label'].astype(str)

valid_df = pd.read_csv(valid_csv_path, header=None, names=['image_path'])
valid_df['full_path'] = valid_df['image_path'].apply(lambda path: os.path.join(main_dir, path))
valid_df['label'] = valid_df['image_path'].apply(lambda path: 1 if 'positive' in path else 0)
valid_df['label'] = valid_df['label'].astype(str)

print("\nEğitim Veri Seti Dağılımı:\n", train_df['label'].value_counts())
print("\nDoğrulama Veri Seti Dağılımı:\n", valid_df['label'].value_counts())


2025-07-28 08:36:31.526647: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753691791.775220      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753691791.843295      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Eğitim Veri Seti Dağılımı:
 label
0    21935
1    14873
Name: count, dtype: int64

Doğrulama Veri Seti Dağılımı:
 label
0    1667
1    1530
Name: count, dtype: int64


In [3]:
### --- ADIM 3: VERİLERİ HAZIRLAMA VE SINIF AĞIRLIKLARI (DÜZELTİLMİŞ) --- ###

print("\n--- Veri Bilgileri Yükleniyor ve Hazırlanıyor ---")
main_dir = hedef_klasor
train_csv_path = os.path.join(main_dir, "MURA-v1.1/train_image_paths.csv")
valid_csv_path = os.path.join(main_dir, "MURA-v1.1/valid_image_paths.csv")

train_df = pd.read_csv(train_csv_path, header=None, names=['image_path'])
train_df['full_path'] = train_df['image_path'].apply(lambda path: os.path.join(main_dir, path))
train_df['label'] = train_df['image_path'].apply(lambda path: 1 if 'positive' in path else 0)

valid_df = pd.read_csv(valid_csv_path, header=None, names=['image_path'])
valid_df['full_path'] = valid_df['image_path'].apply(lambda path: os.path.join(main_dir, path))
valid_df['label'] = valid_df['image_path'].apply(lambda path: 1 if 'positive' in path else 0)

# Sınıf ağırlıklarını hesapla
class_weights = compute_class_weight('balanced', classes=np.unique(train_df['label']), y=train_df['label'])
class_weight_dict = dict(enumerate(class_weights))
print(f"Hesaplanan Sınıf Ağırlıkları: {class_weight_dict}")



--- Veri Bilgileri Yükleniyor ve Hazırlanıyor ---
Hesaplanan Sınıf Ağırlıkları: {0: 0.8390243902439024, 1: 1.2374100719424461}


In [5]:
print("\n--- Albumentations Pipeline'ları Oluşturuluyor ---")
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

def get_train_augs():
    """Eğitim seti için kullanılacak 'hafifletilmiş' ve güvenli zenginleştirme adımları."""
    return A.Compose([
        A.Resize(height=IMAGE_SIZE[0], width=IMAGE_SIZE[1]),
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.08, scale_limit=0.1, rotate_limit=10, p=0.7),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize()
    ])

def get_valid_augs():
    """Doğrulama seti için sadece yeniden boyutlandırma ve normalizasyon."""
    return A.Compose([
        A.Resize(height=IMAGE_SIZE[0], width=IMAGE_SIZE[1]),
        A.Normalize()
    ])


--- Albumentations Pipeline'ları Oluşturuluyor ---


In [6]:
class AlbumentationsDataGenerator(Sequence):
    def __init__(self, dataframe, batch_size, augmentations, shuffle=False):
        self.df, self.batch_size, self.augmentations, self.shuffle = dataframe, batch_size, augmentations, shuffle
        self.image_paths, self.labels = self.df['full_path'].tolist(), self.df['label'].tolist()
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size : (index + 1) * self.batch_size]
        batch_images, batch_labels = [], []
        for i in batch_indices:
            img = cv2.imread(self.image_paths[i])
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            if self.augmentations:
                img = self.augmentations(image=img)['image']
            batch_images.append(img)
            batch_labels.append(self.labels[i])
        return np.array(batch_images), np.array(batch_labels)

    def on_epoch_end(self):
        self.indices = np.arange(len(self.image_paths))
        if self.shuffle: np.random.shuffle(self.indices)

train_generator = AlbumentationsDataGenerator(train_df, BATCH_SIZE, get_train_augs(), shuffle=True)
validation_generator = AlbumentationsDataGenerator(valid_df, BATCH_SIZE, get_valid_augs(), shuffle=False)
print("Albumentations veri üreteçleri hazır.")



Albumentations veri üreteçleri hazır.


  original_init(self, **validated_kwargs)


In [7]:
print("\n--- ResNet50V2 Modeli Oluşturuluyor ---")
base_model = ResNet50V2(input_shape=(*IMAGE_SIZE, 3), include_top=False, weights='imagenet')
base_model.trainable = False

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])


--- ResNet50V2 Modeli Oluşturuluyor ---


I0000 00:00:1753692167.623125      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [None]:
print("\n--- İlk Aşama Eğitimi Başlıyor ---")
INITIAL_EPOCHS = 25 # Bu aşama için epoch sayısını biraz artırabiliriz
model.compile(optimizer=Adam(learning_rate=1e-3), loss='binary_crossentropy', metrics=['accuracy'])
checkpoint_phase1 = ModelCheckpoint('best_model_phase1.h5', monitor='val_loss', mode='min', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history_phase1 = model.fit(
    train_generator,
    epochs=INITIAL_EPOCHS,
    validation_data=validation_generator,
    class_weight=class_weight_dict,
    callbacks=[checkpoint_phase1, early_stopping]
)


--- İlk Aşama Eğitimi Başlıyor ---


  self._warn_if_super_not_called()


Epoch 1/25


I0000 00:00:1753692420.984371     115 service.cc:148] XLA service 0x7d6aa0291a80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1753692420.985759     115 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1753692422.410760     115 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   1/1151[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:02:24[0m 16s/step - accuracy: 0.5312 - loss: 2.2946

I0000 00:00:1753692427.692200     115 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step - accuracy: 0.5356 - loss: 1.2998
Epoch 1: val_loss improved from inf to 0.75108, saving model to best_model_phase1.h5
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 191ms/step - accuracy: 0.5356 - loss: 1.2994 - val_accuracy: 0.5912 - val_loss: 0.7511
Epoch 2/25
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step - accuracy: 0.5533 - loss: 0.7487
Epoch 2: val_loss improved from 0.75108 to 0.70952, saving model to best_model_phase1.h5
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 179ms/step - accuracy: 0.5533 - loss: 0.7487 - val_accuracy: 0.5812 - val_loss: 0.7095
Epoch 3/25
[1m1102/1151[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m8s[0m 171ms/step - accuracy: 0.5694 - loss: 0.7196

In [9]:
print("\n--- İnce Ayar (Fine-Tuning) Aşaması Başlıyor ---")
model = load_model('best_model_phase1.h5') # İlk aşamanın en iyi modelini yükle
base_model = model.layers[0]
base_model.trainable = True

fine_tune_at = 140
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

FINE_TUNE_EPOCHS = 40 # İnce ayar için daha fazla epoch
decay_steps = len(train_generator) * FINE_TUNE_EPOCHS
cosine_decay_scheduler = tf.keras.optimizers.schedules.CosineDecay(1e-5, decay_steps, alpha=1e-7)

model.compile(optimizer=Adam(learning_rate=cosine_decay_scheduler), loss='binary_crossentropy', metrics=['accuracy'])
checkpoint_finetune = ModelCheckpoint('best_model_finetuned.h5', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1)

history_finetune = model.fit(
    train_generator,
    epochs=FINE_TUNE_EPOCHS,
    validation_data=validation_generator,
    class_weight=class_weight_dict,
    callbacks=[checkpoint_finetune]
)


--- İnce Ayar (Fine-Tuning) Aşaması Başlıyor ---

--- Model Özeti (İnce Ayar Aşaması) ---


In [None]:
print("\n--- Nihai Model Değerlendiriliyor ---")
model = load_model('best_model_finetuned.h5') # Tüm sürecin en iyi modelini yükle
scores = model.evaluate(validation_generator)
print("-" * 50)
print(f"NİHAİ DOĞRULUK: {scores[1] * 100:.2f}%")
print(f"NİHAİ KAYIP: {scores[0]}")
print("-" * 50)


Model 30 epoch daha (ince ayar) eğitiliyor...
Epoch 21/50
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m336s[0m 270ms/step - accuracy: 0.5656 - loss: 0.6963 - val_accuracy: 0.6212 - val_loss: 0.6429 - learning_rate: 1.0000e-05
Epoch 22/50
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 261ms/step - accuracy: 0.6305 - loss: 0.6573 - val_accuracy: 0.6606 - val_loss: 0.6152 - learning_rate: 1.0000e-05
Epoch 23/50
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 270ms/step - accuracy: 0.6535 - loss: 0.6374 - val_accuracy: 0.6775 - val_loss: 0.5901 - learning_rate: 1.0000e-05
Epoch 24/50
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 257ms/step - accuracy: 0.6764 - loss: 0.6158 - val_accuracy: 0.6991 - val_loss: 0.5755 - learning_rate: 1.0000e-05
Epoch 25/50
[1m1151/1151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m289s[0m 251ms/step - accuracy: 0.6851 - loss: 0.6030 - val_accuracy: 0.7079 - val_loss: 0.5