<a href="https://colab.research.google.com/github/davidarvai/DIPLOMADOLGOZAT-/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
import math
import numpy as np
import nibabel as nib
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# -----------------------------
# Konfúziós mátrix és metrikák függvényei
# -----------------------------
def get_custom_confusion_matrix(tumor_type, tn, fp, fn, tp):
    if tumor_type == "Whole Tumor":
        custom_matrix = np.array([[tn, fp, fp, fp],
                                  [fn, tp, tp, tp],
                                  [fn, tp, tp, tp],
                                  [fn, tp, tp, tp]])
    elif tumor_type == "Edema":
        custom_matrix = np.array([[tn, tn, fp, tn],
                                  [tn, tn, fp, tn],
                                  [fn, fn, tp, fn],
                                  [tn, tn, fp, tn]])
    elif tumor_type == "Tumor Core":
        custom_matrix = np.array([[tn, fp, tn, fp],
                                  [fn, tp, fn, tp],
                                  [tn, fp, tn, fp],
                                  [fn, tp, fn, tp]])
    elif tumor_type == "Enhancing Core":
        custom_matrix = np.array([[tn, tn, tn, fp],
                                  [tn, tn, tn, fp],
                                  [tn, tn, tn, fp],
                                  [fn, fn, fn, tp]])
    else:
        custom_matrix = None
    return custom_matrix

def compute_confusion(gt_mask, pred_mask):
    tn = np.sum((gt_mask==False) & (pred_mask==False))
    tp = np.sum((gt_mask==True)  & (pred_mask==True))
    fp = np.sum((gt_mask==False) & (pred_mask==True))
    fn = np.sum((gt_mask==True)  & (pred_mask==False))
    return tn, fp, fn, tp

def compute_metrics(tn, fp, fn, tp):
    TPR = tp / (tp + fn) if (tp+fn) > 0 else 0
    TNR = tn / (tn + fp) if (tn+fp) > 0 else 0
    PPV = tp / (tp + fp) if (tp+fp) > 0 else 0
    NPV = tn / (tn + fn) if (tn+fn) > 0 else 0
    ACC = (tp + tn) / (tp + tn + fp + fn) if (tp+tn+fp+fn)>0 else 0
    DS  = (2 * tp) / (2 * tp + fp + fn) if (2 * tp + fp + fn)>0 else 0
    return TPR, TNR, PPV, NPV, ACC, DS

# -----------------------------
# Adat betöltése
# -----------------------------
def remap_segmentation(seg):
    seg_new = np.copy(seg)
    seg_new[seg == 4] = 3  # remappeljük: 4 -> 3 (Enhancing Core)
    return seg_new

def load_subject_data(subject_path):
    files = os.listdir(subject_path)
    subject_data = {}
    for file in files:
        if file.endswith('.nii') or file.endswith('.nii.gz'):
            lower = file.lower()
            if 'seg' in lower:
                subject_data['seg'] = os.path.join(subject_path, file)
            else:
                for mod in ['t1', 't1ce', 't2', 'flair']:
                    if mod in lower:
                        subject_data[mod] = os.path.join(subject_path, file)
    return subject_data

def load_data_from_dir(data_dir):
    X_list = []
    Y_list = []
    subject_names = []
    subject_dirs = [os.path.join(data_dir, d) for d in os.listdir(data_dir)
                    if os.path.isdir(os.path.join(data_dir, d))]
    subject_dirs.sort()
    for subject_path in subject_dirs:
        data_files = load_subject_data(subject_path)
        if all(mod in data_files for mod in ['t1', 't1ce', 't2', 'flair']) and 'seg' in data_files:
            modality_imgs = []
            for mod in ['t1', 't1ce', 't2', 'flair']:
                img = nib.load(data_files[mod]).get_fdata()
                modality_imgs.append(img)
            X = np.stack(modality_imgs, axis=-1)  # shape: (H,W,D,4)
            seg = nib.load(data_files['seg']).get_fdata()
            seg = remap_segmentation(seg)
            X_list.append(X)
            Y_list.append(seg)
            subject_names.append(os.path.basename(subject_path))
        else:
            print("Hiányos adatok:", subject_path)
    return X_list, Y_list, subject_names

def normalize_volume(vol):
    vol = vol.astype(np.float32)
    vol = (vol - np.min(vol)) / (np.max(vol) - np.min(vol) + 1e-8)
    return vol

# -----------------------------
# 2D szeletek kinyerése
# -----------------------------
def extract_slices(volume, seg, slice_axis=2, include_bg_ratio=0.3):
    slices_x = []
    slices_y = []
    D = volume.shape[slice_axis]
    for i in range(D):
        img_slice = volume[:,:,i,:]  # shape: (H,W,4)
        seg_slice = seg[:,:,i]       # shape: (H,W)
        if np.sum(seg_slice > 0) > 0.01 * (seg_slice.shape[0]*seg_slice.shape[1]):
            slices_x.append(img_slice)
            slices_y.append(seg_slice)
        else:
            if np.random.rand() < include_bg_ratio:
                slices_x.append(img_slice)
                slices_y.append(seg_slice)
    return slices_x, slices_y

def create_2d_dataset(volumes, segmentations):
    X_slices = []
    Y_slices = []
    for vol, seg in zip(volumes, segmentations):
        xs, ys = extract_slices(vol, seg, slice_axis=2, include_bg_ratio=0.3)
        X_slices.extend(xs)
        Y_slices.extend(ys)
    X_slices = np.array(X_slices)  # shape: (n_slices, H, W, 4)
    Y_slices = np.array(Y_slices)  # shape: (n_slices, H, W)
    return X_slices, Y_slices

# -----------------------------
# 2D U-Net modell építése
# -----------------------------
def unet2d_model(input_shape, n_filters=32, n_classes=4):
    inputs = Input(input_shape)
    c1 = Conv2D(n_filters, 3, activation='relu', padding='same')(inputs)
    c1 = Conv2D(n_filters, 3, activation='relu', padding='same')(c1)
    p1 = MaxPooling2D((2,2))(c1)

    c2 = Conv2D(n_filters*2, 3, activation='relu', padding='same')(p1)
    c2 = Conv2D(n_filters*2, 3, activation='relu', padding='same')(c2)
    p2 = MaxPooling2D((2,2))(c2)

    c3 = Conv2D(n_filters*4, 3, activation='relu', padding='same')(p2)
    c3 = Conv2D(n_filters*4, 3, activation='relu', padding='same')(c3)

    u2 = UpSampling2D((2,2))(c3)
    u2 = concatenate([u2, c2])
    c4 = Conv2D(n_filters*2, 3, activation='relu', padding='same')(u2)
    c4 = Conv2D(n_filters*2, 3, activation='relu', padding='same')(c4)

    u1 = UpSampling2D((2,2))(c4)
    u1 = concatenate([u1, c1])
    c5 = Conv2D(n_filters, 3, activation='relu', padding='same')(u1)
    c5 = Conv2D(n_filters, 3, activation='relu', padding='same')(c5)

    outputs = Conv2D(n_classes, 1, activation='softmax')(c5)
    model = Model(inputs=[inputs], outputs=[outputs])
    return model

# -----------------------------
# Kombinált veszteség (2D): Sparse Crossentropy + Dice loss
# -----------------------------
def dice_loss_2d(y_true, y_pred, smooth=1e-6):
    # Ha y_true utolsó dimenziója 1, akkor squeeze-eljük
    if y_true.shape[-1] == 1:
        y_true = tf.squeeze(y_true, axis=-1)
    y_true = tf.one_hot(tf.cast(y_true, tf.int32), depth=4)  # shape: (batch, H, W, 4)
    intersection = tf.reduce_sum(y_true * y_pred, axis=[1,2])
    union = tf.reduce_sum(y_true, axis=[1,2]) + tf.reduce_sum(y_pred, axis=[1,2])
    dice = (2. * intersection + smooth) / (union + smooth)
    return 1 - tf.reduce_mean(dice)

def combined_loss_2d(y_true, y_pred):
    ce_loss = tf.keras.losses.SparseCategoricalCrossentropy()(y_true, y_pred)
    d_loss = dice_loss_2d(y_true, y_pred)
    return ce_loss + d_loss

# -----------------------------
# Fő program
# -----------------------------
if __name__ == "__main__":
    # Állítsd be az útvonalakat a tréning és teszt adatokhoz
    train_dir = "/content/drive/My Drive/Allamvizsga/Data/Teszt/Train"
    test_dir  = "/content/drive/My Drive/Allamvizsga/Data/Teszt/Teszt"

    print("Train adatok betöltése...")
    X_train_vols, Y_train_vols, train_subject_names = load_data_from_dir(train_dir)
    if len(X_train_vols) == 0:
        raise ValueError("Nincsenek betöltött train adatok!")
    X_train_vols = [normalize_volume(vol) for vol in X_train_vols]

    # 2D szeletek kinyerése a tréning adatokból
    X_train_slices, Y_train_slices = create_2d_dataset(X_train_vols, Y_train_vols)
    print("Train szeletek alakja:", X_train_slices.shape, Y_train_slices.shape)

    # Train/Validation split
    X_train, X_val, Y_train, Y_val = train_test_split(X_train_slices, Y_train_slices, test_size=0.2, random_state=42)

    input_shape = X_train.shape[1:]  # (H, W, 4)
    model = unet2d_model(input_shape=input_shape, n_filters=32, n_classes=4)
    model.compile(optimizer=Adam(learning_rate=1e-4), loss=combined_loss_2d, metrics=['accuracy'])
    model.summary()

    # Tanítás
    epochs = 20
    batch_size = 8
    model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=epochs, batch_size=batch_size)

    # -----------------------------
    # Tesztelés: Az egyes teszt subject-ekből a 3D volument a szeletekből állítjuk vissza.
    # -----------------------------
    print("Teszt adatok betöltése...")
    X_test_vols, Y_test_vols, test_subject_names = load_data_from_dir(test_dir)
    if len(X_test_vols) == 0:
        raise ValueError("Nincsenek betöltött teszt adatok!")
    X_test_vols = [normalize_volume(vol) for vol in X_test_vols]

    metrics_rows = []
    output_txt_lines = []
    tumor_types = ["Whole Tumor", "Edema", "Tumor Core", "Enhancing Core"]

    for vol, seg, subj_name in zip(X_test_vols, Y_test_vols, test_subject_names):
        H, W, D, _ = vol.shape
        pred_vol = np.zeros((H, W, D), dtype=np.int32)
        for i in range(D):
            x_slice = vol[:,:,i,:]  # (H, W, 4)
            x_slice = np.expand_dims(x_slice, axis=0)
            pred_slice = model.predict(x_slice)[0]  # (H, W, 4)
            pred_slice = np.argmax(pred_slice, axis=-1)  # (H, W)
            pred_vol[:,:,i] = pred_slice
        # Metrikák számítása
        def get_binary_mask_3d(segmentation, tumor_type):
            if tumor_type == "Whole Tumor":
                return np.isin(segmentation, [1,2,3])
            elif tumor_type == "Edema":
                return (segmentation == 2)
            elif tumor_type == "Tumor Core":
                return np.isin(segmentation, [1,3])
            elif tumor_type == "Enhancing Core":
                return (segmentation == 3)
            else:
                raise ValueError("Ismeretlen tumor típus!")
        for tumor in tumor_types:
            gt_mask = get_binary_mask_3d(seg, tumor)
            pred_mask = get_binary_mask_3d(pred_vol, tumor)
            tn, fp, fn, tp = compute_confusion(gt_mask, pred_mask)
            TPR, TNR, PPV, NPV, ACC, DS = compute_metrics(tn, fp, fn, tp)
            cm = get_custom_confusion_matrix(tumor, tn, fp, fn, tp)

            metrics_rows.append({
                "Name": subj_name,
                "TumorType": tumor,
                "TP": tp,
                "TN": tn,
                "FP": fp,
                "FN": fn,
                "TPR": round(TPR, 3),
                "TNR": round(TNR, 3),
                "PPV": round(PPV, 3),
                "NPV": round(NPV, 3),
                "ACC": round(ACC, 3),
                "DS": round(DS, 3)
            })

            txt_block = f"Mapa neve: {subj_name}\nTumor típus: {tumor}\nKonfúziós mátrix:\n{cm}\n"
            txt_block += f"True Positive Rate (TPR): {round(TPR,3)}\n"
            txt_block += f"True Negative Rate (TNR): {round(TNR,3)}\n"
            txt_block += f"Positive Predictive Value (PPV): {round(PPV,3)}\n"
            txt_block += f"Negative Predictive Value (NPV): {round(NPV,3)}\n"
            txt_block += f"Accuracy (ACC): {round(ACC,3)}\n"
            txt_block += f"Dice Score (DS): {round(DS,3)}\n\n"
            output_txt_lines.append(txt_block)

    metrics_df = pd.DataFrame(metrics_rows, columns=["Name","TumorType","TP","TN","FP","FN","TPR","TNR","PPV","NPV","ACC","DS"])
    metrics_df.to_csv("metrics_output.csv", index=False)
    print("A metrics_output.csv fájl elmentve.")

    with open("output.txt", "w") as f:
        f.write("".join(output_txt_lines))
    print("Az output.txt fájl elmentve.")


Train adatok betöltése...
Train szeletek alakja: (778, 240, 240, 4) (778, 240, 240)


Epoch 1/20


Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(None, 240, 240, 4))


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 296ms/step - accuracy: 0.9774 - loss: 1.8400 - val_accuracy: 0.9819 - val_loss: 0.8045
Epoch 2/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 98ms/step - accuracy: 0.9810 - loss: 0.7926 - val_accuracy: 0.9819 - val_loss: 0.7525
Epoch 3/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 101ms/step - accuracy: 0.9828 - loss: 0.7512 - val_accuracy: 0.9852 - val_loss: 0.7144
Epoch 4/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 98ms/step - accuracy: 0.9833 - loss: 0.7269 - val_accuracy: 0.9865 - val_loss: 0.6959
Epoch 5/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 101ms/step - accuracy: 0.9867 - loss: 0.6951 - val_accuracy: 0.9873 - val_loss: 0.7363
Epoch 6/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 102ms/step - accuracy: 0.9861 - loss: 0.7177 - val_accuracy: 0.9857 - val_loss: 0.6744
Epoch 7/20
[1m78/78[0m [32m━━━━━━━

Expected: ['keras_tensor']
Received: inputs=Tensor(shape=(1, 240, 240, 4))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms