In [None]:
!pip install kagglehub --quiet

In [None]:
from kagglehub import dataset_download
import os

# Kaggle handle of the dataset to fetch; `path` receives whatever location
# dataset_download() reports and is reused by the following cells.
KAGGLE_DATASET = "mateuszbuda/lgg-mri-segmentation"
path = dataset_download(KAGGLE_DATASET)
print("Dataset downloaded to:", path)

In [None]:
# Show what was downloaded so the expected folder layout can be checked by eye.
print("Top-level contents:", os.listdir(path))

# Sub-folder of the download that is later passed to load_data_recursive().
DATASET_DIR = os.path.join(path, "kaggle_3m")

# Fail fast: os.walk() on a missing directory silently yields nothing, which
# would otherwise only show up later as an empty dataset.
if not os.path.isdir(DATASET_DIR):
    raise FileNotFoundError(f"Expected dataset folder not found: {DATASET_DIR}")

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Target size (in pixels) that every image and mask is resized to on load,
# and the spatial size of the network input.
IMG_HEIGHT = IMG_WIDTH = 128

In [None]:
def load_data_recursive(base_dir):
    """Load every image/mask pair found under ``base_dir`` into NumPy arrays.

    The directory tree is walked recursively. Every ``.tif`` file whose name
    contains ``_mask`` is counted as a mask; every other ``.tif`` is treated
    as an image. For each image, the mask is expected next to it under the
    name ``<image stem>_mask.tif``; images without such a file are ignored.

    Args:
        base_dir: Root directory to search.

    Returns:
        Tuple ``(X, Y)`` of float32 arrays:
        ``X`` has shape ``(N, IMG_HEIGHT, IMG_WIDTH, 3)`` with channels in RGB
        order and values scaled by 1/255; ``Y`` has shape
        ``(N, IMG_HEIGHT, IMG_WIDTH, 1)`` with values scaled by 1/255.
        When nothing could be loaded, both arrays have ``N == 0`` but keep
        the remaining dimensions.
    """
    image_files = []
    mask_files = []

    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.endswith(".tif"):
                full_path = os.path.join(root, file)
                if "_mask" in file:
                    mask_files.append(full_path)
                else:
                    image_files.append(full_path)

    image_files.sort()
    mask_files.sort()

    print(f"Found {len(image_files)} images and {len(mask_files)} masks.")

    X, Y = [], []
    unreadable = 0
    for img_path in image_files:
        # splitext drops only the trailing extension; str.replace would also
        # strip a ".tif" that happens to appear in the middle of the name.
        stem, _ = os.path.splitext(img_path)
        mask_path = stem + "_mask.tif"

        if not os.path.exists(mask_path):
            continue

        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip the pair rather than crash in cv2.resize.
        if img is None or mask is None:
            unreadable += 1
            continue

        # OpenCV decodes colour images as BGR; convert to RGB so the arrays
        # display correctly with matplotlib.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT)).astype(np.float32) / 255.0

        # Nearest-neighbour resizing only copies existing pixel values, so the
        # mask gains no interpolated in-between grey levels.
        mask = cv2.resize(mask, (IMG_WIDTH, IMG_HEIGHT),
                          interpolation=cv2.INTER_NEAREST)
        mask = mask.astype(np.float32) / 255.0
        mask = np.expand_dims(mask, axis=-1)

        X.append(img)
        Y.append(mask)

    if unreadable:
        print(f"Skipped {unreadable} image/mask pairs that could not be read.")

    if not X:
        # Keep the documented rank so callers can still inspect .shape.
        return (np.empty((0, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.float32),
                np.empty((0, IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32))

    return np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)

In [None]:
# Read all image/mask pairs under DATASET_DIR into memory.
X, Y = load_data_recursive(DATASET_DIR)
print("Loaded dataset:", X.shape, Y.shape)

# Hold out 20% of the samples for validation; the fixed random_state makes the
# split repeatable across runs.
# NOTE(review): samples are shuffled individually, so files from the same
# sub-folder can end up on both sides of the split — confirm whether a
# folder-level (presumably per-patient) split is wanted instead.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.2,
)
print("Train:", X_train.shape, "Val:", X_val.shape)

In [None]:
def conv_block(inputs, filters):
    """Apply two consecutive 3x3, same-padded, ReLU-activated convolutions.

    Args:
        inputs: Tensor to convolve.
        filters: Number of output filters used by both convolutions.

    Returns:
        The output tensor of the second convolution.
    """
    out = inputs
    for _ in range(2):
        out = layers.Conv2D(filters, 3, activation='relu', padding='same')(out)
    return out

def encoder_block(inputs, filters):
    """Run a conv_block and then halve the result with 2x2 max pooling.

    Args:
        inputs: Tensor entering this encoder stage.
        filters: Number of filters passed to conv_block.

    Returns:
        Tuple ``(features, pooled)``: the conv_block output before pooling
        (used by build_unet as the skip connection) and the pooled tensor.
    """
    features = conv_block(inputs, filters)
    pooled = layers.MaxPooling2D(pool_size=(2, 2))(features)
    return features, pooled

def decoder_block(inputs, skip, filters):
    """Upsample, merge with a skip tensor, then apply a conv_block.

    Args:
        inputs: Tensor coming from the previous (coarser) stage.
        skip: Tensor to concatenate with the upsampled input.
        filters: Number of filters for the transposed convolution and the
            conv_block that follows.

    Returns:
        The conv_block output computed on the concatenated tensor.
    """
    # A stride-2 transposed convolution with a 2x2 kernel performs the upsampling.
    upsampled = layers.Conv2DTranspose(
        filters, (2, 2), strides=(2, 2), padding='same'
    )(inputs)
    merged = layers.concatenate([upsampled, skip])
    return conv_block(merged, filters)

def build_unet(input_shape):
    """Assemble the U-Net style model used in this notebook.

    The network has three encoder stages (64, 128, 256 filters), a 512-filter
    bottleneck, three decoder stages (256, 128, 64 filters) that each consume
    the matching encoder output as a skip connection, and a final 1x1
    sigmoid convolution with a single output channel.

    Args:
        input_shape: Shape passed to ``layers.Input``.
            NOTE(review): height and width presumably need to be divisible
            by 8 so the skip tensors line up after three poolings — confirm.

    Returns:
        A ``models.Model`` named ``'UNet'``.
    """
    inputs = layers.Input(input_shape)

    # Encoder: remember each pre-pooling tensor for the decoder.
    skips = []
    x = inputs
    for n_filters in (64, 128, 256):
        skip, x = encoder_block(x, n_filters)
        skips.append(skip)

    # Bottleneck.
    x = conv_block(x, 512)

    # Decoder: walk the stored skips from deepest to shallowest.
    for n_filters, skip in zip((256, 128, 64), reversed(skips)):
        x = decoder_block(x, skip, n_filters)

    outputs = layers.Conv2D(1, 1, activation='sigmoid')(x)

    return models.Model(inputs, outputs, name='UNet')

In [None]:
# Build the network for 3-channel inputs of the configured size.
model = build_unet(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Adam with its default settings, binary cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train for 20 epochs in batches of 8, evaluating on the validation split
# after each epoch; `history` keeps the object returned by fit().
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=8,
    epochs=20,
    validation_data=(X_val, Y_val),
)

In [None]:
def show_predictions(X, Y_true, model, num=3):
    """Plot input, ground-truth mask and predicted mask side by side.

    One figure with three panels is shown per sample, for the first ``num``
    samples of ``X``.

    Args:
        X: Indexable collection of input images; ``X[:k]`` is passed to
            ``model.predict``.
        Y_true: Ground-truth masks aligned with ``X``; each entry is
            squeezed before being shown in grayscale.
        model: Object exposing ``predict(batch)`` that returns one mask per
            input sample.
        num: Maximum number of samples to display. It is clamped to the
            number of available samples, so asking for more than exist no
            longer raises ``IndexError``.

    Returns:
        None. Nothing is predicted or drawn when there are no samples to show.
    """
    count = max(0, min(num, len(X), len(Y_true)))
    if count == 0:
        # Nothing to display; also avoids calling predict() on an empty batch.
        return

    preds = model.predict(X[:count])
    for i in range(count):
        fig, axes = plt.subplots(1, 3, figsize=(12, 4))
        panels = (
            (X[i], 'Input', None),
            (Y_true[i].squeeze(), 'Ground Truth', 'gray'),
            (preds[i].squeeze(), 'Prediction', 'gray'),
        )
        for ax, (image, title, cmap) in zip(axes, panels):
            ax.imshow(image, cmap=cmap)
            ax.set_title(title)
            ax.axis('off')

        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Visualise the model's output on the first few validation samples.
show_predictions(X=X_val, Y_true=Y_val, model=model)