In [6]:
import os
import numpy as np
from PIL import Image
from pathlib import Path

# مسیر پوشه ورودی و خروجی
annot_dir = Path("annots")         # پوشه ماسک‌های رنگی
output_dir = Path("masks_numeric") # پوشه ذخیره ماسک عددی
output_dir.mkdir(parents=True, exist_ok=True)

# رنگ‌های کلاس‌ها (دقیقاً مطابق colors.txt)
CLASS_COLORS = [
    [0, 0, 0],         # background
    [190, 62, 204],    # Endo-Peritoneum
    [93, 89, 254],     # Endo-Ovar
    [145, 210, 138],   # Endo-TIE
    [238, 236, 50]     # Endo-Uterus
]

def rgb_to_class(mask_rgb, tolerance=5):
    """تبدیل ماسک RGB به ماسک عددی با تلورانس رنگ"""
    mask_rgb = np.array(mask_rgb)
    mask_class = np.zeros(mask_rgb.shape[:2], dtype=np.uint8)
    for i, color in enumerate(CLASS_COLORS):
        matches = np.all(np.abs(mask_rgb - color) <= tolerance, axis=-1)
        mask_class[matches] = i
    return mask_class

# پردازش و ذخیره همه فایل‌های PNG
for file in annot_dir.glob("*.png"):
    with Image.open(file) as img:
        mask_class = rgb_to_class(img)
        Image.fromarray(mask_class).save(output_dir / file.name)

print(f"✅ تمام ماسک‌ها به صورت عددی ذخیره شدند در: {output_dir}")


✅ تمام ماسک‌ها به صورت عددی ذخیره شدند در: masks_numeric


In [7]:
# لیبل‌ها از فایل labels.txt بخونیم (اگر داری)
with open("labels.txt", "r") as f:
    CLASS_LABELS = [line.strip() for line in f.readlines()]

mask_dir = Path("masks_numeric")

for file in sorted(mask_dir.glob("*.png")):
    with Image.open(file) as img:
        mask = np.array(img)
        unique = np.unique(mask)
        class_names = [CLASS_LABELS[i] for i in unique if i < len(CLASS_LABELS)]
        print(f"🟩 {file.name}: {unique.tolist()} => {class_names}")


🟩 c_100_v_(video_3062.mp4)_f_1458.png: [0, 4] => ['background', '6.1.1.4_Endo-Uterus']
🟩 c_102_v_(video_3119.mp4)_f_1311.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_102_v_(video_3119.mp4)_f_185.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_102_v_(video_3124.mp4)_f_97.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_102_v_(video_3130.mp4)_f_0.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_105_v_(video_3214.mp4)_f_2954.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_108_v_(video_3312.mp4)_f_619.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_10_v_(video_182.mp4)_f_494.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_10_v_(video_184.mp4)_f_22.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_10_v_(video_185.mp4)_f_112.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_10_v_(video_185.mp4)_f_176.png: [0, 1] => ['background', '6.1.1.1_Endo-Peritoneum']
🟩 c_10_v_(video_186.mp4

In [8]:
import os

image_dir = "frames"
mask_dir = "masks_numeric"

# همه فایل‌های jpg رو می‌گیریم
image_files = [f for f in os.listdir(image_dir) if f.endswith(".jpg")]

matched_images = []
matched_masks = []

for img_file in image_files:
    mask_file = img_file.replace(".jpg", ".png")
    mask_path = os.path.join(mask_dir, mask_file)
    
    if os.path.exists(mask_path):
        matched_images.append(os.path.join(image_dir, img_file))
        matched_masks.append(mask_path)

print(f"✅ {len(matched_images)} تصویر دارای ماسک هستند.")
print(f"❌ {len(image_files) - len(matched_images)} تصویر بدون ماسک هستند.")


✅ 373 تصویر دارای ماسک هستند.
❌ 0 تصویر بدون ماسک هستند.


In [9]:
import tensorflow as tf

def random_augment(img, mask):
    # فلیپ افقی
    if tf.random.uniform(()) > 0.5:
        img = tf.image.flip_left_right(img)
        mask = tf.image.flip_left_right(mask)

    # فلیپ عمودی
    if tf.random.uniform(()) > 0.5:
        img = tf.image.flip_up_down(img)
        mask = tf.image.flip_up_down(mask)

    # چرخش 90 درجه (تصادفی بین 0 و 3 بار)
    k = tf.random.uniform((), minval=0, maxval=4, dtype=tf.int32)
    img = tf.image.rot90(img, k)
    mask = tf.image.rot90(mask, k)

    # تغییر روشنایی (فقط روی تصویر)
    img = tf.image.random_brightness(img, max_delta=0.2)

    return img, mask


In [10]:
import random

# ترکیب تصاویر و ماسک‌ها
combined = list(zip(matched_images, matched_masks))
random.seed(42)
random.shuffle(combined)

# تقسیم ۸۰٪ آموزش - ۲۰٪ اعتبارسنجی
split_idx = int(0.8 * len(combined))
train_files = combined[:split_idx]
val_files = combined[split_idx:]

# باز کردن دوباره به صورت جدا
train_image_files, train_mask_files = zip(*train_files)
val_image_files, val_mask_files = zip(*val_files)


print(f"📦 آموزش: {len(train_image_files)} نمونه")
print(f"🧪 اعتبارسنجی: {len(train_image_files)} نمونه")
print(train_image_files)
print(train_mask_files)


📦 آموزش: 298 نمونه
🧪 اعتبارسنجی: 298 نمونه
('frames/c_40_v_(video_1281.mp4)_f_262.jpg', 'frames/c_34_v_(video_1099.mp4)_f_5.jpg', 'frames/c_23_v_(video_672.mp4)_f_397.jpg', 'frames/c_14_v_(video_401.mp4)_f_910.jpg', 'frames/c_117_v_(video_3530.mp4)_f_811.jpg', 'frames/c_14_v_(video_400.mp4)_f_810.jpg', 'frames/c_7_v_(video_130.mp4)_f_411.jpg', 'frames/c_8_v_(video_148.mp4)_f_185.jpg', 'frames/c_141_v_(video_4257.mp4)_f_115.jpg', 'frames/c_32_v_(video_1044.mp4)_f_1545.jpg', 'frames/c_15_v_(video_450.mp4)_f_623.jpg', 'frames/c_14_v_(video_401.mp4)_f_90.jpg', 'frames/c_49_v_(video_1561.mp4)_f_1248.jpg', 'frames/c_97_v_(video_2915.mp4)_f_114.jpg', 'frames/c_40_v_(video_1286.mp4)_f_126.jpg', 'frames/c_61_v_(video_1990.mp4)_f_194.jpg', 'frames/c_12_v_(video_370.mp4)_f_25.jpg', 'frames/c_174_v_(video_5312.mp4)_f_1319.jpg', 'frames/c_46_v_(video_1406.mp4)_f_81.jpg', 'frames/c_14_v_(video_401.mp4)_f_253.jpg', 'frames/c_4_v_(video_48.mp4)_f_402.jpg', 'frames/c_141_v_(video_4253.mp4)_f_55.jpg', '

In [11]:
import tensorflow.keras.backend as K

def dice_coefficient(y_true, y_pred, smooth=1e-6):
    y_true_f = K.flatten(tf.one_hot(tf.cast(tf.squeeze(y_true, axis=-1), tf.int32), NUM_CLASSES))
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def iou_metric(y_true, y_pred, smooth=1e-6):
    y_true_f = K.flatten(tf.one_hot(tf.cast(tf.squeeze(y_true, axis=-1), tf.int32), NUM_CLASSES))
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    union = K.sum(y_true_f) + K.sum(y_pred_f) - intersection
    return (intersection + smooth) / (union + smooth)


In [12]:
import os
import random
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt

# ---------- تنظیمات ----------
IMG_SIZE = 256
NUM_CLASSES = 5
BATCH_SIZE = 4
EPOCHS = 50
SEED = 42

# ---------- مسیرها ----------
IMAGE_DIR = "frames"
MASK_DIR = "masks_numeric"

# ---------- خواندن و تقسیم داده ----------
image_files = sorted([os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR)])
mask_files = sorted([os.path.join(MASK_DIR, f) for f in os.listdir(MASK_DIR)])

assert len(image_files) == len(mask_files), "Mismatch in image and mask count."

combined = list(zip(image_files, mask_files))
random.seed(SEED)
random.shuffle(combined)

split_idx = int(0.8 * len(combined))
train_files = combined[:split_idx]
val_files = combined[split_idx:]



class_weights_tensor = tf.constant([
    0.214, 6.454, 11.42, 13.196, 104.479
], dtype=tf.float32)

def weighted_sparse_categorical_loss(y_true, y_pred):
    y_true = tf.cast(tf.squeeze(y_true, axis=-1), tf.int32)
    y_pred = tf.clip_by_value(y_pred, 1e-7, 1 - 1e-7)
    scce = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False)
    weights = tf.gather(class_weights_tensor, y_true)
    weighted_loss = scce * weights
    return tf.reduce_mean(weighted_loss)
import tensorflow as tf

IMG_SIZE = 256

def process_path(image_path, mask_path):
    # خواندن فایل تصویر
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)  # یا decode_png اگر فرمت png است
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    image = tf.cast(image, tf.float32) / 255.0

    # خواندن فایل ماسک
    mask = tf.io.read_file(mask_path)
    mask = tf.image.decode_png(mask, channels=1)
    mask = tf.image.resize(mask, [IMG_SIZE, IMG_SIZE], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # ماسک باید صحیح integer باشه، پس cast به uint8 یا int32
    mask = tf.cast(mask, tf.uint8)

    return image, mask

def build_dataset(image_files, mask_files, batch_size, shuffle=True, augment=False):
    dataset = tf.data.Dataset.from_tensor_slices((image_files, mask_files))
    dataset = dataset.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1000)
    
    # اگر augment داری، اینجا می‌تونی augment اضافه کنی
    if augment:
        dataset = dataset.map(random_augment, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset


train_image_files = list(train_image_files)
train_mask_files = list(train_mask_files)
val_image_files = list(val_image_files)
val_mask_files = list(val_mask_files)

train_dataset = build_dataset(train_image_files, train_mask_files, batch_size=BATCH_SIZE, augment=True)
val_dataset = build_dataset(val_image_files, val_mask_files, batch_size=BATCH_SIZE, augment=False)

# ---------- تعریف مدل UNet بزرگ‌تر ----------
def unet_model(input_size=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES):
    inputs = tf.keras.Input(input_size)

    # Encoder
    c1 = layers.Conv2D(64, 3, activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(64, 3, activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D()(c1)

    c2 = layers.Conv2D(128, 3, activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(128, 3, activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D()(c2)

    c3 = layers.Conv2D(256, 3, activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(256, 3, activation='relu', padding='same')(c3)
    p3 = layers.MaxPooling2D()(c3)

    # Bottleneck
    b = layers.Conv2D(512, 3, activation='relu', padding='same')(p3)
    b = layers.Conv2D(512, 3, activation='relu', padding='same')(b)

    # Decoder
    u3 = layers.UpSampling2D()(b)
    u3 = layers.concatenate([u3, c3])
    c4 = layers.Conv2D(256, 3, activation='relu', padding='same')(u3)
    c4 = layers.Conv2D(256, 3, activation='relu', padding='same')(c4)

    u2 = layers.UpSampling2D()(c4)
    u2 = layers.concatenate([u2, c2])
    c5 = layers.Conv2D(128, 3, activation='relu', padding='same')(u2)
    c5 = layers.Conv2D(128, 3, activation='relu', padding='same')(c5)

    u1 = layers.UpSampling2D()(c5)
    u1 = layers.concatenate([u1, c1])
    c6 = layers.Conv2D(64, 3, activation='relu', padding='same')(u1)
    c6 = layers.Conv2D(64, 3, activation='relu', padding='same')(c6)

    outputs = layers.Conv2D(num_classes, 1, activation='softmax')(c6)

    return models.Model(inputs, outputs)

# ---------- کال‌بک‌ها ----------
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint_cb = callbacks.ModelCheckpoint("unet_checkpoint_best.h5", save_best_only=True, monitor='val_loss')
tensorboard_cb = callbacks.TensorBoard(log_dir="logs_unet", histogram_freq=1, write_images=True)

# ---------- آموزش مدل ----------
model = unet_model()
from tensorflow.keras.optimizers import Adam

learning_rate = 1e-4  # مقدار پیشنهادی، می‌تونی کم و زیاد کنی
optimizer = Adam(learning_rate=learning_rate)

model.compile(
    optimizer=optimizer,
    loss=weighted_sparse_categorical_loss,
    metrics=['accuracy', dice_coefficient, iou_metric]
)



I0000 00:00:1748545906.285675  564220 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1748545906.285735  564220 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=[early_stop, checkpoint_cb, tensorboard_cb]
)

# ---------- ذخیره مدل ----------
model.save("unet_endometriosis_model_final.keras")

# ---------- ارزیابی روی داده تست ----------


Epoch 1/50
[1m 3/75[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m18:03[0m 15s/step - accuracy: 0.3396 - dice_coefficient: 0.2066 - iou_metric: 0.1152 - loss: 1.0759

In [None]:
def visualize_prediction(dataset, model, num_samples=3):
    for image, true_mask in dataset.take(num_samples):
        pred = model.predict(image)
        pred_mask = tf.argmax(pred, axis=-1).numpy()

        batch_size = image.shape[0]
        for i in range(batch_size):
            plt.figure(figsize=(12, 4))
            plt.subplot(1, 3, 1)
            plt.title("Input Image")
            plt.imshow(image[i])
            plt.axis("off")

            plt.subplot(1, 3, 2)
            plt.title("Ground Truth")
            plt.imshow(true_mask[i, ..., 0], cmap='jet', vmin=0, vmax=NUM_CLASSES-1)
            plt.axis("off")

            plt.subplot(1, 3, 3)
            plt.title("Predicted Mask")
            plt.imshow(pred_mask[i], cmap='jet', vmin=0, vmax=NUM_CLASSES-1)
            plt.axis("off")
            plt.tight_layout()
            plt.show()

# نمایش نمونه‌های تست
visualize_prediction(val_dataset, model)

In [None]:
from tensorflow import keras

# بارگذاری بهترین مدل ذخیره‌شده توسط ModelCheckpoint
model = keras.models.load_model("unet_checkpoint_best.h5")  # یا .keras اگه تغییرش دادی


In [None]:
import os
from PIL import Image
import numpy as np

mask_dir = "masks_numeric"
all_classes = set()

for f in os.listdir(mask_dir):
    if f.endswith(".png"):
        mask = np.array(Image.open(os.path.join(mask_dir, f)))
        classes = np.unique(mask)
        all_classes.update(classes)

print("📊 تمام کلاس‌های موجود در ماسک‌ها:", sorted(all_classes))


In [None]:
def apply_color_map(mask):
    rgb_mask = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8)
    for i, color in enumerate(CLASS_COLORS):
        rgb_mask[mask == i] = color
    return rgb_mask

def overlay_mask_on_image(image, mask_rgb, alpha=0.5):
    overlay = (image * 255).astype(np.uint8)
    return ((1 - alpha) * overlay + alpha * mask_rgb).astype(np.uint8)

def visualize_prediction2(dataset, model, num_samples=3):
    for image, true_mask in dataset.take(num_samples):
        pred = model.predict(image)
        pred_mask = tf.argmax(pred, axis=-1).numpy()

        batch_size = image.shape[0]
        for i in range(batch_size):
            img_np = image[i].numpy()
            true_mask_rgb = apply_color_map(true_mask[i, ..., 0].numpy())
            pred_mask_rgb = apply_color_map(pred_mask[i])
            overlay_pred = overlay_mask_on_image(img_np, pred_mask_rgb)

            plt.figure(figsize=(18, 5))
            plt.subplot(1, 3, 1)
            plt.title("Input Image")
            plt.imshow(img_np)
            plt.axis("off")

            plt.subplot(1, 3, 2)
            plt.title("Ground Truth Mask")
            plt.imshow(true_mask_rgb)
            plt.axis("off")

            plt.subplot(1, 3, 3)
            plt.title("Overlay: Prediction on Image")
            plt.imshow(overlay_pred)
            plt.axis("off")

            plt.tight_layout()
            plt.show()

# نمایش نمونه‌های تست
visualize_prediction2(val_dataset, model)


In [None]:
import numpy as np

def analyze_prediction(pred_mask):
    unique_classes = np.unique(pred_mask)
    print("🧪 کلاس‌های پیش‌بینی شده:", unique_classes)

# فرض: pred_mask از visualize_prediction گرفته شده
# مثال روی یک نمونه تست:
for image, true_mask in val_dataset.take(1):
    pred = model.predict(image)
    pred_mask = tf.argmax(pred, axis=-1).numpy()
    analyze_prediction(pred_mask[0])
