**Create and Prepare Data Pipelines**

This cell **constructs the data pipelines** for the training and validation datasets using `tf.data.Dataset`. Key steps include:
*   Applying the `load_and_preprocess` function to each image and mask pair.
*   **Caching** the data for improved performance.
*   **Shuffling** the training data to ensure randomness during training.
*   **Batching** the data for efficient training.
*   Using `prefetch` to **overlap data loading and model execution**.

It also **prints the details** of the created training and validation datasets.

In [None]:
import tensorflow as tf

IMG_HEIGHT = 256
IMG_WIDTH = 256


def load_and_preprocess(image_path, mask_path):
    """Load one image/mask pair from disk and bring both to the model's input size.

    Args:
        image_path: Path of a PNG image; decoded as 3 channels.
        mask_path: Path of a PNG segmentation mask; decoded as 1 channel.

    Returns:
        A tuple ``(image, mask)``. ``image`` is a float tensor of shape
        ``(IMG_HEIGHT, IMG_WIDTH, 3)`` scaled to ``[0, 1]``; ``mask`` has
        shape ``(IMG_HEIGHT, IMG_WIDTH, 1)`` and keeps its decoded pixel values.
    """
    target_size = [IMG_HEIGHT, IMG_WIDTH]

    # NOTE(review): the casts are kept from the original, which added them to
    # avoid "float32" errors -- presumably raised when an empty path list makes
    # the dataset element dtype default to float32. Confirm against the caller.
    image_path = tf.cast(image_path, tf.string)
    mask_path = tf.cast(mask_path, tf.string)

    # Image: decode, resize with the default interpolation, scale to [0, 1].
    rgb = tf.image.decode_png(tf.io.read_file(image_path), channels=3)
    rgb = tf.image.resize(rgb, target_size) / 255.0

    # Mask: nearest-neighbour resizing copies existing pixel values instead of
    # blending them, so no in-between label values are introduced.
    labels = tf.image.decode_png(tf.io.read_file(mask_path), channels=1)
    labels = tf.image.resize(labels, target_size, method='nearest')

    return rgb, labels

BATCH_SIZE = 16
BUFFER_SIZE = 1000

# Training pipeline: decode each pair in parallel, cache the decoded tensors so
# later epochs skip the file reads, then shuffle, batch and prefetch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_image_paths, train_mask_paths))
    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Validation pipeline: same stages minus the shuffle, so the order is stable.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((val_image_paths, val_mask_paths))
    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# len() on a batched dataset reports the number of batches, not of samples.
print("✅ Pipelines Created.")
print(f"Training Batches: {len(train_dataset)}")
print(f"Validation Batches: {len(val_dataset)}")

Items in Train: 270
Items in Val:   0
⚠️ Validation list is empty! Using Training set for Validation to prevent crash.
✅ Pipelines Created Successfully.
Training Batches: 17
Validation Batches: 17


In [None]:
BATCH_SIZE = 16
BUFFER_SIZE = 1000  # Size of the buffer that shuffle() samples from

# --- TRAINING dataset: map -> cache -> shuffle -> batch -> prefetch ---
# Caching sits before the shuffle so the decoded samples are reused across
# epochs while the sample order is still redrawn.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_image_paths, train_mask_paths))
    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# --- VALIDATION dataset: identical stages, but without shuffling ---
val_dataset = (
    tf.data.Dataset.from_tensor_slices((val_image_paths, val_mask_paths))
    .map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Printing a dataset shows its element_spec (shapes and dtypes), not its data.
print(f"Training Dataset: {train_dataset}")
print(f"Validation Dataset: {val_dataset}")

Training Dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 256, 256, 1), dtype=tf.uint8, name=None))>
Validation Dataset: <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 256, 256, 1), dtype=tf.uint8, name=None))>


**Build, Compile, and Train the U-Net Model**

This cell outlines the steps for **building, compiling, and training a U-Net model**.
1.  **Build your U-Net model**: `create_unet_model()` assembles the encoder, bottleneck, and decoder (with skip connections) into a Keras `Model`.
2.  **Compile it**: Configures the training process with the Adam optimizer, the `sparse_categorical_crossentropy` loss, and an accuracy metric.
3.  **Train it!**: Uses the `model.fit()` function to train the model on the prepared training dataset and evaluate it on the validation dataset.
4.  **Save your final model**: Saves the trained model to your project folder in Google Drive for later use.

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Conv2DTranspose, concatenate
from tensorflow.keras.models import Model

def create_unet_model(input_size=(256, 256, 3), num_classes=34):
    """Build an uncompiled U-Net for per-pixel classification.

    The network has four encoder stages (64, 128, 256, 512 filters), a
    1024-filter bottleneck, and four decoder stages that mirror the encoder.
    Each decoder stage upsamples with a stride-2 transposed convolution and
    concatenates the matching encoder output (skip connection).

    Args:
        input_size: Shape of one input sample, passed straight to ``Input``.
            NOTE(review): height and width presumably need to be divisible
            by 16 so the skip concatenations line up -- confirm.
        num_classes: Number of channels of the final softmax layer, i.e. one
            probability per class for every pixel.

    Returns:
        A Keras ``Model`` mapping the input to the softmax output.
    """

    def double_conv(tensor, filters):
        # Two 3x3 ReLU convolutions; 'same' padding keeps the spatial size.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu',
                            kernel_initializer='he_normal', padding='same')(tensor)
        return tensor

    def upsample_and_merge(tensor, skip, filters):
        # Upsample by 2, join with the encoder features, then refine.
        upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
        return double_conv(concatenate([upsampled, skip]), filters)

    inputs = Input(input_size)

    # --- ENCODER (contracting path) ---
    # Each stage's pre-pooling output is remembered for the decoder.
    skips = []
    x = inputs
    for filters in (64, 128, 256):
        x = double_conv(x, filters)
        skips.append(x)
        x = MaxPooling2D((2, 2))(x)

    # The deepest encoder stage adds dropout; the skip connection takes the
    # dropped-out tensor, not the raw convolution output.
    x = double_conv(x, 512)
    x = Dropout(0.3)(x)
    skips.append(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # --- BOTTLENECK ---
    x = double_conv(x, 1024)
    x = Dropout(0.3)(x)

    # --- DECODER (expansive path) ---
    # Skips are consumed deepest-first, pairing each stage with its mirror.
    for filters in (512, 256, 128, 64):
        x = upsample_and_merge(x, skips.pop(), filters)

    # --- OUTPUT LAYER ---
    # 1x1 convolution with softmax: a class distribution for every pixel.
    outputs = Conv2D(num_classes, (1, 1), activation='softmax')(x)

    return Model(inputs=[inputs], outputs=[outputs])

# Instantiate the U-Net with its default input size and class count
model = create_unet_model()

# Configure training. The sparse variant of categorical cross-entropy is used
# because the masks hold integer class ids (0, 1, 2...) rather than one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer structure
model.summary()