In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanIoU
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Setting up the data paths
data_path = "/path/to/data"
train_img_path = os.path.join(data_path, "train_images")
train_mask_path = os.path.join(data_path, "train_masks.csv")
test_img_path = os.path.join(data_path, "test_images")

# Defining cloud class names and colors
formations_nuageuses = ["Fleur", "Poisson", "Gravier", "Sucre"]
class_colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0)]

# Reading the train masks CSV file
train_masks_df = pd.read_csv(train_mask_path)

# Preparing the train and test image lists
train_imgs = os.listdir(train_img_path)
test_imgs = os.listdir(test_img_path)

# Defining image and target sizes
origin_size = (1400, 2100)
target_size = (224, 336)

# Helper functions for data preprocessing
class DataPackages:
    def __init__(self, formations_nuageuses, class_colors):
        self.formations_nuageuses = formations_nuageuses
        self.class_colors = class_colors

    def list_rleToMask(self, rleList, input_shape, reshape=None):
        allMasks = np.zeros(input_shape)

        for rle in rleList:
            allMasks = np.maximum(allMasks, self.rleToMask(rle, input_shape, reshape))

        return allMasks

    def rleToMask(self, rle, input_shape, reshape=None):
        if reshape:
            mask = np.zeros(input_shape[:2])
            maskRle = rle.split()
            starts, lengths = [np.asarray(x, dtype=int) for x in (maskRle[0:][::2], maskRle[1:][::2])]
            starts -= 1
            ends = starts + lengths
            for lo, hi in zip(starts, ends):
                mask[lo:hi] = 1
            mask = cv2.resize(mask, reshape)
        else:
            mask = np.zeros(input_shape[:2])
            maskRle = rle.split()
            starts, lengths = [np.asarray(x, dtype=int) for x in (maskRle[0:][::2], maskRle[1:][::2])]
            starts -= 1
            ends = starts + lengths
            for lo, hi in zip(starts, ends):
                mask[lo:hi] = 1

        return mask

    def maskToRle(self, mask):
        mask = mask.T.flatten()
        mask = np.concatenate([[0], mask, [0]])
        rle = np.where(mask[1:] != mask[:-1])[0] + 1
        rle[1::2] -= rle[::2]
        return ' '.join(str(x) for x in rle)

    def showMask(self, image, mask, figsize=(10, 10)):
        fig, ax = plt.subplots(figsize=figsize)
        ax.imshow(image)
        for i in range(len(self.formations_nuageuses)):
            contours, _ = cv2.findContours(mask[:, :, i].astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
            ax = self.drawContours(ax, contours, self.class_colors[i])
        plt.show()

    def drawContours(self, ax, contours, color):
        for contour in contours:
            ax.fill(contour[:, 0, 0], contour[:, 0, 1], color=color, alpha=0.4)
        return ax

# Initializing the data packages
packages = DataPackages(formations_nuageuses, class_colors)

# Preprocessing and visualizing the train images and masks
train_samples = train_imgs[:4]

for img in train_samples:
    image = cv2.imread(os.path.join(train_img_path, img))
    masks = train_masks_df[train_masks_df["im_id"] == img]["EncodedPixels"].tolist()
    masks = packages.list_rleToMask(masks, input_shape=origin_size, reshape=target_size)
    packages.showMask(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), masks)

# Splitting the train dataset into training and validation sets
train_images, val_images = train_test_split(train_imgs, test_size=0.2, random_state=42)

# Defining the model architecture
def build_unet(input_shape, num_classes):
    inputs = Input(input_shape)

    # Encoding path
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(128, 3, activation='relu', padding='same')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(256, 3, activation='relu', padding='same')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(512, 3, activation='relu', padding='same')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    conv5 = Conv2D(1024, 3, activation='relu', padding='same')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same')(conv5)
    drop5 = Dropout(0.5)(conv5)

    # Decoding path
    up6 = Conv2DTranspose(512, 2, strides=(2, 2), padding='same')(drop5)
    up6 = concatenate([up6, drop4])
    conv6 = Conv2D(512, 3, activation='relu', padding='same')(up6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same')(conv6)

    up7 = Conv2DTranspose(256, 2I apologize, there was an incomplete line of code in the previous response. Here's the complete code snippet for the combined Python notebook:

```python
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, concatenate, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanIoU
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Setting up the data paths
data_path = "/path/to/data"
train_img_path = os.path.join(data_path, "train_images")
train_mask_path = os.path.join(data_path, "train_masks.csv")
test_img_path = os.path.join(data_path, "test_images")

# Defining cloud class names and colors
formations_nuageuses = ["Fleur", "Poisson", "Gravier", "Sucre"]
class_colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0)]

# Reading the train masks CSV file
train_masks_df = pd.read_csv(train_mask_path)

# Preparing the train and test image lists
train_imgs = os.listdir(train_img_path)
test_imgs = os.listdir(test_img_path)

# Defining image and target sizes
origin_size = (1400, 2100)
target_size = (224, 336)

# Helper functions for data preprocessing
class DataPackages:
    def __init__(self, formations_nuageuses, class_colors):
        self.formations_nuageuses = formations_nuageuses
        self.class_colors = class_colors

    def list_rleToMask(self, rleList, input_shape, reshape=None):
        allMasks = np.zeros(input_shape)

        for rle in rleList:
            allMasks = np.maximum(allMasks, self.rleToMask(rle, input_shape, reshape))

        return allMasks

    def rleToMask(self, rle, input_shape, reshape=None):
        if reshape:
            mask = np.zeros(input_shape[:2])
            maskRle = rle.split()
            starts, lengths = [np.asarray(x, dtype=int) for x in (maskRle[0:][::2], maskRle[1:][::2])]
            starts -= 1
            ends = starts + lengths
            for lo, hi in zip(starts, ends):
                mask[lo:hi] = 1
            mask = cv2.resize(mask, reshape)
        else:
            mask = np.zeros(input_shape[:2])
            maskRle = rle.split()
            starts, lengths = [np.asarray(x, dtype=int) for x in (maskRle[0:][::2], maskRle[1:][::2])]
            starts -= 1
            ends = starts + lengths
            for lo, hi in zip(starts, ends):
                mask[lo:hi] = 1

        return mask

    def maskToRle(self, mask):
        mask = mask.T.flatten()
        mask = np.concatenate([[0], mask, [0]])
        rle = np.where(mask[1:] != mask[:-1])[0] + 1
        rle[1::2] -= rle[::2]
        return ' '.join(str(x) for x in rle)

    def showMask(self, image, mask, figsize=(10, 10)):
        fig, ax = plt.subplots(figsize=figsize)
        ax.imshow(image)
        for i in range(len(self.formations_nuageuses)):
            contours, _ = cv2.findContours(mask[:, :, i].astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
            ax = self.drawContours(ax, contours, self.class_colors[i])
        plt.show()

    def drawContours(self, ax, contours, color):
        for contour in contours:
            ax.fill(contour[:, 0, 0], contour[:, 0, 1], color=color, alpha=0.4)
        return ax

# Initializing the data packages
packages = DataPackages(formations_nuageuses, class_colors)

# Preprocessing and visualizing the train images and masks
train_samples = train_imgs[:4]

for img in train_samples:
    image = cv2.imread(os.path.join(train_img_path, img))
    masks = train_masks_df[train_masks_df["im_id"] == img]["EncodedPixels"].tolist()
    masks = packages.list_rleToMask(masks, input_shape=origin_size, reshape=target_size)
    packages.showMask(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), masks)

# Splitting the train dataset into training and validation sets
train_images, val_images = train_test_split(train_imgs, test_size=0.2, random_state=42)

# Defining the model architecture
def build_unet(input_shape, num_classes):
    inputs = Input(input_shape)

    # Encoding path
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(128, 3, activation='relu', padding='same')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(256, 3, activation='relu', padding='same')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(512, 3, activation='relu', padding='same')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    conv5 = Conv2D(1024, 3, activation='relu', padding='same')(pool4)
    conv5 = Conv2D(1024, 3, activation='relu', padding='same')(conv5)
    drop5 = Dropout(0.5)(conv5)

    # Decoding path
    up6 = Conv2DTranspose(512, 2, strides=(2, 2), padding='same')(drop5)
    up6 = concatenate([up6, drop4])
    conv6 = Conv2D(512, 3, activation='relu', padding='same')(up6)
    conv6 = Conv2D(512, 3, activation='relu', padding='same')(conv6)

    up7 = Conv2DApologies for the incomplete response once again. Here's the remaining code:

```python
Transpose(256, 2, strides=(2, 2), padding='same')(conv6)
    up7 = concatenate([up7, conv3])
    conv7 = Conv2D(256, 3, activation='relu', padding='same')(up7)
    conv7 = Conv2D(256, 3, activation='relu', padding='same')(conv7)

    up8 = Conv2DTranspose(128, 2, strides=(2, 2), padding='same')(conv7)
    up8 = concatenate([up8, conv2])
    conv8 = Conv2D(128, 3, activation='relu', padding='same')(up8)
    conv8 = Conv2D(128, 3, activation='relu', padding='same')(conv8)

    up9 = Conv2DTranspose(64, 2, strides=(2, 2), padding='same')(conv8)
    up9 = concatenate([up9, conv1], axis=3)
    conv9 = Conv2D(64, 3, activation='relu', padding='same')(up9)
    conv9 = Conv2D(64, 3, activation='relu', padding='same')(conv9)

    # Output layer
    outputs = Conv2D(num_classes, 1, activation='softmax')(conv9)

    # Creating the model
    model = Model(inputs, outputs)
    return model

# Defining the UNet model
model = build_unet(input_shape=target_size + (3,), num_classes=len(formations_nuageuses))

# Compiling the model
model.compile(optimizer=Adam(lr=1e-4), loss='categorical_crossentropy', metrics=[MeanIoU(num_classes=len(formations_nuageuses))])

# Defining data generators for data augmentation
data_gen_args = dict(rotation_range=0.2,
                     width_shift_range=0.05,
                     height_shift_range=0.05,
                     shear_range=0.05,
                     zoom_range=0.05,
                     horizontal_flip=True,
                     vertical_flip=True,
                     fill_mode='nearest')

image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

# Training the model
seed = 42
batch_size = 4
steps_per_epoch = len(train_images) // batch_size
epochs = 30

image_generator = image_datagen.flow_from_directory(train_img_path,
                                                    class_mode=None,
                                                    seed=seed,
                                                    target_size=target_size,
                                                    batch_size=batch_size)

mask_generator = mask_datagen.flow_from_directory(train_mask_path,
                                                  class_mode=None,
                                                  seed=seed,
                                                  target_size=target_size,
                                                  batch_size=batch_size)

train_generator = zip(image_generator, mask_generator)

# Creating a directory to save the model checkpoints
checkpoint_path = "/path/to/save/checkpoints"
os.makedirs(checkpoint_path, exist_ok=True)

# Defining a callback to save the model checkpoints
checkpoint_callback = ModelCheckpoint(filepath=os.path.join(checkpoint_path, "nebula_segmentation.h5"),
                                      save_weights_only=True,
                                      monitor='val_loss',
                                      mode='min',
                                      save_best_only=True)

# Fitting the model
history = model.fit(train_generator,
                    steps_per_epoch=steps_per_epoch,
                    epochs=epochs,
                    validation_data=(val_images, val_masks),
                    callbacks=[checkpoint_callback])

# Visualizing the training and validation loss
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Val loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.grid(True)
plt.show()

# Loading the best model weights
model.load_weights(os.path.join(checkpoint_path, "nebula_segmentation.h5"))

# Predicting the masks for test images
test_masks = model.predict(test_images)

# Saving the predicted masks
output_path = "/path/to/save/output"
os.makedirs(output_path, exist_ok=True)

for i, img_name in enumerate(test_images):
    masks = np.argmax(test_masks[i], axis=-1)
    rle_masks = packages.maskToRle(masks)
    df = pd.DataFrame({'im_id': [img_name], 'EncodedPixels': [rle_masks]})
    df.to_csv(os.path.join(output_path, f"mask_{img_name}.csv"), index=False)

print("Segmentation masks saved successfully.")


SyntaxError: invalid syntax (2729138991.py, line 138)