In [1]:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import MeanIoU


2024-11-01 20:07:42.194581: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-01 20:07:42.202537: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-01 20:07:42.214370: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730488062.248932  527188 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730488062.258417  527188 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-01 20:07:42.293648: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
def load_images(image_dir, mask_dir, target_size=(128, 128)):
    image_files = sorted(os.listdir(image_dir))
    mask_files = sorted(os.listdir(mask_dir))
    
    images = []
    masks = []
    
    for img_file, mask_file in zip(image_files, mask_files):
        img = load_img(os.path.join(image_dir, img_file), target_size=target_size)
        mask = load_img(os.path.join(mask_dir, mask_file), target_size=target_size, color_mode='grayscale')
        
        images.append(img_to_array(img) / 255.0)
        masks.append(img_to_array(mask) / 255.0)
        
    return np.array(images), np.array(masks)

In [4]:
# Load data
image_dir = '../data/VOCdevkit/VOC2012/JPEGImages'
mask_dir = '../data/VOCdevkit/VOC2012/SegmentationClass'
images, masks = load_images(image_dir, mask_dir)

In [5]:
def unet_model(input_shape=(128, 128, 3), num_classes=1):
    inputs = layers.Input(shape=input_shape)
    
    # Encoder
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    conv1 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D((2, 2))(conv1)
    
    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling2D((2, 2))(conv2)
    
    conv3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = layers.MaxPooling2D((2, 2))(conv3)
    
    conv4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = layers.MaxPooling2D((2, 2))(conv4)
    
    # Bottleneck
    conv5 = layers.Conv2D(1024, (3, 3), activation='relu', padding='same')(pool4)
    conv5 = layers.Conv2D(1024, (3, 3), activation='relu', padding='same')(conv5)
    
    # Decoder
    up6 = layers.Conv2DTranspose(512, (2, 2), strides=(2, 2), padding='same')(conv5)
    up6 = layers.concatenate([up6, conv4])
    conv6 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(up6)
    conv6 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv6)
    
    up7 = layers.Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv6)
    up7 = layers.concatenate([up7, conv3])
    conv7 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(up7)
    conv7 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv7)
    
    up8 = layers.Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv7)
    up8 = layers.concatenate([up8, conv2])
    conv8 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(up8)
    conv8 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv8)
    
    up9 = layers.Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv8)
    up9 = layers.concatenate([up9, conv1])
    conv9 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(up9)
    conv9 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv9)
    
    outputs = layers.Conv2D(num_classes, (1, 1), activation='sigmoid')(conv9)
    
    model = Model(inputs, outputs)
    return model

In [6]:
# Define model, compile with binary cross-entropy, and add IoU as a metric
model = unet_model(input_shape=(128, 128, 3), num_classes=1)
model.compile(optimizer=Adam(), loss=BinaryCrossentropy(), metrics=[MeanIoU(num_classes=2)])

# TODO: correct classes

# Train the model
history = model.fit(images, masks, epochs=50, batch_size=8, validation_split=0.2)

2024-11-01 20:08:36.585304: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/50


2024-11-01 20:08:39.145209: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 458096640 exceeds 10% of free system memory.
2024-11-01 20:08:39.463977: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 152698880 exceeds 10% of free system memory.


[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - loss: 0.4365 - mean_io_u: 1.0000

2024-11-01 20:37:21.764154: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 114622464 exceeds 10% of free system memory.


[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1833s[0m 6s/step - loss: 0.4364 - mean_io_u: 1.0000 - val_loss: 0.5686 - val_mean_io_u: 1.0000
Epoch 2/50
[1m  6/292[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m28:47[0m 6s/step - loss: 0.4232 - mean_io_u: 1.0000

KeyboardInterrupt: 

In [None]:
# Evaluate model performance
loss, iou = model.evaluate(images, masks)
print(f"Test Loss: {loss}, Test IoU: {iou}")

# Save model
model.save('unet_model.h5')