In [1]:
import os
# Must be set before TensorFlow is imported: level '3' silences the INFO,
# WARNING and ERROR lines emitted by TensorFlow's C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
from tensorflow import keras
import cv2
import glob
import numpy as np

# Display the logical devices TensorFlow can see. The stable
# `tf.config.list_logical_devices` replaces the older `experimental` alias.
tf.config.list_logical_devices()

[LogicalDevice(name='/device:CPU:0', device_type='CPU'),
 LogicalDevice(name='/device:GPU:0', device_type='GPU')]

In [2]:
def list_split_paths(split):
    """Collect the sorted image and mask paths of one dataset split.

    Args:
        split: Sub-directory name under ``floods_dataset`` (for example
            ``'train'``, ``'validation'`` or ``'test'``).

    Returns:
        Tuple ``(image_paths, mask_paths)`` of two sorted lists of paths.

    Raises:
        ValueError: If the number of images differs from the number of masks.
    """
    image_paths = sorted(glob.glob(f'floods_dataset/{split}/images/*.jpg'))
    mask_paths = sorted(glob.glob(f'floods_dataset/{split}/masks/*.png'))

    # Images and masks are paired purely by position in the sorted lists, so
    # a count mismatch would silently misalign every following sample.
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"split {split!r}: found {len(image_paths)} images "
            f"but {len(mask_paths)} masks"
        )
    return image_paths, mask_paths


trainX_paths, trainY_paths = list_split_paths('train')
valX_paths, valY_paths = list_split_paths('validation')
testX_paths, testY_paths = list_split_paths('test')

print(len(trainX_paths), len(trainY_paths))
print(len(valX_paths), len(valY_paths))
print(len(testX_paths), len(testY_paths))

1200 1200
180 180
360 360


In [3]:
def read_image(path):
    """Load a colour image from disk as a float32 array scaled by 1/255.

    Args:
        path: Filesystem path of the image, handed to ``cv2.imread``.

    Returns:
        ``np.float32`` array of shape (height, width, 3). Channels are in
        OpenCV's BGR order; pixel values are divided by 255.

    Raises:
        OSError: If OpenCV cannot read or decode the file.
    """
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    if x is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque TypeError below.
        raise OSError(f"could not read image: {path!r}")
    x = x / 255.0
    x = x.astype(np.float32)
    return x

def read_mask(path):
    """Load a single-channel mask from disk as a float32 array scaled by 1/255.

    Args:
        path: Filesystem path of the mask, handed to ``cv2.imread``.

    Returns:
        ``np.float32`` array of shape (height, width, 1); pixel values are
        divided by 255 and a trailing channel axis is appended.

    Raises:
        OSError: If OpenCV cannot read or decode the file.
    """
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if x is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque TypeError below.
        raise OSError(f"could not read mask: {path!r}")
    x = x / 255.0
    # Grayscale decoding yields a 2-D array; add the channel axis explicitly.
    x = np.expand_dims(x, axis=-1)
    x = x.astype(np.float32)

    return x

def preprocess(x, y):
    """Turn an (image path, mask path) pair into decoded float32 tensors.

    The decoding runs in Python through ``tf.numpy_function``; afterwards the
    static shapes are pinned to 224x224x3 (image) and 224x224x1 (mask).

    Args:
        x: String tensor holding the image path.
        y: String tensor holding the mask path.

    Returns:
        Tuple ``(image, mask)`` of float32 tensors.
    """
    def _load_pair(image_path, mask_path):
        # Paths arrive as bytes objects and must be decoded to str first.
        return read_image(image_path.decode()), read_mask(mask_path.decode())

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.float32])

    # Pin the static shapes explicitly on the tensors coming back from the
    # Python call.
    for tensor, channels in ((image, 3), (mask, 1)):
        tensor.set_shape([224, 224, channels])

    return image, mask

def tf_dataset(x, y, batch=8, shuffle=True):
    """Build a batched ``tf.data`` pipeline from image and mask paths.

    Args:
        x: Sequence of image paths.
        y: Sequence of mask paths, positionally aligned with ``x``.
        batch: Number of samples per batch.
        shuffle: Whether to shuffle the samples on every pass. Defaults to
            True, matching the previous always-shuffle behaviour; pass False
            for evaluation data where a stable order is wanted.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, mask_batch)`` pairs
        produced by ``preprocess``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    if shuffle:
        # Shuffling happens on the path strings (before decoding), so a buffer
        # covering the whole dataset is cheap and gives a uniform shuffle; a
        # fixed buffer smaller than the dataset would not.
        dataset = dataset.shuffle(buffer_size=max(len(x), 1))
    # Decode several files concurrently; output order stays deterministic.
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

In [4]:
# One batched pipeline per split, each using batches of 8, built in the
# order train -> validation -> test.
train_dataset, val_dataset, test_dataset = (
    tf_dataset(image_paths, mask_paths, batch=8)
    for image_paths, mask_paths in (
        (trainX_paths, trainY_paths),
        (valX_paths, valY_paths),
        (testX_paths, testY_paths),
    )
)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [5]:
# Sanity check: pull a single batch and show the image / mask batch shapes.
for x, y in train_dataset.take(1):
    print(x.shape, y.shape)

(8, 224, 224, 3) (8, 224, 224, 1)
