In [1]:
import gcsfs  # needed by the `fs = gcsfs.GCSFileSystem()` cell below
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_io as tfio

In [2]:
import pandas as pd

In [3]:
fs = gcsfs.GCSFileSystem()

In [4]:
# Sentinel value that lets tf.data tune the prefetching buffer size automatically.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [5]:
SEED = 42

In [6]:
# CSV manifests on GCS for the training and validation splits.
# Rows are ';'-delimited: chip id, the B02/B03/B04/B08 GeoTIFF paths, then the label path.
TRAIN_DATA_PATH= "gs://drivendata-snowcast/cloud_detection/data/gcs_train_data.csv"
VAL_DATA_PATH= "gs://drivendata-snowcast/cloud_detection/data/gcs_val_data.csv"

In [7]:
# Number of feature folders
len(fs.ls("gs://drivendata-snowcast/cloud_detection/data/train_features"))

11748

In [8]:
# Number of masks
len(fs.ls("gs://drivendata-snowcast/cloud_detection/data/train_labels"))

11748

In [9]:
# NOTE(review): the chips decoded further down are 512x512, and IMG_SIZE is not
# referenced in the visible cells — presumably the intended model input size; confirm.
IMG_SIZE = 192
# One channel per spectral band listed in the manifest (B02, B03, B04, B08).
N_CHANNELS = 4
# Presumably a single binary output (cloud / no cloud) — TODO confirm.
N_CLASSES = 1

In [10]:
file_paths = TRAIN_DATA_PATH

In [11]:
# Each dataset element is one raw text line of the manifest. The header line is
# still present here, which is why downstream cells call `.skip(1)`.
train_dataset = tf.data.TextLineDataset(filenames=file_paths)

2022-01-09 09:44:27.763646: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [13]:
list(train_dataset.skip(1).take(5))

2022-01-08 23:19:17.123753: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


[<tf.Tensor: shape=(), dtype=string, numpy=b'adwp;gs://drivendata-snowcast/cloud_detection/data/train_features/adwp/B02.tif;gs://drivendata-snowcast/cloud_detection/data/train_features/adwp/B03.tif;gs://drivendata-snowcast/cloud_detection/data/train_features/adwp/B04.tif;gs://drivendata-snowcast/cloud_detection/data/train_features/adwp/B08.tif;gs://drivendata-snowcast/cloud_detection/data/train_labels/adwp.tif'>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'adwu;gs://drivendata-snowcast/cloud_detection/data/train_features/adwu/B02.tif;gs://drivendata-snowcast/cloud_detection/data/train_features/adwu/B03.tif;gs://drivendata-snowcast/cloud_detection/data/train_features/adwu/B04.tif;gs://drivendata-snowcast/cloud_detection/data/train_features/adwu/B08.tif;gs://drivendata-snowcast/cloud_detection/data/train_labels/adwu.tif'>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'adwz;gs://drivendata-snowcast/cloud_detection/data/train_features/adwz/B02.tif;gs://drivendata-snowcast/cloud_detection/d

In [12]:
# Per-column defaults for tf.io.decode_csv; the column names are reused as the
# (string) default values, which also fixes every column's dtype to string.
# NOTE(review): `parser` below repeats this list inline instead of using this variable.
record_defaults = ["chip_id","B02_path","B03_path","B04_path","B08_path","label_path"]

In [13]:
@tf.function
def load_image(img_path):
    """Read a TIFF file and decode it into an image tensor.

    Args:
        img_path: scalar string tensor holding the path of the TIFF file.

    Returns:
        The tensor produced by `tfio.experimental.image.decode_tiff`; the
        outputs shown later in this notebook are uint8 with 4 channels.
    """
    raw_bytes = tf.io.read_file(img_path)
    return tfio.experimental.image.decode_tiff(raw_bytes)

In [14]:
@tf.function
def parser(row_csv):
    """Turn one ';'-delimited manifest row into an (image, label) pair.

    Args:
        row_csv: scalar string tensor with the columns
            chip_id;B02_path;B03_path;B04_path;B08_path;label_path.

    Returns:
        (image, label): `image` is the four decoded band files concatenated
        along the last axis (B02, B03, B04, B08 order); `label` is the decoded
        label file.
    """
    column_defaults = ["chip_id","B02_path","B03_path","B04_path","B08_path","label_path"]
    fields = tf.io.decode_csv(records = row_csv, record_defaults = column_defaults, field_delim=';')

    # fields[0] is the chip id (unused), fields[1:5] the band paths, fields[5] the label path.
    band_paths = fields[1:5]
    label_path = fields[5]

    band_images = [load_image(band_path) for band_path in band_paths]
    image = tf.concat(band_images, axis = -1)

    label = load_image(label_path)

    return image, label
    

In [46]:
tf.constant([4, 6, 8, 10, 12, 14, 16, 18, 20])

<tf.Tensor: shape=(9,), dtype=int32, numpy=array([ 4,  6,  8, 10, 12, 14, 16, 18, 20], dtype=int32)>

In [47]:
help(tf.random.shuffle)

Help on function random_shuffle in module tensorflow.python.ops.random_ops:

random_shuffle(value, seed=None, name=None)
    Randomly shuffles a tensor along its first dimension.
    
    The tensor is shuffled along dimension 0, such that each `value[j]` is mapped
    to one and only one `output[i]`. For example, a mapping that might occur for a
    3x2 tensor is:
    
    ```python
    [[1, 2],       [[5, 6],
     [3, 4],  ==>   [1, 2],
     [5, 6]]        [3, 4]]
    ```
    
    Args:
      value: A Tensor to be shuffled.
      seed: A Python integer. Used to create a random seed for the distribution.
        See
        `tf.random.set_seed`
        for behavior.
      name: A name for the operation (optional).
    
    Returns:
      A tensor of same shape and type as `value`, shuffled along its first
      dimension.



In [50]:
# Pick one element at random by shuffling and taking the first entry — the same
# trick the augmentation functions use to draw an angle / zoom factor / region.
tf.random.shuffle(tf.constant([4, 6, 8, 10, 12, 14, 16, 18, 20]))[0]

<tf.Tensor: shape=(), dtype=int32, numpy=6>

In [25]:
import numpy as np

import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_addons as tfa
import tensorflow.keras.backend as K

# Chip height and width in pixels; used as the default `height`/`width` of
# `zoom_img_and_msk` (the defaults are bound when that function is defined).
h,w = 512,512

@tf.function
def decode_img(img_path_tensor):
    """Read a TIFF file and return a single channel of it as an [H, W, 1] tensor.

    Args:
        img_path_tensor: scalar string tensor holding the path of the TIFF file.

    Returns:
        Channel index 1 of the decoded image, with a trailing singleton
        channel axis restored.
    """
    raw_bytes = tf.io.read_file(filename = img_path_tensor)
    decoded = tfio.experimental.image.decode_tiff(contents = raw_bytes)
    # Select channel 1 and re-add the channel axis in one indexing expression.
    return decoded[:, :, 1, tf.newaxis]

def preprocessing(csv_row):
    """Decode one manifest row into a multi-band image and its label.

    Args:
        csv_row: scalar string tensor with the ';'-delimited columns
            chip_id, B02_path, B03_path, B04_path, B08_path, label_path.

    Returns:
        (image, label): `image` stacks one `decode_img` channel per band along
        the last axis (B02, B03, B04, B08 order); `label` is the `decode_img`
        output for the label file.
    """
    columns = tf.io.decode_csv(
        records = csv_row,
        record_defaults = ['chip_id', 'B02_path', 'B03_path', 'B04_path', 'B08_path', 'label_path'],
        field_delim = ";",
    )

    # columns[0] is the chip id (unused); the four band paths follow, then the label path.
    band_paths = columns[1:5]
    label_path = columns[5]

    band_tensors = [decode_img(band_path) for band_path in band_paths]
    image = tf.concat(values = band_tensors, axis = -1)

    label = decode_img(label_path)

    return image,label

@tf.function
def rotate_clk_img_and_msk(img, msk):
    """Rotate an image and its mask by the same randomly drawn angle.

    The angle is drawn from 4..20 degrees (step 2). `tfa.image.rotate` takes
    its `angles` argument in radians, so the drawn value is converted before
    use; the previous version passed the integer degree values straight
    through.

    NOTE(review): tfa.image.rotate documents positive angles as
    counterclockwise — confirm the sign matches the "clk" in this name.

    Args:
        img: image tensor accepted by `tfa.image.rotate`.
        msk: mask tensor spatially aligned with `img`.

    Returns:
        (img_o, msk_o): the rotated image and mask.
    """
    # float32 degrees, so the radian conversion below stays in floating point.
    angles_deg = tf.constant([4, 6, 8, 10, 12, 14, 16, 18, 20], dtype = tf.float32)
    angle_deg = tf.random.shuffle(angles_deg)[0]
    angle = angle_deg * (np.pi / 180.0)
    # Image
    img_o = tfa.image.rotate(images = img,angles = angle,interpolation = "nearest",fill_mode = "reflect",fill_value = 0.0)
    # Label: same angle and nearest-neighbour sampling so label values are not blended.
    msk_o = tfa.image.rotate(images = msk,angles = angle,interpolation = "nearest",fill_mode = "reflect",fill_value = 0.0)

    return img_o, msk_o

@tf.function
def rotate_cclk_img_and_msk(img, msk):
    """Rotate an image and its mask by the same randomly drawn negative angle.

    The angle is drawn from -20..-4 degrees (step 2). `tfa.image.rotate` takes
    its `angles` argument in radians, so the drawn value is converted before
    use; the previous version passed the integer degree values straight
    through.

    NOTE(review): tfa.image.rotate documents positive angles as
    counterclockwise — confirm the sign matches the "cclk" in this name.

    Args:
        img: image tensor accepted by `tfa.image.rotate`.
        msk: mask tensor spatially aligned with `img`.

    Returns:
        (img_o, msk_o): the rotated image and mask.
    """
    # float32 degrees, so the radian conversion below stays in floating point.
    angles_deg = tf.constant([-20, -18, -16, -14, -12, -10, -8, -6, -4], dtype = tf.float32)
    angle_deg = tf.random.shuffle(angles_deg)[0]
    angle = angle_deg * (np.pi / 180.0)
    # Image
    img_o = tfa.image.rotate(images = img,angles = angle,interpolation = "nearest",fill_mode = "reflect",fill_value = 0.0)
    # Label: same angle and nearest-neighbour sampling so label values are not blended.
    msk_o = tfa.image.rotate(images = msk,angles = angle,interpolation = "nearest",fill_mode = "reflect",fill_value = 0.0)

    return img_o, msk_o

@tf.function
def flipping_img_and_msk(img, msk):
    """Flip an image and its mask left-right and then up-down.

    The previous version applied the up-down flip to the original input, which
    silently discarded the left-right flip; the two flips are now chained.

    Args:
        img: image tensor accepted by `tf.image.flip_left_right`.
        msk: mask tensor spatially aligned with `img`.

    Returns:
        (img_o, msk_o): the image and mask with both flips applied.
    """
    img_o = tf.image.flip_left_right(img)
    img_o = tf.image.flip_up_down(img_o)

    msk_o = tf.image.flip_left_right(msk)
    msk_o = tf.image.flip_up_down(msk_o)

    return img_o,msk_o

@tf.function
def zoom_img_and_msk(img, msk,height = h,width = w):
    """Randomly zoom into an image and its mask, then crop back to (height, width).

    A zoom factor is drawn from a fixed list (all > 1, i.e. zoom-in only), both
    tensors are enlarged by that factor, and the same height x width window is
    cut from one of five positions: the four corners or the centre.

    Changes from the previous version:
      * the centre crop sliced rows with the column bound (`zw_div_add`);
      * the zoom-out test compared a float32 tensor with an int32 constant;
      * the crop was chosen by a run of separate tensor-valued `if` statements
        that left `outimg`/`outmsk` unassigned on some paths; it is now a
        lookup of the crop offset, which needs no branching;
      * the mask was resized bilinearly, which blends label values; it now
        uses nearest-neighbour sampling.

    Args:
        img: image tensor indexed as [row, col, channel].
        msk: mask tensor indexed as [row, col, channel], aligned with `img`.
        height: output height in pixels (assumed to equal the input height — TODO confirm).
        width: output width in pixels (assumed to equal the input width — TODO confirm).

    Returns:
        (img_o, msk_o): float32 tensors of spatial size (height, width).
    """
    zoom_factors = tf.constant([1.2, 1.5, 1.8, 2, 2.2, 2.5], dtype=tf.float32)  # currently doesn't have zoom out!
    zoom_factor = tf.random.shuffle(zoom_factors)[0]

    # Height and width of the zoomed image (truncated to whole pixels).
    zh = tf.cast(zoom_factor * tf.cast(height, dtype=tf.float32), dtype=tf.int32)
    zw = tf.cast(zoom_factor * tf.cast(width, dtype=tf.float32), dtype=tf.int32)

    img = tf.image.resize(img, (zh, zw), preserve_aspect_ratio=False)
    # Nearest-neighbour keeps the mask's label values intact.
    msk = tf.image.resize(msk, (zh, zw), method="nearest", preserve_aspect_ratio=False)

    # 0: top-left, 1: top-right, 2: bottom-left, 3: bottom-right, 4: centre.
    region = tf.random.shuffle(tf.constant([0, 1, 2, 3, 4]))[0]

    # Offsets of the crop window. They are clamped at 0 so that a factor <= 1
    # (should one ever be added to the list) selects the whole resized image,
    # which the final resize then brings back to (height, width).
    zero = tf.constant(0, dtype=tf.int32)
    bottom = tf.maximum(zh - height, 0)
    right = tf.maximum(zw - width, 0)
    center_top = tf.maximum(zh // 2 - height // 2, 0)
    center_left = tf.maximum(zw // 2 - width // 2, 0)

    tops = tf.stack([zero, zero, bottom, bottom, center_top])
    lefts = tf.stack([zero, right, zero, right, center_left])
    top = tf.gather(tops, region)
    left = tf.gather(lefts, region)

    outimg = img[top:top + height, left:left + width, :]
    outmsk = msk[top:top + height, left:left + width, :]

    # To make sure the output is in the same size of the input.
    img_o = tf.image.resize(outimg, (height, width), preserve_aspect_ratio=False)
    msk_o = tf.image.resize(outmsk, (height, width), method="nearest", preserve_aspect_ratio=False)
    # The previous (bilinear) version always returned a float32 mask; keep that dtype.
    msk_o = tf.cast(msk_o, dtype=tf.float32)
    return img_o, msk_o

# def data_augmentation(img,mask):
    
#     # random coin flip
#     coin_tensor = tf.constant([1,2])
    
#     head = tf.constant(1)
    
#     rnd_flip = tf.random.shuffle(coin_tensor)[0]
#     rnd_rotate_clk = tf.random.shuffle(coin_tensor)[0]
#     rnd_rotate_cclk = tf.random.shuffle(coin_tensor)[0]
#     rnd_zoom = tf.random.shuffle(coin_tensor)[0]
    
#     if rnd_flip == head:
#         print("flip")
#         img, mask = flipping_img_and_msk(img, mask)
        
#     if rnd_rotate_clk == head:
#         print("rotate")
#         img, mask = rotate_clk_img_and_msk(img, mask)
        
#     if rnd_rotate_cclk == head:
#         print("cclk rotate")
#         img, mask = rotate_cclk_img_and_msk(img, mask)
        
#     if rnd_zoom == head:
#         print("zoom")
#         img, mask = zoom_img_and_msk(img, mask)
        
#     return tf.cast(img, dtype = tf.float32),tf.cast(mask, dtype = tf.float32)

# def load_dataset(file_paths,shuffle_buffer,random_state,num_epochs,batch_size,prefecth_buffer_size, training = True):
#     # loading the csv train files
#     dataset = tf.data.TextLineDataset(filenames = file_paths).skip(1).map(preprocessing).cache() # Skiping the header
    
#     if training:
#         # Data augmentation
#         dataset = dataset.map(data_augmentation).shuffle(buffer_size = shuffle_buffer, seed=random_state, reshuffle_each_iteration=True).repeat(count = num_epochs) # the dataset be repeated indefinitely
#     else:
#         dataset = dataset.repeat(count = 1)
    
#     # Prefetch prepares the next set of batches while current batch is in use.
#     return dataset.batch(batch_size=batch_size).prefetch(buffer_size=prefecth_buffer_size)


# class ADAMLearningRateTracker(tf.keras.callbacks.Callback):
#     """It prints out the last used learning rate after each epoch (useful for resuming a training)
#     original code: https://github.com/keras-team/keras/issues/7874#issuecomment-329347949
#     """

#     def __init__(self, end_lr):
#         super(ADAMLearningRateTracker, self).__init__()
#         self.end_lr = end_lr

#     def on_epoch_end(self, epoch, logs={}):  # works only when decay in optimizer is zero
#         optimizer = self.model.optimizer
#         # t = K.cast(optimizer.iterations, K.floatx()) + 1
#         # lr_t = K.eval(optimizer.lr * (K.sqrt(1. - K.pow(optimizer.beta_2, t)) /
#         #                               (1. - K.pow(optimizer.beta_1, t))))
#         # print('\n***The last Actual Learning rate in this epoch is:', lr_t,'***\n')
#         print('\n***The last Basic Learning rate in this epoch is:', K.eval(optimizer.lr), '***\n')
#         # stops the training if the basic lr is less than or equal to end_learning_rate
#         if K.eval(optimizer.lr) <= self.end_lr:
#             print("training is finished")
#             self.model.stop_training = True
            
# smooth = 0.0000001
# def jacc_coef(y_true, y_pred):
#     y_true_f = K.flatten(y_true)
#     y_pred_f = K.flatten(y_pred)
#     intersection = K.sum(y_true_f * y_pred_f)
#     return 1 - ((intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) - intersection + smooth))


In [17]:
16//5

3

In [22]:
x = tf.constant(16)
x

<tf.Tensor: shape=(), dtype=int32, numpy=16>

In [23]:
y = tf.constant(5)
y

<tf.Tensor: shape=(), dtype=int32, numpy=5>

In [24]:
tf.math.floordiv(
    x, y, name=None
)

<tf.Tensor: shape=(), dtype=int32, numpy=3>

In [66]:
kk  = tf.random.shuffle(tf.constant([1,2]))[0]
kk

<tf.Tensor: shape=(), dtype=int32, numpy=1>

In [68]:
kk == tf.constant(1)

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [56]:
a = tf.constant(192)
a

<tf.Tensor: shape=(), dtype=int32, numpy=192>

In [57]:
b = tf.constant(0.2)
b

<tf.Tensor: shape=(), dtype=float32, numpy=0.2>

In [58]:
# Expected to fail: `a` is int32 and `b` is float32, and `*` does not promote dtypes here.
a*b

InvalidArgumentError: cannot compute Mul as input #1(zero-based) was expected to be a int32 tensor but is a float tensor [Op:Mul]

In [60]:
b

<tf.Tensor: shape=(), dtype=float32, numpy=0.2>

In [61]:
# Bring both operands to float32 so the multiplication in the next cell is accepted.
a = tf.cast(a, dtype=tf.float32)
b = tf.cast(b, dtype=tf.float32)

In [62]:
a*b

<tf.Tensor: shape=(), dtype=float32, numpy=38.4>

In [63]:
tf.math.multiply(a, b)

<tf.Tensor: shape=(), dtype=float32, numpy=38.4>

In [52]:
help(tf.cast)

Help on function cast in module tensorflow.python.ops.math_ops:

cast(x, dtype, name=None)
    Casts a tensor to a new type.
    
    The operation casts `x` (in case of `Tensor`) or `x.values`
    (in case of `SparseTensor` or `IndexedSlices`) to `dtype`.
    
    For example:
    
    >>> x = tf.constant([1.8, 2.2], dtype=tf.float32)
    >>> tf.cast(x, tf.int32)
    <tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>
    
    Notice `tf.cast` has an alias `tf.dtypes.cast`:
    
    >>> x = tf.constant([1.8, 2.2], dtype=tf.float32)
    >>> tf.dtypes.cast(x, tf.int32)
    <tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>
    
    The operation supports data types (for `x` and `dtype`) of
    `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
    `float16`, `float32`, `float64`, `complex64`, `complex128`, `bfloat16`.
    In case of casting from complex types (`complex64`, `complex128`) to real
    types, only the real part o

In [53]:
tf.cast(tf.constant([12.5]),dtype = tf.int32)

<tf.Tensor: shape=(1,), dtype=int32, numpy=array([12], dtype=int32)>

In [40]:
# Drop the CSV header line, then decode every remaining row into an (image, label) pair.
d = train_dataset.skip(1).map(parser)

In [41]:
im = list(d.take(5))



In [42]:
len(im)

5

In [43]:
len(im[0])

2

In [44]:
im[0][0].shape

TensorShape([512, 512, 16])

In [45]:
im[0][1].shape

TensorShape([512, 512, 4])

In [37]:
im[0][1].dtype

tf.uint8

In [38]:
im[0][1]

<tf.Tensor: shape=(512, 512, 4), dtype=uint8, numpy=
array([[[  0,   0,   0, 255],
        [  0,   0,   0, 255],
        [  0,   0,   0, 255],
        ...,
        [  1,   1,   1, 255],
        [  1,   1,   1, 255],
        [  1,   1,   1, 255]],

       [[  0,   0,   0, 255],
        [  0,   0,   0, 255],
        [  0,   0,   0, 255],
        ...,
        [  1,   1,   1, 255],
        [  1,   1,   1, 255],
        [  1,   1,   1, 255]],

       [[  0,   0,   0, 255],
        [  0,   0,   0, 255],
        [  0,   0,   0, 255],
        ...,
        [  1,   1,   1, 255],
        [  1,   1,   1, 255],
        [  1,   1,   1, 255]],

       ...,

       [[  0,   0,   0, 255],
        [  0,   0,   0, 255],
        [  0,   0,   0, 255],
        ...,
        [  0,   0,   0, 255],
        [  0,   0,   0, 255],
        [  0,   0,   0, 255]],

       [[  0,   0,   0, 255],
        [  0,   0,   0, 255],
        [  0,   0,   0, 255],
        ...,
        [  0,   0,   0, 255],
        [  0,   0,   