In [2]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
import sys
sys.path.append("../train")
sys.path.append("..")
import tools
import main as m
import matplotlib.pyplot as plt
import argparse
import json
from tensorflow.keras import backend as K
%load_ext autoreload
%autoreload 2

In [3]:
import tensorflow as tf
import numpy as np

def parse_function(img_shape=(128, 128, 1), test=False, deep_outputs=1):
    """
    Build a parser for serialized examples holding an 'x' (float32) and a 'y' (int64) feature.

    :param img_shape: Shape used for both fixed-length features
    :param test: If True, the parser returns only the clipped 'x' tensor
    :param deep_outputs: Number of target tensors to emit when test is False
    :return: Function mapping one serialized example to 'x' (test) or to ('x', targets)
    """
    def parsing(example_proto):
        schema = {
            'x': tf.io.FixedLenFeature(shape=img_shape, dtype=tf.float32),
            'y': tf.io.FixedLenFeature(shape=img_shape, dtype=tf.int64),
        }
        record = tf.io.parse_single_example(example_proto, schema)
        mask = tf.cast(record['y'], tf.float32)
        # Input values are limited to the range [-100, 100].
        image = tf.clip_by_value(record['x'], -100, 100)
        if test:
            return image
        # One target per output, coarsest first: the stride over the two leading
        # axes is 2**(deep_outputs-1) for the first target and 1 for the last one.
        strides = [2 ** (deep_outputs - 1 - level) for level in range(deep_outputs)]
        targets = tuple([mask[::step, ::step] for step in strides])
        return image, targets
    return parsing

def get_shape_of_quadratic_image_tfrecord(raw_dataset):
    """
    Infer the (side, side, 1) image shape from the first record of a TFRecord dataset.

    The 'x' feature of the first record is parsed as a variable-length feature and its
    element count is interpreted as side * side.

    :param raw_dataset: Dataset of serialized examples that contain 'x' and 'y' features
    :return: Tuple (side, side, 1)
    :raises ValueError: If the dataset yields no record, or the element count of 'x' is
        not a perfect square
    """
    import math

    keys_to_features = {'x': tf.io.VarLenFeature(dtype=tf.float32),
                        'y': tf.io.VarLenFeature(dtype=tf.int64)}
    for record in raw_dataset.take(1):
        parsed_features = tf.io.parse_single_example(record, keys_to_features)
        n_values = int(parsed_features["x"].shape[0])
        # Exact integer square root: avoids float rounding and lets us detect
        # element counts that do not describe a square image.
        side = math.isqrt(n_values)
        if side * side != n_values:
            raise ValueError(
                "Feature 'x' has %d values, which is not a perfect square." % n_values)
        return (side, side, 1)
    # The loop body never ran, so there was no record to inspect.
    raise ValueError("Cannot infer the image shape: the dataset contains no records.")


def custom_loss_sum(losses):
    """
    Combine several loss callables into a single loss that adds them up.

    :param losses: Iterable of callables taking (y_true, y_pred)
    :return: Callable taking (y_true, y_pred) and returning the sum of all individual
        losses (0 when `losses` is empty)
    """
    def custom_loss(y_true, y_pred):
        total = 0
        for loss_fn in losses:
            total += loss_fn(y_true, y_pred)
        return total
    return custom_loss

class F1_Score(tf.keras.metrics.Metric):
    """
    Mean of per-batch F1 scores.

    On every update the internal precision and recall metrics (threshold 0.5) are reset,
    evaluated on the current batch only, and the resulting F1 value is added to a running
    sum. `result()` is that sum divided by the number of updates.
    """
    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        # Running sum of the per-batch F1 values.
        self.f1 = self.add_weight(name='f1', initializer='zeros')
        # NOTE(review): `counter` is never read or updated by this class; it is kept
        # only so that the set of metric variables stays the same as before.
        self.counter = self.add_weight(name='counter', initializer='zeros')
        self.precision_fn = tf.keras.metrics.Precision(thresholds=0.5)
        self.recall_fn = tf.keras.metrics.Recall(thresholds=0.5)
        # Number of update_state calls since the last reset.
        self.count = self.add_weight(name='F1ScoreCount', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add the F1 score of the current batch to the running sum."""
        # Reset first so that precision/recall describe this batch alone.
        self.precision_fn.reset_state()
        self.recall_fn.reset_state()
        # Forward sample_weight instead of silently dropping it.
        p = self.precision_fn(y_true, y_pred, sample_weight=sample_weight)
        r = self.recall_fn(y_true, y_pred, sample_weight=sample_weight)
        # The small constant keeps the ratio finite when p + r == 0.
        self.f1.assign_add(2 * ((p * r) / (p + r + 1e-6)))
        self.count.assign_add(1)

    def result(self):
        """Return the mean per-batch F1 (0 when no batch has been seen yet)."""
        # divide_no_nan yields 0 instead of NaN for the 0/0 case before the first update.
        return tf.math.divide_no_nan(self.f1, self.count)

    def reset_state(self):
        """Clear the running sum, the update count and the nested metrics."""
        # we also need to reset the state of the precision and recall objects
        self.precision_fn.reset_state()
        self.recall_fn.reset_state()
        self.f1.assign(0)
        self.count.assign(0)

def get_architecture_from_model(model):
    """
    Extracts the architecture of a model and returns it as a dictionary.

    Layers are recognised by their (lower-cased) names: a name containing "block" and
    "conv1" starts a new block entry, on the encoder side if the name starts with "e" and
    on the decoder side if it starts with "d". A following "drop" layer sets the dropout
    rate of the most recent block on its side, and an "eblock...pool" layer marks the most
    recent encoder block as max-pooled. Layers are processed in the order of
    `model.layers`, so no block index has to be parsed out of the name.

    Every block gets an entry in every list of its side: blocks without a dropout layer
    are reported with rate 0.0 and encoder blocks without a pooling layer with False
    (including trailing ones), so the lists always have matching lengths.

    :param model: tensorflow model
    :return: dictionary with the architecture
    """
    architecture = {
        "downFilters": [],
        "downActivation": [],
        "downDropout": [],
        "downMaxPool": [],
        "upFilters": [],
        "upActivation": [],
        "upDropout": []}
    side_of_prefix = {"e": "down", "d": "up"}
    for layer in model.layers:
        name = layer.name.lower()
        if "block" not in name:
            continue
        side = side_of_prefix.get(name[0])
        if "conv1" in name:
            if side is None:
                continue
            architecture[side + "Filters"].append(layer.filters)
            architecture[side + "Activation"].append(layer.activation.__name__)
            # Placeholders, overwritten if this block's dropout / pooling layer follows.
            architecture[side + "Dropout"].append(0.0)
            if side == "down":
                architecture["downMaxPool"].append(False)
        elif "drop" in name:
            if side is None:
                continue
            rates = architecture[side + "Dropout"]
            if rates:
                rates[-1] = layer.rate
            else:
                # Dropout layer seen before any conv1 layer on this side.
                rates.append(layer.rate)
        elif ("eblock" in name) and ("pool" in name):
            pooled = architecture["downMaxPool"]
            if pooled:
                pooled[-1] = True
            else:
                pooled.append(True)
    return architecture
def encoder_mini_block(inputs, n_filters=32, activation="relu", dropout_prob=0.3, max_pooling=True, name=""):
    """
    Encoder mini block for U-Net architecture. The input is batch-normalized and passed through one 3x3
    convolutional layer ('same' padding, HeNormal initializer). A dropout layer follows the convolution when
    dropout_prob is positive, and a 2x2 max pooling layer closes the block when max_pooling is set. The skip
    connection is the tensor right before the pooling layer.

    :param inputs: Input tensor to the block
    :param n_filters: Number of filters for the convolutional layer
    :param activation: Activation function for the convolutional layer
    :param dropout_prob: Dropout probability for the dropout layer (0 means no dropout)
    :param max_pooling: Boolean to add a max pooling layer at the end of the block
    :param name: Name of the block, inserted into every layer name (Optional)
    :return: The output tensor of the block and the skip connection tensor
    """
    layers = tf.keras.layers
    prefix = "eblock" + name

    normalized = layers.BatchNormalization(name=prefix + "norm")(inputs)
    convolution = layers.Conv2D(n_filters,
                                3,  # filter size
                                activation=activation,
                                padding='same',
                                kernel_initializer='HeNormal',
                                name=prefix + "conv1")
    features = convolution(normalized)
    if dropout_prob > 0:
        features = layers.Dropout(dropout_prob, name=prefix + "drop")(features)

    # Without pooling the block output and the skip connection are the same tensor.
    if not max_pooling:
        return features, features
    pooled = layers.MaxPooling2D(pool_size=(2, 2), name=prefix + "pool")(features)
    return pooled, features


def decoder_mini_block(prev_layer_input, skip_layer_input, n_filters=32, activation="relu", dropout_prob=0.3,
                       max_pooling=True, name=""):
    """
    Decoder mini block for U-Net architecture. When max_pooling is set, the previous layer is first upsampled
    by a stride-2 transposed convolution. The result is concatenated with the encoder skip connection along
    the last axis, batch-normalized and passed through one 3x3 convolutional layer ('same' padding, HeNormal
    initializer). A dropout layer follows the convolution when dropout_prob is positive.

    :param prev_layer_input: Input tensor to the block from the previous layer
    :param skip_layer_input: Input tensor to the block from the corresponding encoder skip connection
    :param n_filters: Number of filters for the transposed and the regular convolutional layer
    :param activation: Activation function for the convolutional layer
    :param dropout_prob: Dropout probability for the dropout layer (0 means no dropout)
    :param max_pooling: Boolean to add the transposed convolutional layer at the start of the block
    :param name: Name of the block, inserted into every layer name (Optional)
    :return: The output tensor of the block
    """
    layers = tf.keras.layers
    prefix = "dblock" + name

    upsampled = prev_layer_input
    if max_pooling:
        transposed = layers.Conv2DTranspose(n_filters,
                                            (3, 3),
                                            strides=(2, 2),
                                            padding='same',
                                            name=prefix + "convT")
        upsampled = transposed(upsampled)

    merged = layers.concatenate([upsampled, skip_layer_input], axis=-1, name=prefix + "concat")
    normalized = layers.BatchNormalization(name=prefix + "norm")(merged)
    features = layers.Conv2D(n_filters,
                             3,  # filter size
                             activation=activation,
                             padding='same',
                             kernel_initializer='HeNormal',
                             name=prefix + "conv1")(normalized)
    if dropout_prob > 0:
        features = layers.Dropout(dropout_prob, name=prefix + "drop")(features)
    return features


def unet_model(input_size, arhitecture, deepSupervised=True):
    """
    U-Net model for semantic segmentation, assembled from encoder and decoder mini blocks as described by the
    architecture dictionary. Each encoder block contributes a skip connection; decoder block i is joined with
    the skip connection of encoder block len(upFilters)-1-i and reuses that encoder block's max pooling flag
    and name. Every output is a 1x1 convolution with one filter and sigmoid activation.

    NOTE(review): the skip index is derived from the number of decoder blocks, so this presumably expects as
    many decoder blocks as encoder blocks - confirm against the architecture files.

    :param input_size: Size of the input image
    :param arhitecture: Dictionary containing the architecture of the U-Net model (keys downFilters,
        downActivation, downDropout, downMaxPool, upFilters, upActivation, upDropout)
    :param deepSupervised: If True, one output ("output<i>") is attached to every decoder block; otherwise a
        single output ("output") is attached to the last layer
    :return: U-Net model
    """
    inputs = tf.keras.layers.Input(input_size, name="input")

    # Encoder
    layer = inputs
    skip_connections = []
    for depth, n_filters in enumerate(arhitecture["downFilters"]):
        layer, skip = encoder_mini_block(layer,
                                         n_filters=n_filters,
                                         activation=arhitecture["downActivation"][depth],
                                         dropout_prob=arhitecture["downDropout"][depth],
                                         max_pooling=arhitecture["downMaxPool"][depth],
                                         name=str(depth))
        skip_connections.append(skip)

    # Decoder
    n_up_blocks = len(arhitecture["upFilters"])
    outputs = []
    for step in range(n_up_blocks):
        # Index of the encoder block this decoder block is paired with.
        mirror = n_up_blocks - 1 - step
        layer = decoder_mini_block(layer,
                                   skip_connections[mirror],
                                   n_filters=arhitecture["upFilters"][step],
                                   activation=arhitecture["upActivation"][step],
                                   dropout_prob=arhitecture["upDropout"][step],
                                   max_pooling=arhitecture["downMaxPool"][mirror],
                                   name=str(mirror))
        if deepSupervised:
            head = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid', name="output" + str(step))
            outputs.append(head(layer))
    if not deepSupervised:
        outputs = [tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid', name="output")(layer)]

    return tf.keras.Model(inputs=[inputs], outputs=outputs, name="AsteroidNET")

In [4]:
# Module-level constants for the Tversky-based loss functions in this cell.
# NOTE(review): `epsilon` is not referenced anywhere in the visible part of this notebook.
epsilon = 1e-5
# Additive smoothing term; `tversky` adds it to both its numerator and denominator.
smooth = 1

def tversky(y_true, y_pred, alpha=0.7):
    """
    Tversky index between a target and a prediction, computed over all elements at once.

    Both inputs are flattened, then
        TP = sum(y_true * y_pred)
        FN = sum(y_true * (1 - y_pred))
        FP = sum((1 - y_true) * y_pred)
    and the index is (TP + smooth) / (TP + alpha*FN + (1-alpha)*FP + smooth), where
    `smooth` is the module-level constant.

    :param y_true: Target tensor
    :param y_pred: Prediction tensor
    :param alpha: Weight of the false-negative term; false positives are weighted with
        1 - alpha. Defaults to 0.7, the value that used to be hard-coded.
    :return: Scalar Tversky index
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    true_pos = K.sum(truth * prediction)
    false_neg = K.sum(truth * (1 - prediction))
    false_pos = K.sum((1 - truth) * prediction)
    return (true_pos + smooth) / (true_pos + alpha * false_neg + (1 - alpha) * false_pos + smooth)

def tversky_loss(y_true, y_pred):
    """
    Tversky loss: one minus the Tversky index of the target and the prediction.

    :param y_true: Target tensor
    :param y_pred: Prediction tensor
    :return: 1 - tversky(y_true, y_pred)
    """
    index = tversky(y_true, y_pred)
    return 1 - index

def focal_tversky(y_true, y_pred, gamma=0.75):
    """
    Focal Tversky loss: (1 - Tversky index) raised to the power gamma.

    :param y_true: Target tensor
    :param y_pred: Prediction tensor
    :param gamma: Exponent applied to (1 - Tversky index). Defaults to 0.75, the value
        that used to be hard-coded.
    :return: Scalar loss value
    """
    index = tversky(y_true, y_pred)
    return K.pow((1 - index), gamma)

In [16]:
def main(args):
    """
    Train the U-Net described by an architecture JSON file and save it.

    Steps: load the architecture (using the entry under key "0" when the file has one),
    build the training and validation pipelines from TFRecord files, build and compile the
    model inside a MirroredStrategy scope with the focal Tversky loss, and fit it with
    early stopping, NaN termination and learning-rate reduction on plateau.

    Attributes read from `args`: arhitecture, train_dataset_path, test_dataset_path,
    batch_size, start_lr, decay_lr_rate, decay_lr_patience, epochs, model_destination.
    Other attributes (e.g. class_balancing_alpha) are not read here.

    :param args: Namespace with the attributes listed above
    :return: The model; it is saved to args.model_destination when training finishes, and
        returned unsaved when training is interrupted from the keyboard
    """
    with open(args.arhitecture) as f:
        arhitecture = json.load(f)
    if "0" in arhitecture:
        arhitecture = arhitecture["0"]
    # One target per decoder block (see parse_function's deep_outputs).
    n_outputs = len(arhitecture["upFilters"])

    dataset_train = tf.data.TFRecordDataset([args.train_dataset_path])
    # The image shape is read from the first training record and reused for validation.
    tfrecord_shape = tools.model.get_shape_of_quadratic_image_tfrecord(dataset_train)
    parser = parse_function(img_shape=tfrecord_shape, test=False, deep_outputs=n_outputs)

    dataset_train = dataset_train.map(parser)
    dataset_train = dataset_train.shuffle(5*args.batch_size).batch(args.batch_size).prefetch(2)
    dataset_val = tf.data.TFRecordDataset([args.test_dataset_path]).map(parser)
    dataset_val = dataset_val.batch(args.batch_size).prefetch(2)

    mirrored_strategy = tf.distribute.MirroredStrategy()
    FT = focal_tversky
    with mirrored_strategy.scope():
        # Use the shape the records are parsed with, so that the model input always
        # matches the data instead of assuming 128x128 images.
        model = unet_model(tfrecord_shape, arhitecture)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=args.start_lr), loss=FT,
                      metrics=["Precision", "Recall", tools.model.F1_Score()])

    # Early stopping waits five times longer than the learning-rate reduction.
    earlystopping_kb = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5*args.decay_lr_patience, verbose=1,
                                                        restore_best_weights=True)
    terminateonnan_kb = tf.keras.callbacks.TerminateOnNaN()
    reducelronplateau_kb = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=args.decay_lr_rate,
                                                                patience=args.decay_lr_patience, verbose=1)
    try:
        model.fit(dataset_train, epochs=args.epochs, validation_data=dataset_val,
                  callbacks=[earlystopping_kb, terminateonnan_kb, reducelronplateau_kb], verbose=1)
    except KeyboardInterrupt:
        # A manually interrupted run hands back the model in its current state
        # without writing it to disk.
        return model
    model.save(args.model_destination)
    return model

In [17]:
def parse_arguments(args):
    """Parse command line arguments.

    Every option is optional; the defaults are listed in the option table below.

    Args:
        args (list): Command line arguments.
    Returns:
        args (Namespace): Parsed command line arguments.
    """
    # (flag, type, default, help) for every supported option.
    option_table = (
        ('--train_dataset_path', str, '../DATA/train1.tfrecord',
         'Path to training dataset.'),
        ('--test_dataset_path', str, '../DATA/test1.tfrecord',
         'Path to test dataset.'),
        ('--arhitecture', str, "../DATA/arhitecture_tuned.json",
         'Path to a JSON containing definition of an arhitecture.'),
        ('--model_destination', str, "../DATA/Trained_model3",
         'Path where to save the model once trained.'),
        ('--epochs', int, 64,
         'Number of epochs.'),
        ('--batch_size', int, 256,
         'Batch size.'),
        ('--class_balancing_alpha', float, 0.95,
         'How much to weight the positive class in the loss function.'),
        ('--start_lr', float, 0.00005,
         'Initial learning rate.'),
        ('--decay_lr_rate', float, 0.5,
         'Rate at which to decay the learning rate upon reaching the plateau.'),
        ('--decay_lr_patience', float, 2,
         'Number of iteration to wait upon reaching the plataeau.'),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)
    return parser.parse_args(args)

In [18]:
# Parse an empty argument list so that every option takes its default value,
# then run the training; `main` returns the model.
args = parse_arguments([])
model=main(args)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


Epoch 1/64
INFO:tensorflow:Collective all_reduce tensors: 60 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Collective all_reduce tensors: 60 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Collective all_reduce tensors: 60 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Collective all_reduce tensors: 60 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


Epoch 2/64
Epoch 3/64
Epoch 4/64
Epoch 5/64
Epoch 6/64
Epoch 7/64


Epoch 8/64
Epoch 9/64
Epoch 10/64
Epoch 11/64
 29/363 [=>............................] - ETA: 2:30 - loss: 4.1722 - output0_loss: 0.8664 - output1_loss: 0.8093 - output2_loss: 0.9991 - output3_loss: 0.7357 - output4_loss: 0.7618 - output0_precision_5: 0.0500 - output0_recall_5: 0.0633 - output0_f1_score: 0.0504 - output1_precision_6: 0.2895 - output1_recall_6: 0.1337 - output1_f1_score: 0.1564 - output2_precision_7: 0.0392 - output2_recall_7: 0.2835 - output2_f1_score: 0.0688 - output3_precision_8: 0.3785 - output3_recall_8: 0.3700 - output3_f1_score: 0.3399 - output4_precision_9: 0.3389 - output4_recall_9: 0.3845 - output4_f1_score: 0.3268

In [15]:
# Print the layer-by-layer summary of the model returned by `main`.
model.summary()

Model: "AsteroidNET"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input (InputLayer)          [(None, 128, 128, 1)]        0         []                            
                                                                                                  
 eblock0norm (BatchNormaliz  (None, 128, 128, 1)          4         ['input[0][0]']               
 ation)                                                                                           
                                                                                                  
 eblock0conv1 (Conv2D)       (None, 128, 128, 25)         250       ['eblock0norm[0][0]']         
                                                                                                  
 eblock0drop (Dropout)       (None, 128, 128, 25)         0         ['eblock0conv1[0][0]

 ation)                                                                                           
                                                                                                  
 dblock2conv1 (Conv2D)       (None, 32, 32, 32)           18464     ['dblock2norm[0][0]']         
                                                                                                  
 dblock2drop (Dropout)       (None, 32, 32, 32)           0         ['dblock2conv1[0][0]']        
                                                                                                  
 dblock1convT (Conv2DTransp  (None, 64, 64, 52)           15028     ['dblock2drop[0][0]']         
 ose)                                                                                             
                                                                                                  
 dblock1concat (Concatenate  (None, 64, 64, 107)          0         ['dblock1convT[0][0]',        
 )        

In [1]:
!nvidia-smi

Mon May  6 08:49:44 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.06              Driver Version: 545.23.06    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 2080 Ti     Off | 00000000:21:00.0  On |                  N/A |
| 32%   37C    P8              20W / 260W |     55MiB / 11264MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 2080 Ti     Off | 00000000:48:0

In [3]:
!ps -up `nvidia-smi -q -x | grep pid | sed -e 's/<pid>//g' -e 's/<\/pid>//g' -e 's/^[[:space:]]*//'`

USER        PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root       2357  0.0  0.0 25485608 40260 tty1   Ssl+ Apr24   0:38 /usr/bin/X :0 
gdm        2996  0.0  0.0 7869304 153940 ?      Sl   Apr24   6:24 /usr/bin/gnome
