# In [None]:

import kfp
from kfp import dsl
import kfp.components as components
from typing import NamedTuple
from datetime import datetime

def split_data():
    """Split the BUSI ultrasound images into train/val/test sets on MinIO.

    Steps:
      1. Download the ``benign`` and ``malignant`` PNG files from the
         ``kubeflow`` bucket into ``/tmp/Dataset_BUSI_with_GT``.
      2. Group the files per instance: a file whose stem has no ``_`` is the
         image, every file with a ``_`` suffix is one of its masks.
      3. Split the instances 80/10/10 (fixed ``random_state``) and write each
         instance to ``/tmp/<split>_dataset/<n>/{image.png,mask.png,info}``.
      4. Upload the three folders back to MinIO next to the raw data.

    All imports and helpers live inside the function because it is turned
    into a self-contained component with ``create_component_from_func``.

    Raises:
        ValueError: if no instance with both an image and a mask is found.
    """
    import os
    from glob import glob
    from minio import Minio
    import numpy as np
    from PIL import Image
    from sklearn.model_selection import train_test_split

    # Connection placeholders; replace with the real endpoint/credentials.
    minio_client = Minio(
       "<minio_ep>",
        access_key="<minio_accK>",
        secret_key="<minio_secK>",
        secure=False
    )
    minio_bucket = "kubeflow"
    remote_root = "datasets/breast_cancer_segmentation/Dataset_BUSI_with_GT"
    local_root = "/tmp/Dataset_BUSI_with_GT"

    ## get data from minio
    # Only the two folders that are actually used below are fetched; this also
    # avoids re-downloading the split folders uploaded by a previous run.
    print('Downloading data from minio...')
    for label in ("benign", "malignant"):
        for obj in minio_client.list_objects(minio_bucket, prefix=f"{remote_root}/{label}/", recursive=True):
            save_path = obj.object_name.replace('datasets/breast_cancer_segmentation', '/tmp')
            minio_client.fget_object(minio_bucket, obj.object_name, save_path)

    class BreastCancerSample:
        """Paths of one ultrasound image and of all mask files belonging to it."""

        def __init__(self):
            self.image_path = ''
            self.mask_paths = []

        def is_complete(self):
            """Return True when both the image and at least one mask are known."""
            return bool(self.image_path) and bool(self.mask_paths)

        def load_image(self):
            """Open the image as an RGB PIL image."""
            return Image.open(self.image_path).convert('RGB')

        def load_mask(self):
            """Merge all masks of the instance into one 8-bit grayscale image."""
            merged = None
            for mask_path in self.mask_paths:
                # Accumulate in int32: adding uint8 arrays wraps around
                # (255 + 255 -> 254), which would make the clip below a no-op.
                layer = np.array(Image.open(mask_path).convert('L'), dtype=np.int32)
                merged = layer if merged is None else merged + layer
            return Image.fromarray(np.clip(merged, 0, 255).astype(np.uint8))

    ## group files by instance name
    samples = {}
    png_paths = glob(f'{local_root}/benign/*.png') + glob(f'{local_root}/malignant/*.png')
    for png_path in png_paths:
        stem, _ = os.path.splitext(os.path.basename(png_path))
        instance, *suffix = stem.split('_')
        sample = samples.setdefault(instance, BreastCancerSample())
        if suffix:  # mask
            sample.mask_paths.append(png_path)
        else:  # image
            sample.image_path = png_path

    # Sorted so that the split does not depend on filesystem listing order;
    # incomplete instances are skipped instead of crashing while saving.
    name_lists = []
    for name in sorted(samples):
        if samples[name].is_complete():
            name_lists.append(name)
        else:
            print(f'Skipping {name}: image or mask file is missing')
    if not name_lists:
        raise ValueError(f'No image/mask pairs found under {local_root}')

    train_name, val_test_name = train_test_split(name_lists, test_size=0.2, random_state=42)
    val_name, test_name = train_test_split(val_test_name, test_size=0.5, random_state=42)

    def save_split(split, names):
        """Write every instance of one split to /tmp/<split>_dataset/<n>/."""
        print(f'Saving {split}_dataset...')
        for idx, name in enumerate(names, start=1):
            sPath = f'/tmp/{split}_dataset/{idx}'
            os.makedirs(sPath, exist_ok=True)
            samples[name].load_image().save(f'{sPath}/image.png')
            samples[name].load_mask().save(f'{sPath}/mask.png')
            with open(f'{sPath}/info', 'w') as f:
                f.write(f'Instance: {name}')

    def upload_local_directory_to_minio(local_path, bucket_name, minio_path):
        """Recursively upload the content of local_path below minio_path."""
        assert os.path.isdir(local_path)

        for local_file in glob(local_path + '/**'):
            local_file = local_file.replace(os.sep, "/") # Replace \ with / on Windows
            if not os.path.isfile(local_file):
                upload_local_directory_to_minio(
                    local_file, bucket_name, minio_path + "/" + os.path.basename(local_file))
            else:
                remote_path = os.path.join(
                    minio_path, local_file[1 + len(local_path):])
                remote_path = remote_path.replace(
                    os.sep, "/")  # Replace \ with / on Windows
                minio_client.fput_object(bucket_name, remote_path, local_file)

    splits = (('train', train_name), ('val', val_name), ('test', test_name))

    ## save all splits locally
    for split, names in splits:
        save_split(split, names)

    ## upload all splits to minio
    for split, _ in splits:
        print(f'Uploading {split}_dataset to minio...')
        upload_local_directory_to_minio(f'/tmp/{split}_dataset', minio_bucket, f'{remote_root}/{split}_dataset')



def model_building(
    epoch: int,
) -> NamedTuple('Output', [('mlpipeline_ui_metadata', 'UI_metadata'),('mlpipeline_metrics', 'Metrics')]):
    """Train, evaluate and export an Attention U-Net with the Keras API.

    The train/val/test folders are downloaded from the ``kubeflow`` MinIO
    bucket into ``/tmp``, the model is trained for ``epoch`` epochs, evaluated
    on the test set, saved to ``/tmp/attention_unet`` and uploaded to
    ``models/attention_unet/1`` in the same bucket.

    All imports and helpers live inside the function because it is turned
    into a self-contained component with ``create_component_from_func``.

    Args:
        epoch: number of training epochs passed to ``model.fit``.

    Returns:
        A namedtuple of two JSON strings: ``mlpipeline_ui_metadata`` (inline
        markdown with the model summary and test metrics) and
        ``mlpipeline_metrics`` (accuracy, loss and IoU on the test set).
    """

    import os
    import json
    import keras
    from glob import glob
    import tensorflow as tf

    # Model
    from keras.models import Model
    from keras.layers import Layer
    from keras.layers import Conv2D
    from keras.layers import Dropout
    from keras.layers import UpSampling2D
    from keras.layers import concatenate
    from keras.layers import Add
    from keras.layers import Multiply
    from keras.layers import Input
    from keras.layers import MaxPool2D
    from keras.layers import BatchNormalization

    # Metrics
    from keras.metrics import BinaryIoU
    from minio import Minio

    # Connection placeholders; replace with the real endpoint/credentials.
    minio_client = Minio(
        "<minio_ep>",
        access_key="<minio_accK>",
        secret_key="<minio_secK>",
        secure=False
    )
    minio_bucket = "kubeflow"
    remote_root = "datasets/breast_cancer_segmentation/Dataset_BUSI_with_GT"

    ## load image and mask
    def load_image_and_mask(folder_path):
        """Map one sample folder to a (256x256x3 image, 256x256x1 mask) pair in [0, 1]."""
        image_file = tf.strings.join([folder_path, "image.png"], separator="/")
        mask_file = tf.strings.join([folder_path, "mask.png"], separator="/")

        # load image (the file is a PNG, so use the PNG decoder)
        image = tf.io.read_file(image_file)
        image = tf.image.decode_png(image, channels=3)
        image = tf.image.resize(image, [256, 256])
        image = tf.cast(image, tf.float32) / 255.0

        # load mask; nearest-neighbour keeps the original mask values
        mask = tf.io.read_file(mask_file)
        mask = tf.image.decode_png(mask, channels=1)
        mask = tf.image.resize(mask, [256, 256], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        mask = tf.cast(mask, tf.float32) / 255.0

        return image, mask

    def configure_for_performance(ds):
        """Cache, shuffle, batch (8) and prefetch a dataset."""
        ds = ds.cache()
        ds = ds.shuffle(buffer_size=1000)
        ds = ds.batch(8)
        ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

        return ds

    def download_split(split):
        """Copy every object below <remote_root>/<split>/ to /tmp/<split>/."""
        for f in minio_client.list_objects(minio_bucket, prefix=f"{remote_root}/{split}/", recursive=True):
            save_path = f.object_name.replace(remote_root, '/tmp')
            minio_client.fget_object(minio_bucket, f.object_name, save_path)

    def build_dataset(split):
        """Build the batched tf.data pipeline over the folders in /tmp/<split>/."""
        ds = tf.data.Dataset.list_files(f'/tmp/{split}/*')
        ds = ds.map(load_image_and_mask)
        return configure_for_performance(ds)

    ## get train_dataset
    print('Downloading training data from minio...')
    download_split('train_dataset')
    train_ds = build_dataset('train_dataset')
    ## get val_dataset
    print('Downloading validation data from minio...')
    download_split('val_dataset')
    val_ds = build_dataset('val_dataset')
    ## get test_dataset (the tf.data pipeline is built after training)
    print('Downloading test data from minio...')
    download_split('test_dataset')

    ## model layers
    ### Encoder
    class EncoderBlock(Layer):
        """Conv -> Dropout -> Conv, optionally followed by max pooling.

        With ``pooling=True`` the call returns ``(pooled, features)`` so the
        un-pooled features can be used as a skip connection; otherwise it
        returns only the features.
        """

        def __init__(self, filters, rate, pooling=True, **kwargs):
            super().__init__(**kwargs)

            self.filters = filters
            self.rate = rate
            self.pooling = pooling

            self.c1 = Conv2D(filters, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')
            self.drop = Dropout(rate)
            self.c2 = Conv2D(filters, kernel_size=3, strides=1, padding='same', activation='relu', kernel_initializer='he_normal')
            self.pool = MaxPool2D()

        def call(self, X):
            x = self.c1(X)
            x = self.drop(x)
            x = self.c2(x)
            if self.pooling:
                y = self.pool(x)
                return y, x
            else:
                return x

        def get_config(self):
            base_config = super().get_config()
            return {
                **base_config,
                "filters":self.filters,
                'rate':self.rate,
                'pooling':self.pooling
            }

    ### decoder
    class DecoderBlock(Layer):
        """Upsample the input, concatenate it with the skip tensor and convolve."""

        def __init__(self, filters, rate, **kwargs):
            super().__init__(**kwargs)

            self.filters = filters
            self.rate = rate

            self.up = UpSampling2D()
            self.net = EncoderBlock(filters, rate, pooling=False)

        def call(self, X):
            X, skip_X = X
            x = self.up(X)
            c_ = concatenate([x, skip_X])
            x = self.net(c_)
            return x

        def get_config(self):
            base_config = super().get_config()
            return {
                **base_config,
                "filters":self.filters,
                'rate':self.rate,
            }

    ### Attention Gate
    class AttentionGate(Layer):
        """Weight the skip tensor with a learned single-channel sigmoid map.

        The gating input is convolved, the skip tensor is convolved with
        stride 2, both are added, reduced to one sigmoid channel, upsampled
        and multiplied with the skip tensor; ``bn`` enables a final batch
        normalization.
        """

        def __init__(self, filters, bn, **kwargs):
            super().__init__(**kwargs)

            self.filters = filters
            self.bn = bn

            self.normal = Conv2D(filters, kernel_size=3, padding='same', activation='relu', kernel_initializer='he_normal')
            self.down = Conv2D(filters, kernel_size=3, strides=2, padding='same', activation='relu', kernel_initializer='he_normal')
            self.learn = Conv2D(1, kernel_size=1, padding='same', activation='sigmoid')
            self.resample = UpSampling2D()
            self.BN = BatchNormalization()

        def call(self, X):
            X, skip_X = X

            x = self.normal(X)
            skip = self.down(skip_X)
            x = Add()([x, skip])
            x = self.learn(x)
            x = self.resample(x)
            f = Multiply()([x, skip_X])
            if self.bn:
                return self.BN(f)
            else:
                return f


        def get_config(self):
            base_config = super().get_config()
            return {
                **base_config,
                "filters":self.filters,
                "bn":self.bn
            }

    ### Attention U-Net
    # Inputs
    input_layer = Input(shape=(256,256,3)) ## `images` is input data

    # Encoder
    p1, c1 = EncoderBlock(32,0.1, name="Encoder1")(input_layer)
    p2, c2 = EncoderBlock(64,0.1, name="Encoder2")(p1)
    p3, c3 = EncoderBlock(128,0.2, name="Encoder3")(p2)
    p4, c4 = EncoderBlock(256,0.2, name="Encoder4")(p3)

    # Encoding
    encoding = EncoderBlock(512,0.3, pooling=False, name="Encoding")(p4)

    # Attention + Decoder
    a1 = AttentionGate(256, bn=True, name="Attention1")([encoding, c4])
    d1 = DecoderBlock(256,0.2, name="Decoder1")([encoding, a1])

    a2 = AttentionGate(128, bn=True, name="Attention2")([d1, c3])
    d2 = DecoderBlock(128,0.2, name="Decoder2")([d1, a2])

    a3 = AttentionGate(64, bn=True, name="Attention3")([d2, c2])
    d3 = DecoderBlock(64,0.1, name="Decoder3")([d2, a3])

    a4 = AttentionGate(32, bn=True, name="Attention4")([d3, c1])
    d4 = DecoderBlock(32,0.1, name="Decoder4")([d3, a4])

    # Output
    output_layer = Conv2D(1, kernel_size=1, activation='sigmoid', padding='same')(d4)

    # Model
    model = Model(
        inputs=[input_layer],
        outputs=[output_layer]
    )

    # Compile
    # BinaryIoU thresholds the sigmoid probabilities at 0.5 before building
    # the confusion matrix. MeanIoU expects class indices, so feeding it raw
    # probabilities counts almost every prediction as class 0.
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy', BinaryIoU(target_class_ids=[0, 1], threshold=0.5, name='IoU')]
    )

    ## model summary
    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    metric_model_summary = "\n".join(stringlist)

    # Training (callbacks such as early stopping can be passed via `callbacks=`)
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epoch, # 15 will be enough for a good Model for better model go with 20+
    )

    ## TEST Stage
    ### test dataset /tmp/test_dataset/*
    test_ds = build_dataset('test_dataset')
    # evaluate returns the loss followed by the compiled metrics, in order
    loss, acc, iou = model.evaluate(test_ds)


    ## metadata
    metadata = {
        "outputs": [
            {
                'storage': 'inline',
                'source': '''# Model Overview
## Model Summary

```
{}
```

## Model Performance

**Accuracy**: {}
**Loss**: {}
**IoU**: {}
'''.format(metric_model_summary,acc,loss,iou),
                'type': 'markdown',
            }
        ]
    }

    metrics = {
      'metrics': [{
          'name': 'Accuracy',
          'numberValue':  float(acc),
          'format' : "PERCENTAGE"
        },{
          'name': 'Loss',
          'numberValue':  float(loss),
          'format' : "RAW"
        },{
          'name': 'IoU',
          'numberValue':  float(iou),
          'format' : "RAW"
        }
        ]}

    ### Save model to minIO
    print('start saving model')
    keras.models.save_model(model, "/tmp/attention_unet")

    def upload_local_directory_to_minio(local_path, bucket_name, minio_path):
        """Recursively upload the content of local_path below minio_path."""
        assert os.path.isdir(local_path)

        for local_file in glob(local_path + '/**'):
            local_file = local_file.replace(os.sep, "/") # Replace \ with / on Windows
            if not os.path.isfile(local_file):
                upload_local_directory_to_minio(
                    local_file, bucket_name, minio_path + "/" + os.path.basename(local_file))
            else:
                remote_path = os.path.join(
                    minio_path, local_file[1 + len(local_path):])
                remote_path = remote_path.replace(
                    os.sep, "/")  # Replace \ with / on Windows
                minio_client.fput_object(bucket_name, remote_path, local_file)

    upload_local_directory_to_minio("/tmp/attention_unet",minio_bucket,"models/attention_unet/1") # 1 for version 1

    print("Saved model to minIO")

    from collections import namedtuple
    output = namedtuple('output', ['mlpipeline_ui_metadata', 'mlpipeline_metrics'])
    return output(json.dumps(metadata),json.dumps(metrics))

# Wrap the two Python functions as lightweight pipeline components; each one
# runs in its own base image with the listed packages installed at start-up.
component_split_data = components.create_component_from_func(
    split_data,
    base_image="python:3.10.0",
    packages_to_install=['scikit-learn','minio','numpy','pillow'],
)
component_model_building = components.create_component_from_func(
    model_building,
    base_image="tensorflow/tensorflow:2.9.3",
    packages_to_install=['scikit-learn','minio','pandas','numpy','tf_explain'],
)


@dsl.pipeline(
    name='breast-cancer-segmentation',
    description='example pipeline for breast cancer segmentation'
)
def output_test(epochs):
    """Pipeline: split the data, train the model, then create a SeldonDeployment.

    ``epochs`` is forwarded to the model-building component. The three steps
    are chained explicitly with ``.after`` because they exchange data through
    MinIO rather than through component outputs.
    """
    # Timestamp taken when this function body runs; it makes the deployment
    # and step names unique.
    run_stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    bucket = "kubeflow"

    split_task = component_split_data()
    train_task = component_model_building(epochs)
    train_task.after(split_task)

    # Inference graph: a single TensorFlow server node reading the model from
    # the bucket.
    predictor_graph = {
        "name": "classifier",
        "implementation": "TENSORFLOW_SERVER",
        "modelUri": f"s3://{bucket}/models/attention_unet",
        "envSecretRefName": "seldon-init-container-secret"
    }
    predictor = {
        "name": "predictor",
        "replicas": 1,
        "graph": predictor_graph
    }
    deployment_manifest = {
        "apiVersion": "machinelearning.seldon.io/v1",
        "kind": "SeldonDeployment",
        "metadata": {
            "name": f"breast-cancer-segmentation-{run_stamp}",
            "namespace": "kubeflow-user-example-com"
        },
        "spec": {
            "protocol": "seldon",
            "predictors": [predictor]
        }
    }

    deploy_task = dsl.ResourceOp(
        name=f'seldon-deployment-{run_stamp}',
        k8s_resource=deployment_manifest,
        action="create",
        attribute_outputs={"name": "{.metadata.name}"}
    )
    deploy_task.after(train_task)

if __name__ == "__main__":
    # Connection placeholders; fill these in before running the script.
    kubeflow_gateway_endpoint = "<kubeflow-gateway-endpoint>" # e.g. 172.0.0.1
    authservice_session_cookie = "<authservice_session_cookie>"

    pipeline_host = f"https://{kubeflow_gateway_endpoint}/pipeline"
    session_cookie = f"authservice_session={authservice_session_cookie}"

    # need to store tls.crt before running the pipeline
    client = kfp.Client(
        host=pipeline_host,
        cookies=session_cookie,
        ssl_ca_cert="cert/tls.crt",
    )

    # Submit a single run of the pipeline with its argument values.
    arguments = {"epochs": 4}
    client.create_run_from_pipeline_func(
        output_test,
        arguments=arguments,
        experiment_name="breast-cancer-segmentation",
    )