In [4]:
# %load "../ml-mangrove/Segmentation/unet.pyp" 
import segmentation_models as sm
import keras
import tensorflow as tf
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import argparse
import rasterio
import subprocess
import tensorflow_datasets as tfds
import shutil
from PIL import Image
from segmentation_models.utils import set_trainable
from glob import glob
from datetime import datetime
from tqdm import tqdm
from gis_utils import raster
from rasterio.plot import show

In [5]:
#importing notebooks
#from ipynb.fs.full.<notebook_name> import *
from ipynb.fs.full.create_seg_dataset import create_seg_dataset
from ipynb.fs.full.gen_seg_labels import gen_seg_labels, tif_to_jpg, tile_raster
from ipynb.fs.full.raster_mask import raster_mask

In [6]:
# Resources: https://yann-leguilly.gitlab.io/post/2019-12-14-tensorflow-tfdata-segmentation/

'''
Documentation/Usage: This script is meant to be called with command line arguments.
--width (required): tile width
--input_rasters (required): space separated list of rasters (orthomosaics)
--input_vectors (required for training): space separated list of shapefiles (ordering should correspond with rasters)
--train: Flag. Add if training.
--test: Flag. Add if testing.
--weights (required): path to weights file, either to write to for training, or to use for testing (.h5)
--backbone (required): name of backbone to use, ex: resnet34, vgg16

For training it should be sufficient to just call the script using the list of rasters and vectors (and other required arguments), 
and currently you have to manually set the hyperparams in the code, but this should eventually be offloaded to a settings file or 
command line arguments. This will result in the training weights being saved in the specified .h5 file.

For testing you just need to call the script on the list of rasters and it will produce a mask of the entire
orthomosaic.
'''
#keras.backend.set_image_data_format('channels_first')
sm.set_framework('tf.keras')    # need this otherwise currently a bug in model.fit when used with tf.Datasets

# Globals
N_CHANNELS = 3
WIDTH = 256
HEIGHT = 256
LOSS_FUNC = sm.losses.DiceLoss()
NUM_CLASSES = 2

In [7]:
def parse_image(img_path: str) -> dict:
    """Load an image and its annotation (mask) and returning
    a dictionary.

    Parameters
    ----------
    img_path : str
        Image (not the mask) location.

    Returns
    -------
    dict
        Dictionary mapping an image and its annotation.
    """
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.uint8)

    # Creating mask path from image path
    mask_path = tf.strings.regex_replace(img_path, "images", "annotations")
    mask_path = tf.strings.regex_replace(mask_path, "image", "annotation")
    mask = tf.io.read_file(mask_path)
    # The masks contain a class index for each pixels
    mask = tf.image.decode_jpeg(mask, channels=1)
    mask = tf.image.convert_image_dtype(mask, tf.uint8)
    
    #mask = tf.where(mask == 255, np.dtype('uint8').type(0), mask)
    # Note that we have to convert the new value (0)
    # With the same dtype than the tensor itself
    

    return {'image': image, 'segmentation_mask': mask}

In [8]:
@tf.function
def normalize(input_image: tf.Tensor, input_mask: tf.Tensor) -> tuple:
    """Rescale the pixel values of the images/masks between 0.0 and 1.0
    compared to [0,255] originally.

    Parameters
    ----------
    input_image : tf.Tensor
        Tensorflow tensor containing an image of size [SIZE,SIZE,3].
    input_mask : tf.Tensor
        Tensorflow tensor containing an annotation of size [SIZE,SIZE,1].

    Returns
    -------
    tuple
        Normalized image and its annotation.
    """
    input_mask = tf.cast(input_mask, tf.float32) / 255.0 # attempting to fix metrics    
    input_image = tf.cast(input_image, tf.float32) / 255.0
    return input_image, input_mask

In [9]:
@tf.function
def load_image_train(datapoint: dict) -> tuple:
    """Apply some transformations to an input dictionary
    containing a train image and its annotation.

    Notes
    -----
    An annotation is a regular  channel image.
    If a transformation such as rotation is applied to the image,
    the same transformation has to be applied on the annotation also.

    Parameters
    ----------
    datapoint : dict
        A dict containing an image and its annotation.

    Returns
    -------
    tuple
        A modified image and its annotation.
    """
   
    input_image = tf.image.resize(datapoint['image'], (HEIGHT, WIDTH))
    input_mask = tf.image.resize(datapoint['segmentation_mask'], (HEIGHT, WIDTH))
    #input_mask = tf.image.rgb_to_grayscale(datapoint['segmentation_mask'])
    
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    
    #input_mask = tf.reshape(input_mask, (HEIGHT, WIDTH))  # removing single channel

    input_image, input_mask = normalize(input_image, input_mask)
    
    return input_image, input_mask

In [10]:
@tf.function
def load_image_val(datapoint: dict) -> tuple:
    """Normalize and resize a test image and its annotation.

    Notes
    -----
    Since this is for the val set, we don't need to apply
    any data augmentation technique.

    Parameters
    ----------
    datapoint : dict
        A dict containing an image and its annotation.

    Returns
    -------
    tuple
        A modified image and its annotation.
    """
    input_image = tf.image.resize(datapoint['image'], (HEIGHT, WIDTH))
    input_mask = tf.image.resize(datapoint['segmentation_mask'], (HEIGHT, WIDTH))
    
    input_image, input_mask = normalize(input_image, input_mask)
    #input_mask = tf.reshape(input_mask, (HEIGHT, WIDTH)) # removing single channel

    
    return input_image, input_mask

In [11]:
@tf.function
def load_image(datapoint: dict) -> tuple:
    """Loads and image and resizes it

    Parameters
    ----------
    datapoint : dict
        A dict containing an image and its annotation.

    Returns
    -------
    tuple
        A image and its annotation.
    """
    input_image = tf.image.resize(datapoint['image'], (HEIGHT, WIDTH))
    input_mask = tf.image.resize(datapoint['segmentation_mask'], (HEIGHT, WIDTH))
    #input_mask = tf.image.resize(datapoint['label'], (HEIGHT, WIDTH))

    return input_image, input_mask

In [12]:
def display(display_list):
    plt.figure(figsize=(15, 15))

    title = ['Input Image', 'True Mask', 'Predicted Mask']

    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i+1)
        plt.title(title[i])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(display_list[i]))
        plt.axis('off')
    plt.show()

In [13]:
def create_mask(pred_mask):
    pred_mask = tf.argmin(pred_mask, axis=-1)
    pred_mask = pred_mask[..., tf.newaxis]
    #return pred_mask[0]
    return pred_mask

In [14]:
def show_predictions(model=None, dataset=None, num=1):
    if dataset:
        for image, mask in dataset.take(num):
            pred_mask = model.predict(image)
            display([image[0], mask[0], create_mask(pred_mask[0])])

In [32]:
def train(backbone, weight_file):
    # For tensorboard
    logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, update_freq='epoch')


    # For more information about autotune:
    # https://www.tensorflow.org/guide/data_performance#prefetching
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    print(f"Tensorflow ver. {tf.__version__}")

    # For reproducibility
    SEED = 42

    # Data
    training_data = "../dataset/training/"
    #val_data = "../dataset/validation/"

    # Listing GPU info
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            print(e)

    # Hyperparams
    BATCH_SIZE = 16
    BUFFER_SIZE = 1000 # See https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle

    # Creating and splitting dataset
    DATASET_SIZE = len(glob(training_data + "images/*.jpg"))
    print(f"The Training Dataset contains {DATASET_SIZE} images.")

    TRAIN_SIZE = int(0.8 * DATASET_SIZE)
    VAL_SIZE = int(0.2 * DATASET_SIZE)

    full_dataset = tf.data.Dataset.list_files(training_data + "images/*.jpg", seed=SEED)
    full_dataset = full_dataset.shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
    train_dataset = full_dataset.take(TRAIN_SIZE)
    val_dataset = full_dataset.skip(TRAIN_SIZE)
    
    # Creating dict pairs linking images and annotations
    train_dataset = train_dataset.map(parse_image)
    val_dataset = val_dataset.map(parse_image)

    # -- Train Dataset --# - https://stackoverflow.com/questions/49915925/output-differences-when-changing-order-of-batch-shuffle-and-repeat
    train_dataset = train_dataset.map(load_image_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    train_dataset = train_dataset.shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
    train_dataset = train_dataset.repeat()
    train_dataset = train_dataset.batch(BATCH_SIZE)
    train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)

    #-- Validation Dataset --#
    #for image, label in tfds.as_numpy(val_dataset):
     # print(type(image), type(label), label)
    
    val_dataset = val_dataset.map(load_image_val, num_parallel_calls=AUTOTUNE)
    val_dataset = val_dataset.repeat()
    val_dataset = val_dataset.batch(BATCH_SIZE)
    val_dataset = val_dataset.prefetch(buffer_size=AUTOTUNE)

    # for multi gpu distributed processing
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    # define model within scope to enable distributed learning
    with strategy.scope():
        model = sm.Unet(
        backbone,
        weights=weight_file,
        input_shape=(HEIGHT, WIDTH, N_CHANNELS), 
        encoder_weights='imagenet', 
        encoder_freeze=True,    # only training decoder network
        classes=NUM_CLASSES,
        activation='sigmoid'
        )
        
        # adding l1 regularization
        l1_reg = keras.regularizers.l1(0.1)
        sm.utils.set_regularization(model, kernel_regularizer=l1_reg)
        
        model.compile(
            'Adam', 
            loss=LOSS_FUNC, 
            metrics=[sm.metrics.iou_score] #was giving score over 100 in later epochs before normalizing masks
            #[tf.keras.metrics.MeanIoU(num_classes=2)]]
        )
        
    #Saves model with lowest loss on validation set
    checkpoint = tf.keras.callbacks.ModelCheckpoint(weight_file, monitor='val_loss', verbose=1, 
                             save_best_only=True, save_weights_only=True, 
                             mode='auto', save_frequency='epoch')
    
    #Stops Model training if loss on validation set does notdecrease after 30 epochs
    early_stop = tf.keras.callbacks.EarlyStopping(
                            monitor='val_loss', min_delta=0, patience=40, verbose=0, mode='auto',
                            baseline=None, restore_best_weights=True
)
    # TODO research step sizes
    history = model.fit(
    train_dataset,
    epochs=350,
    initial_epoch=229, # resuming training 8/18/2020
    steps_per_epoch=TRAIN_SIZE / BATCH_SIZE,
    validation_data=val_dataset,
    validation_steps= 0.2 * (VAL_SIZE / BATCH_SIZE),
    callbacks=[tensorboard_callback, checkpoint, early_stop]
    )

    # Saving model
    #model.save_weights("unet_500_weights_vgg16.h5")
    #model.save_weights(weight_file)

    # For reinstantiation
    #model = keras.models.load_model(your_file_path)

In [16]:
# function to display slice of batch in datasets, as images

%matplotlib inline
from matplotlib import pyplot as plt

def showDataImgs(train_dataset, val_dataset, n):
    train_dataset_np = tfds.as_numpy(train_dataset)
    val_dataset_np = tfds.as_numpy(val_dataset)
    

    for i in range(n):
        example = next(val_dataset_np)
    image = example['image']
    label = example['segmentation_mask']
    
    #print("val_label: ", label.shape)#label[11])
    #print("val_image: ", image.shape)
    
    #label_slice = label[n].reshape(HEIGHT,WIDTH)
    #label_slice = np.around(label_slice)
    #plt.imshow(label_slice, cmap='gray')
    label = label.reshape(HEIGHT,WIDTH)
    plt.imshow(label)
    plt.show()
    plt.imshow(image)
    #plt.imshow(image[n])
    plt.show()
    unique = np.unique(label)
    print(unique)
    if len(unique) > 2:
        with np.printoptions(threshold=np.inf):
            print(label)
    #print(label_slice)

    
    for i in range(n):
        example = next(train_dataset_np)
    image = example['image']
    label = example['segmentation_mask']
    
    #print("train_label: ",label.shape)#label[15])
    #print("train_image: ",image.shape)
    
    #label_slice = label[n].reshape(HEIGHT,WIDTH)
    #label_slice = np.around(label_slice)
    #plt.imshow(label_slice, cmap='gray')
    label = label.reshape(HEIGHT,WIDTH)
    plt.imshow(label)
    plt.show()
    plt.imshow(image)
    #plt.imshow(image[n])
    plt.show()
    unique = np.unique(label)
    print(unique)
    if len(unique) > 2:
        with np.printoptions(threshold=np.inf):
            print(label)
    #print(label_slice)

    #print(image[0])
    # label_grey = np.mean(label[n], -1)
    # label_grey.reshape((256,256, 1))

In [14]:
# # use to view images/masks in dataset
# AUTOTUNE = tf.data.experimental.AUTOTUNE
# SEED = 42
# training_data = "../dataset/training/"
# BATCH_SIZE = 16
# BUFFER_SIZE = 1000 # See https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle

# # Creating and splitting dataset
# DATASET_SIZE = len(glob(training_data + "images/*.jpg"))
# print(f"The Training Dataset contains {DATASET_SIZE} images.")

# TRAIN_SIZE = int(0.8 * DATASET_SIZE)
# VAL_SIZE = int(0.2 * DATASET_SIZE)

# full_dataset = tf.data.Dataset.list_files(training_data + "images/*.jpg", seed=SEED)
# full_dataset = full_dataset.shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
# train_dataset = full_dataset.take(TRAIN_SIZE)
# val_dataset = full_dataset.skip(TRAIN_SIZE)
    
# # Creating d1ict pairs linking images and annotations
# train_dataset = train_dataset.map(parse_image)
# val_dataset = val_dataset.map(parse_image)

# showDataImgs(train_dataset, val_dataset, 3)

# # -- Train Dataset --# - https://stackoverflow.com/questions/49915925/output-differences-when-changing-order-of-batch-shuffle-and-repeat
# train_dataset = train_dataset.map(load_image_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
# train_dataset = train_dataset.shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
# train_dataset = train_dataset.repeat()
# train_dataset = train_dataset.batch(BATCH_SIZE)
# train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)

# #-- Validation Dataset --#
# val_dataset = val_dataset.map(load_image_val, num_parallel_calls=AUTOTUNE)
# val_dataset = val_dataset.repeat()
# val_dataset = val_dataset.batch(BATCH_SIZE)
# val_dataset = val_dataset.prefetch(buffer_size=AUTOTUNE)





In [15]:
# image = Image.open("../dataset/training/vectors/masks/mask_binary.png")
# data = np.asarray(image)
# print(type(data))
# print(data.shape)
# print(np.unique(data))

In [16]:
#for example in train_dataset.take(1):  # example is `{'image': tf.Tensor, 'label': tf.Tensor}`
#for example in next(val_dataset_np):
  #print(example)
  #image = example[0]
  #label = example[1]
  #label = tf.cast(label, tf.float32) / 255.0 #normalizing label  
  #print((image==label).all())
  #print(image.size)
#  image_img = Image.fromarray(image, 'RGB')
 #
  #plt.imshow(image)
  #plt.show()
#   image_img.save('image_test.png')
#   image_img.show()

#  label_img = Image.fromarray(label, 'RGB')
  #label= np.reshape(label, (256,256))
  #plt.imshow(label)
  #plt.show()
#   label_img.save('label_test.png')
#   label_img.show()

# for example in val_dataset.take(2):  # example is `{'image': tf.Tensor, 'label': tf.Tensor}`
#   #print(example)
#   #image = example[0]
#   #label = example[1]
#   print(image.shape, label.shape)

In [17]:
def test(backbone, weight_file, vector_files, raster_files):

    AUTOTUNE = tf.data.experimental.AUTOTUNE
    print(f"Tensorflow ver. {tf.__version__}")

    # For reproducibility
    SEED = 42

    # Relevant directories/files
    image_dir = "../dataset/testing/images"
    annotation_dir = "../dataset/testing/annotations"
    out_dir = "../dataset/testing/output"
    testing_data = "../dataset/testing/"
    model_weights = weight_file

    #Listing GPU info
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
       try:
           for gpu in gpus:
               tf.config.experimental.set_memory_growth(gpu, True)
           logical_gpus = tf.config.experimental.list_logical_devices('GPU')
           print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
       except RuntimeError as e:
           print(e)

    # Hyperparams
    BATCH_SIZE = 16
    BUFFER_SIZE = 1000 # See https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle

    model = sm.Unet(
        #'vgg16', 
        backbone,
        input_shape=(HEIGHT, WIDTH, N_CHANNELS), 
        encoder_weights='imagenet', 
        weights=model_weights,
        encoder_freeze=True,    # only training decoder network
        classes= NUM_CLASSES, 
        activation='sigmoid'
    )

    # Might be unnecessary
    model.compile(
        'Adam', 
        #loss=sm.losses.bce_jaccard_loss, 
        loss=LOSS_FUNC,
        metrics=[sm.metrics.iou_score]
    )
    
    test_dataset = glob(os.path.join(image_dir, "*.jpg"))

    #Loop for inference
    print("\nStarting inference... \n")
    for img_file in tqdm(test_dataset):
        tif_file = img_file.replace("jpg", "tif")

        img = np.asarray(Image.open(img_file)) / 255.0 # normalization (not) needed as we dont normalize the img for training
        img = img[np.newaxis, ...] # needs (batch_size, height, width, channels)
        pred_mask = model.predict(img)[0]
        pred_mask = create_mask(pred_mask)
        pred_mask = np.array(pred_mask).astype('uint8') * 255
        #print(pred_mask)
        # Reading metadata from .tif
        with rasterio.open(tif_file) as src:
            tif_meta = src.meta
            tif_meta['count'] = 1

        # Writing prediction mask as a .tif using extracted metadata
        mask_file = tif_file.replace("images", "output")
        
        with rasterio.open(mask_file, "w", **tif_meta) as dest:
            # Rasterio needs [bands, width, height]
            pred_mask = np.rollaxis(pred_mask, axis=2)
            dest.write(pred_mask)
	#printing out metrics
	#results = model.evaluate(img, pred_mask, batch_size=128)
	#print("IOU: ", results) 
    print("Merging tiles (to create mask ortho)...")
    call = "gdal_merge.py -o " + testing_data + "ortho_mask.tif " + " " + out_dir + "/*"
    print(call)
    subprocess.call(call, shell=True)
    
#     print("Creating raster_masks...")
#     vector_file = vector_files[0]
#     raster_file = raster_files[0]
#     raster_mask(raster_files[0], vector_files[0])
#     temp_dir = os.path.dirname(vector_file)
#     mask_file = os.path.join(temp_dir, "masks", "mask_binary.tif")
    
#     #out_dir = os.path.dirname(raster_file)
#     gen_seg_labels(out_width, raster_file, vector_file, mask_file, image_dir, True, True)
    
    
#     test_dataset = tf.data.Dataset.list_files(testing_data + "images/*.jpg", seed=SEED)
#     test_dataset = test_dataset.map(parse_image)
#     test_dataset = test_dataset.map(load_image_val, num_parallel_calls=AUTOTUNE)
#     test_dataset = test_dataset.batch(16)
    
#     map_file = os.path.join(image_dir, "map.txt")
#     create_seg_dataset(map_file, "testing", 0)
#     logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
#     tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, update_freq='epoch')
#     print("Evaluating...")
#     model.evaluate(test_dataset, 
#                   callbacks=tensorboard_callback
#                   )

In [18]:
def test_optimized(backbone, weight_file):
    '''
    Note: This version of test does not work yet. It is optimized to be very efficient and works well for inference on .jpg files.
    It lacks the capabilities to link the output predictions to the input .jpgs since the filenames are lost when in the tf.dataset
    we map the parse image function. As a result, we need to somehow modify this dataset to retain filename information so we can use it
    to link the output prediction to the input image and its corresponding .tif file, which will be used to write the geospatial info to
    the prediction.

    Initial ideas would be to modify the parse image function and related functions to save filename info, and use this to link the images
    in the prediction stage by replacing .jpg with .tif in the filename.
    '''
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    print(f"Tensorflow ver. {tf.__version__}")

    # For reproducibility
    SEED = 42

    # Relevant directories/files
    images = "../dataset/testing/images"
    annotations = "../dataset/testing/annotations"
    testing_data = "../dataset/testing/"
    #model_weights = "unet_500_weights_vgg16.h5"
    model_weights = weight_file

    # Listing GPU info
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            print(e)

    # Hyperparams
    BATCH_SIZE = 16
    BUFFER_SIZE = 1000 # See https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle

    model = sm.Unet(
        'resnet34', 
        input_shape=(HEIGHT, WIDTH, N_CHANNELS), 
        encoder_weights='imagenet', 
        weights=model_weights,
        encoder_freeze=True,    # only training decoder network
        classes=2, 
        activation='softmax'
    )

    # Might be unnecessary
    model.compile(
        'Adam', 
        loss=LOSS_FUNC, 
        metrics=[sm.metrics.iou_score]
    )

    test_dataset = tf.data.Dataset.list_files(testing_data + "images/*.jpg", seed=SEED)
    test_dataset = test_dataset.map(parse_image)
    test_dataset = test_dataset.map(load_image_val, num_parallel_calls=AUTOTUNE)
    test_dataset = test_dataset.batch(BATCH_SIZE)
    test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

    image_data = []
    annotation_data = []
    
    '''
    for img_file in tqdm(os.listdir(images)): 
        annotation_file = "annotation_" + img_file.split('_')[1]
        img_file = os.path.join(images, img_file)
        ann_file = os.path.join(annotations, annotation_file)
        image = np.array(Image.open(img_file))
        annotation = np.array(Image.open(ann_file))
        image_data.append(image)
        annotation_data.append(annotation)
    '''


    #prediction = model.predict(test_dataset, steps=1)
    #print(type(prediction))


    #display([first_image[0], first_mask[0], create_mask(first_pred_mask)])

    #pred_mask = model.predict(test_dataset)
    #display([image[0], mask[0], create_mask(pred_mask)])

    show_predictions(model=model, dataset=test_dataset, num=20)

In [35]:
# #view downsmapled testing output ortho_mask.tif
# # !sudo apt-get update
# # !sudo apt-get install libgdal-dev -y
# # !sudo apt-get install python-gdal -y
# # !sudo apt-get install python-numpy python-scipy -y
# # !pip install rasterio
# # !pip install fiona
# # !pip install geopandas
# # !pip install -i https://test.pypi.org/simple/ gis-utils-pkg-dillhicks==0.0.1
# from gis_utils import raster
# from rasterio.plot import show

# #img_1, meta2 = raster.load_image("../")
# img_10, meta1 = raster.load_image("../dataset/training/vectors/masks/mask_binary.png")
# #img_1, meta10 = raster.load_image("../dataset/training/vectors/masks/mask.tif")

# #downsampling images 
# ds_factor = 1
# #resampled_1, transform = raster.downsample_raster(img_1, ds_factor)
# #resampled_10, transform = raster.downsample_raster(img_10, ds_factor)
# show(img_10)

# #for f in os.listdir("../dataset/training/annotations"):
#     #image = image.imRead("../dataset/training/annotations/" + f)
#     #show(image)
#     #pyplot.imshow(image)
#     #pyplot.show()
#     #print(np.asarray(image))
#     #img, meta = raster.load_image("../dataset/training/annotations/" + f)
#     #show(img)
#     #print(np.asarray(img))

In [19]:
def train_setup(raster_files, vector_files, out_width):
    # Uses raster and vector file to create dataset for training
    data_files = zip(raster_files, vector_files)
    map_files = [] 
    folderpath = "../dataset/training/"
    for raster_file, vector_file in data_files:
        # Generates raster masks
        print("Creating raster_masks...")
        raster_mask(raster_file, vector_file)
        temp_dir = os.path.dirname(vector_file)
        mask_file = os.path.join(temp_dir, "masks", "mask_binary.tif")

        # Generates segmentation labels
        out_dir = os.path.dirname(raster_file)
        gen_seg_labels(out_width, raster_file, vector_file, mask_file, out_dir, True, True)
        map_file = os.path.join(out_dir, "map.txt")
        map_files.append(map_file)
        
        #show downsampled raster_mask
        img_1, meta1 = raster.load_image("../dataset/training/vectors/masks/mask.tif")
        resampled_1, transform = raster.downsample_raster(img_1, 1/5)
        show(resampled_1)
        
        shutil.rmtree(folderpath + "vectors/masks")
        shutil.rmtree(folderpath + "vectors/nm")  #removng directories
        shutil.rmtree(folderpath + "vectors/m")

    # Creating dataset to train UNet
    create_seg_dataset(map_files, "training", 0)

In [26]:
def test_setup(raster_files, out_width):
    out_dir = "../dataset/testing/output"
    test_dir = "../dataset/testing"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    print("\nTiling rasters...")
    for raster_file in raster_files:
        tile_raster(out_width, raster_file, test_dir, True, False)

In [20]:
#initalize arguments
img_p = "../dataset/training/images/"
vec_p = "../dataset/training/vectors/"

weight_file = "../dataset/training/weights/08_12_vgg16_350_full_weight.h5"
backbone = "vgg16"
out_width = "256"

#raster_files = ["../dataset/training/images/lap_2018-07_site04_120m_RGB_cc.tif", "../dataset/training/images/lap_2019-07_site06_120m_RGB_quick.tif"]
#vector_files = ["../dataset/training/vectors/lap_2018-07_site04_labels_m-nm.shp"]#, "../dataset/training/vectors/lap_2019-07_site06_labels_m-nm.shp"]
raster_files = [
                img_p + "lap_2018-07_site1_120m_RGB_cc.tif", 
                img_p + "lap_2019-07_site03_120m_RGB_quick.tif",
                img_p + "lap_2018-07_site05_120m_RGB_cc.tif",
                img_p + "lap_2019-07_site06_120m_RGB_quick.tif",
                img_p + "lap_2018-07_site04_120m_RGB_cc.tif",
                img_p + "lap_2018-07_site06_120m_RGB_cc.tif",
                img_p + "psc_2018-05_site01_120m_RGB_cc.tif",
                img_p + "psc_2018-05_site11_120m_RGB.tif",
                img_p + "psc_2018-05_site12_120m_RGB.tif",
                img_p + "psc_2018-05_site8.tif",
                img_p + "psc_2018-07_site08_120m_RGB.tif",
                img_p + "psc_2018-07_site11_120m_RGB.tif",
                img_p + "psc_2018-07_site10_120m_RGB.tif",
                img_p + "psc_2018-07_site09_120m_RGB.tif",
                img_p + "psc_2018-05_site13-14_120m_RGB.tif",
               ]

vector_files = [
                vec_p + "lap_2018-07_site01_labels_m-nm.shp", 
                vec_p + "lap_2019-07_site03_labels_m-nm.shp", 
                vec_p + "lap_2018-07_site05_120m_m-nm_dissolve.shp",
                vec_p + "lap_2019-07_site06_120m_labels_m-nm.shp",
                vec_p + "lap_2018-07_site04_labels_m-nm.shp",
                vec_p + "lap_2018-07_site06_120m_RGB_m-nm.shp",
                vec_p + "psc_2018-05_site01_120m_RGB_cc labels_m-nm.shp",
                vec_p + "psc_2018-05_site11_120m_RGB_dissolved.shp",
                vec_p + "psc_2018-05_site12_labels_m-nm.shp",
                vec_p + "psc_2018-05_site8_labels_m-nm.shp",
                vec_p + "psc_2018-07_site08_120m_RGB_labels_m-nm.shp",
                vec_p + "psc_2018-07_site11_120m_RGB_m-nm.shp",
                vec_p + "psc_2018-07_site_10_labels_m-nm.shp",
                vec_p + "psc_201807_site9_mnm.shp",
                vec_p + "psc_2018_05_site1314_120m_mnm.shp",
               ]



In [22]:
#Run Testing
test_setup(raster_files, out_width)
test(backbone, weight_file, vector_files, raster_files)

NameError: name 'test_setup' is not defined

In [None]:
#Run Training
#train_setup(raster_files, vector_files, out_width)
train(backbone, weight_file)

Tensorflow ver. 2.2.0
4 Physical GPUs, 4 Logical GPUs
The Training Dataset contains 106560 images.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
Number of devices: 4
Epoch 230/350
INFO:tensorflow:batch_all_reduce: 38 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 38 all-reduces with algorithm = nccl, num_packs = 1
Epoch 00230: val_loss improved from inf to 0.09223, saving model to ../dataset/training/weights/08_12_vgg16_350_full_weight.h5
Epoch 231/350
Epoch 00231: val_loss improved from 0.09223 to 0.08685, saving model to ../dataset/training/weights/08_12_vgg16_350_full_weight.h5
Epoch 232/350
Epoch 00232: val_loss did not improve from 0.08685
Epoch 233/350
Epoch 00233: val_loss improved from 0.08685 to 0.08247, saving model to ../dataset/training/weight

In [3]:
# PATH = os.getenv('PATH')
# %env PATH=/anaconda3/envs/py37_tensorflow/bin:$PATH
# Does not work, need to do ssh forwarding with port on vm
%load_ext tensorboard
%tensorboard --logdir logs/scalars

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 124870), started 0:00:45 ago. (Use '!kill 124870' to kill it.)

In [27]:
# if __name__ == "__main__":
#     TRAIN = False
#     TEST = False

#     parser = argparse.ArgumentParser(description="UNet Training and Inference Script (Note: order of rasters and vectors must correspond to one another)")
#     parser.add_argument("--width",help = "Width of output tiles")
#     parser.add_argument("--input_rasters", nargs='*', help = "space separated input orthomosaic (.tif)")
#     parser.add_argument("--input_vectors", nargs='*', help = "space separated input labels (.shp)")
#     parser.add_argument("--train", action='store_true', help = "training UNet")
#     parser.add_argument("--test", action='store_true', help = "testing UNet")
#     parser.add_argument("--weights", help = "path to weight file, either to save or use (.h5)")
#     parser.add_argument("--backbone", help = "segmentation model backbone, ex: resnet34, vgg16, etc.")
#     args = parser.parse_args()

#     # Parsing arguments
#     if args.width:
#         out_width = args.width
#     else:
#         print("Need to specify width, exiting.")
#         exit()
#     if args.input_rasters:
#         raster_files = args.input_rasters
#     else:
#         # Always needs a raster
#         print("Need to specify raster file, exiting.")
#         exit()
#     if args.input_vectors:
#         vector_files = args.input_vectors
#     else:
#         # Requires vector labes for training, not inference
#         if args.train:
#             print("Need to specify input vector, exiting.")
#             exit()
#     if args.train and args.test:
#         print("Can't train and test at the same time... exiting.")
#         exit()
#     elif args.train:
#         TRAIN = True
#     elif args.test:
#         TEST = True
#     if args.weights:
#         weight_file = args.weights
#     else:
#         print("Need weight file, exiting.")
#         exit()
#     if args.backbone:
#         backbone = args.backbone
#     else:
#         print("Need to specify backbone, exiting.")
#         exit()

#     # Selecting mode
#     if TRAIN: 
#         train_setup(raster_files, vector_files, out_width)
#         train(backbone, weight_file)
#     if TEST:
#         test_setup(raster_files, out_width)
#         test(backbone, weight_file)
#         #test_optimized(backbone, weight_file)

In [21]:
import shutil
#delete all training/images, /images/images, /images/labels, images/masks, 
#images/m, images/nm, annotations

#delete all files with extension
folderpath = "../dataset/training/"
for file_name in os.listdir(folderpath + "images/images"):
    if file_name.endswith('.jpg'):
        os.remove(folderpath + "images/images/" + file_name)
for file_name in os.listdir(folderpath + "images"):
    if file_name.endswith('.jpg'):
        os.remove(folderpath + "images/" + file_name)
for file_name in os.listdir(folderpath + "images/labels"):
    if file_name.endswith('.jpg'):
        os.remove(folderpath + "images/labels/" + file_name)
# shutil.rmtree("../dataset/testing/output")
# shutil.rmtree("../dataset/testing/images")
shutil.rmtree(folderpath + "vectors/masks")
shutil.rmtree(folderpath + "vectors/nm")  #removng directories
shutil.rmtree(folderpath + "vectors/m")


FileNotFoundError: [Errno 2] No such file or directory: '../dataset/training/vectors/masks'