# Assignment 3
# 2D Fault Detection
### Due date: December 13, 2021
In this notebook, a neural network is trained on synthetic data to detect faults in seismic sections, treating the task as image segmentation.\
Your work: 

* complete missing parts (in loading data)
* write the network.
* try predicting on the validation set

First, loading some packages:

In [None]:
import os
import sys
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import seaborn as sns
sns.set_style('white')
import glob
import time

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import tensorflow as tf

%matplotlib inline

In [None]:
tf.config.list_physical_devices('GPU')

## Data reading and preprocessing
The data for this assignment are in D2L. They consist of a zip file with seismic images and labels (masks).\
The data are already divided into training and validation sets.


Setting image files locations:

In [None]:
# Write the path to the files (read from D2L and upload to COLAB or copy to your system) 
MYPATH= ....

In [None]:
# Training set
folder_train_x = MYPATH + "/2D_train/seis/*.png"
folder_train_y = MYPATH + "/2D_train/fault/*.png"

# Validation set
folder_validation_x = ...
folder_validation_y = ...

Defining meta-data:

In [None]:
image_size = (128, 128)
batch_size = 20
seed = 666

Loading images and masks:
Using the code provided for
* folder_train_x, \
as an example, complete the reading, loading and preprocessing for:
* folder_train_y, 
* folder_validation_x 
* folder_validation_y.

In [None]:
# Training
# Collect the paths of all PNG files matching the training glob patterns.
xfiles = glob.glob(folder_train_x)
yfiles = ...
# glob returns paths in arbitrary order, so both lists are sorted; presumably
# this pairs image i with mask i, which relies on the seis/ and fault/ folders
# using matching file names -- TODO confirm against the data set.
xfiles = np.sort(xfiles)
yfiles = ...
# Load every file as a single-channel (grayscale) image and divide by 255 so
# that 8-bit pixel values land in [0, 1]; the result is stacked into one array.
train_images = np.array([np.array(load_img(i, color_mode = "grayscale")) / 255 for i in xfiles])
train_masks = ...

In [None]:
print(train_images.shape, train_masks.shape)

In [None]:
# Validation
xfiles = ...
yfiles = ...
xfiles = ...
yfiles = ...
validation_images = ...
validation_masks = ...

In [None]:
print(validation_images.shape, validation_masks.shape)

Reshaping `numpy` arrays for the modeling:

In [None]:
# Add a trailing channel axis so every array has shape (n_samples, rows, cols, 1).
# The spatial size is taken from `image_size` (defined with the other meta-data)
# instead of repeating the literal 128, so changing the image size only needs
# one edit.
sample_shape = (-1, *image_size, 1)

# Training set
train_images = train_images.reshape(sample_shape)
train_masks = train_masks.reshape(sample_shape)

# Validation set
validation_images = validation_images.reshape(sample_shape)
validation_masks = validation_masks.reshape(sample_shape)

Data augmentation:

In [None]:
# Augmentation by mirroring. Axis 0 is the sample axis, axis 1 the image rows
# and axis 2 the image columns.
# Step 1: append a left-right mirrored copy of every sample (doubles the set).
# Step 2: append an up-down mirrored copy of that doubled set (4x in total).
# Images and masks receive the same flips in the same order, so the
# image/mask pairs stay aligned.
train_images = np.concatenate((train_images, np.flip(train_images, axis=2)), axis=0)
train_masks = np.concatenate((train_masks, np.flip(train_masks, axis=2)), axis=0)
train_images = np.concatenate((train_images, np.flip(train_images, axis=1)), axis=0)
train_masks = np.concatenate((train_masks, np.flip(train_masks, axis=1)), axis=0)

In [None]:
# Preview of the augmented training set: 4 rows x 10 columns, each panel showing
# an image (grey) with its mask overlaid in translucent green.
# Row r starts at the r-th quarter of the array, i.e. at the first sample of the
# original data and of each of the three flipped copies appended after it.
# Images and masks are indexed with the same position (they have equal length).
fig, axs = plt.subplots(4, 10, figsize=(22,8))
n_samples = len(train_images)
row_offsets = [0, n_samples/4, n_samples/2, n_samples/4*3]
for row, offset in enumerate(row_offsets):
    for col in range(10):
        sample = int(offset + col)
        panel = axs[row][col]
        panel.imshow(train_images[sample].squeeze(), cmap="Greys")
        panel.imshow(train_masks[sample].squeeze(), cmap="Greens", alpha=0.3)
fig.suptitle("Top row: original images, bottom 3 rows: augmented images")

In [None]:
print(train_images.shape, train_masks.shape)

## Exercise: 
Write your network. It can be a U-Net, or any other architecture you would like to try.

In [None]:
def unet():
    """Build and return the segmentation network (exercise: to be implemented).

    The notebook calls ``compile``, ``summary`` and ``fit`` on the returned
    object and trains it with a binary cross-entropy loss on image and mask
    arrays reshaped to ``(-1, 128, 128, 1)``. The model therefore presumably
    has to map a ``(128, 128, 1)`` input to a ``(128, 128, 1)`` map of
    per-pixel fault probabilities (e.g. a sigmoid output layer) -- confirm
    against the assignment requirements.
    """
    ...

Generating model:

In [None]:
model_type = "myunet"
model1 = unet()

model1.compile(
    loss = "binary_crossentropy", 
    optimizer = "adam",
    metrics = ["accuracy"])

model1.summary()

### Training model:
Use the path below to save your models in unique directories for testing.

In [None]:
version = 1
save_model_name = f"./model/{model_type}_trained_v{version}.model"
save_model_name

In [None]:
# All three callbacks watch the *validation* accuracy. Validation data is passed
# to fit() below, and tracking the training metric instead would stop on, and
# checkpoint, the model that best fits the training set rather than the one
# that generalises best. Keras logs the validation counterpart of the
# "accuracy" metric under the name "val_accuracy".
monitored_metric = 'val_accuracy'

# Stop training when the monitored metric has not improved for `patience` epochs.
# Note: with epochs = 10 below, a patience of 15 can never trigger; it only
# matters once the number of epochs is increased.
early_stopping = EarlyStopping(
    monitor = monitored_metric,
    mode = 'max',
    patience = 15,
    verbose = 1)

# Write the model to `save_model_name` whenever the monitored metric reaches a
# new maximum, so the file on disk always holds the best model seen so far.
model_checkpoint = ModelCheckpoint(
    save_model_name,
    monitor = monitored_metric,
    mode = 'max',
    save_best_only = True,
    verbose = 1)

# Halve the learning rate after 7 epochs without improvement, never going
# below min_lr.
reduce_lr = ReduceLROnPlateau(
    monitor = monitored_metric,
    mode = 'max',
    factor = 0.5,
    patience = 7,
    min_lr = 0.0001,
    verbose = 1)

epochs = 10
batch_size = 20

# Train and report the wall-clock duration of the fit in hours.
t_model1_start = time.time()
history = model1.fit(train_images, train_masks,
                     validation_data = (validation_images, validation_masks),
                     epochs = epochs,
                     batch_size = batch_size,
                     callbacks = [early_stopping, model_checkpoint, reduce_lr],
                     verbose = 1)
t_model1_end = time.time()
print(f"Run time = {(t_model1_end-t_model1_start)/3600} hours")

# PART 2: prediction

In the second part, we load the model from disk and use it for prediction.\
If you have more than one model saved, choose the best.

In [None]:
version = 1
model_type = "myunet"
model_name = f"./model/{model_type}_trained_v{version}.model"
model = load_model(model_name)

In [None]:
print(validation_images.shape)

In [None]:
model.summary()

In [None]:
preds_valid = model.predict(COMPLETE HERE...)

In [None]:
preds_valid = preds_valid.reshape(-1, 128, 128)

In [None]:
from random import randint
def plotImageTranspValid(file1, file2, file3, k, alpha1 = 0.2, alpha2 = 0.2):
    """Plot a k x k grid of random samples with two translucent overlays.

    Each panel shows one sample of ``file1`` in grey, with the matching sample
    of ``file2`` overlaid in blue (the faults, per the figure title) and the
    matching sample of ``file3`` overlaid in red (the prediction).

    Parameters
    ----------
    file1, file2, file3 : array-like
        Indexed along their first axis with the same sample index; ``file1``
        must expose ``shape[0]`` as the number of samples.
    k : int
        Number of rows and of columns in the grid.
    alpha1, alpha2 : float
        Opacity of the ``file2`` and ``file3`` overlays respectively.

    Returns
    -------
    The created matplotlib figure.

    Notes
    -----
    Samples are drawn independently with ``randint`` for every panel, so the
    same sample may appear more than once.
    """
    # squeeze=False keeps `ax` two-dimensional even for k == 1; without it
    # plt.subplots returns a bare Axes object and ax[i, j] raises.
    fig, ax = plt.subplots(nrows=k, ncols=k, figsize=(18, 18), squeeze=False)
    for i in range(k):
        for j in range(k):
            ind = randint(0, file1.shape[0] - 1)
            ax[i, j].imshow(file1[ind], cmap='Greys')
            ax[i, j].imshow(file2[ind], cmap='Blues', alpha=alpha1)
            ax[i, j].imshow(file3[ind], cmap='Reds', alpha=alpha2)
            ax[i, j].set_axis_off()
    # Negative wspace pulls the square panels together horizontally.
    fig.subplots_adjust(wspace = -0.15, hspace = 0.02)
    fig.suptitle("Blue: faults, Red: prediction.")
    return fig

In [None]:
fig = plotImageTranspValid(validation_images, validation_masks, preds_valid, k = 8, alpha1 = 0.2, alpha2 = 0.3)

### Select a better threshold

In [None]:
import numpy as np

# src: https://www.kaggle.com/aglotero/another-iou-metric
def iou_metric(y_true_in, y_pred_in, print_table=False):
    """Score one binary mask prediction: mean precision over IoU thresholds.

    The foreground IoU between the two masks is compared against the
    thresholds 0.50, 0.55, ..., 0.95. At each threshold the precision is
    ``TP / (TP + FP + FN)``, and the mean over all thresholds is returned.

    Parameters
    ----------
    y_true_in, y_pred_in : array-like
        Ground-truth and predicted masks with the same number of elements.
        Values are expected in [0, 1]: values in [0, 0.5) count as background,
        values in [0.5, 1] as foreground. Values outside [0, 1] fall outside
        the histogram bins and are ignored.
    print_table : bool
        If True, print TP/FP/FN and precision per threshold and the final mean.

    Returns
    -------
    float
        Mean precision in [0, 1]. Two masks that both contain no foreground
        score 1.0.
    """
    labels = np.asarray(y_true_in)
    y_pred = np.asarray(y_pred_in)

    # Fixed bin edges instead of a bin *count*: with a count, numpy derives the
    # edges from the data range, so a constant all-zero mask is binned as
    # foreground and e.g. an empty truth vs. a full prediction scores 1.0.
    bin_edges = [0, 0.5, 1]

    # 2x2 contingency table: rows = true class, columns = predicted class.
    intersection = np.histogram2d(labels.flatten(), y_pred.flatten(), bins=(bin_edges, bin_edges))[0]

    # Compute areas (needed for finding the union between all objects)
    area_true = np.histogram(labels, bins=bin_edges)[0]
    area_pred = np.histogram(y_pred, bins=bin_edges)[0]

    # Neither mask contains foreground: nothing to detect and nothing was
    # predicted, which is a perfect result.
    if area_true[1] == 0 and area_pred[1] == 0:
        if print_table:
            print("AP\t-\t-\t-\t{:1.3f}".format(1.0))
        return 1.0

    area_true = np.expand_dims(area_true, -1)
    area_pred = np.expand_dims(area_pred, 0)

    # Compute union
    union = area_true + area_pred - intersection

    # Exclude background from the analysis
    intersection = intersection[1:, 1:]
    union = union[1:, 1:]
    union[union == 0] = 1e-9  # avoid division by zero

    # Compute the intersection over union
    iou = intersection / union

    # Precision helper function (rows of `iou` are true objects, columns are
    # predicted objects)
    def precision_at(threshold, iou):
        matches = iou > threshold
        true_positives = np.sum(matches, axis=1) == 1   # Correctly matched true objects
        false_positives = np.sum(matches, axis=0) == 0  # Extra (unmatched predicted) objects
        false_negatives = np.sum(matches, axis=1) == 0  # Missed (unmatched true) objects
        tp, fp, fn = np.sum(true_positives), np.sum(false_positives), np.sum(false_negatives)
        return tp, fp, fn

    # Loop over IoU thresholds
    prec = []
    if print_table:
        print("Thresh\tTP\tFP\tFN\tPrec.")
    for t in np.arange(0.5, 1.0, 0.05):
        tp, fp, fn = precision_at(t, iou)
        if (tp + fp + fn) > 0:
            p = tp / (tp + fp + fn)
        else:
            p = 0
        if print_table:
            print("{:1.3f}\t{}\t{}\t{}\t{:1.3f}".format(t, tp, fp, fn, p))
        prec.append(p)

    if print_table:
        print("AP\t-\t-\t-\t{:1.3f}".format(np.mean(prec)))
    return np.mean(prec)

def iou_metric_batch(y_true_in, y_pred_in):
    """Return the mean of ``iou_metric`` over the first axis of both inputs.

    ``y_true_in`` and ``y_pred_in`` are indexed sample by sample along axis 0
    (the number of samples is read from ``y_true_in.shape[0]``), each pair is
    scored with ``iou_metric`` and the scores are averaged.
    """
    n_samples = y_true_in.shape[0]
    per_sample_scores = [
        iou_metric(y_true_in[idx], y_pred_in[idx]) for idx in range(n_samples)
    ]
    return np.mean(per_sample_scores)

In [None]:
thresholds = np.linspace(0, 1, 50)
ious = np.array([iou_metric_batch(validation_masks, np.int32(preds_valid > threshold)) for threshold in thresholds])

In [None]:
plt.plot(ious)

In [None]:
# Pick the threshold with the highest IoU, ignoring the first 9 and the last 10
# entries of `ious`. The "+ 9" converts the argmax position inside the slice
# back into an index into the full `ious` / `thresholds` arrays.
# NOTE(review): presumably the extremes are skipped because thresholds close to
# 0 or 1 produce degenerate (nearly all-foreground / all-background) masks --
# confirm the intent.
threshold_best_index = np.argmax(ious[9:-10]) + 9
iou_best = ious[threshold_best_index]
threshold_best = thresholds[threshold_best_index]

In [None]:
plt.plot(thresholds, ious)
plt.plot(threshold_best, iou_best, "xr", label="Best threshold")
plt.xlabel("Threshold")
plt.ylabel("IoU")
plt.title("Threshold vs IoU ({}, {})".format(threshold_best, iou_best))
plt.legend()

New predictions using best threshold:

In [None]:
predictions = np.int32(preds_valid > threshold_best)

In [None]:
plt.imshow(preds_valid[0])
plt.colorbar()

In [None]:
plt.imshow(predictions[0])
plt.colorbar()

Define a function to plot

In [None]:
def plotImageTransp(file1, file2, k, alpha = 0.2):
    """Plot a k x k grid of random samples with one translucent overlay.

    Each panel shows one sample of ``file1`` in grey with the matching sample
    of ``file2`` overlaid in purple.

    Parameters
    ----------
    file1, file2 : array-like
        Indexed along their first axis with the same sample index; ``file1``
        must expose ``shape[0]`` as the number of samples.
    k : int
        Number of rows and of columns in the grid.
    alpha : float
        Opacity of the ``file2`` overlay.

    Returns
    -------
    The created matplotlib figure.

    Notes
    -----
    Samples are drawn independently with ``randint`` for every panel, so the
    same sample may appear more than once.
    """
    # squeeze=False keeps `ax` two-dimensional even for k == 1; without it
    # plt.subplots returns a bare Axes object and ax[i, j] raises.
    fig, ax = plt.subplots(nrows=k, ncols=k, figsize=(18, 18), squeeze=False)
    for i in range(k):
        for j in range(k):
            ind = randint(0, file1.shape[0] - 1)
            ax[i, j].imshow(file1[ind], cmap='Greys')
            ax[i, j].imshow(file2[ind], cmap='Purples', alpha=alpha)
            ax[i, j].set_axis_off()
    # Negative wspace pulls the square panels together horizontally.
    fig.subplots_adjust(wspace = -0.15, hspace = 0.02)
    return fig

In [None]:
fig = plotImageTransp(validation_images, predictions, k = 8)