<a href="https://colab.research.google.com/github/Max-FM/seagrass/blob/master/notebooks/prepare_training_data_banc_d_arguin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preparing Banc d'Arguin imagery to create seagrass training data for machine learning

## Mount Google Drive

In [None]:
# Colab-only: mount Google Drive at /content/drive so that the Drive paths
# used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

## Install `seagrass` package

In [None]:
%%capture

!pip install git+https://github.com/Max-FM/seagrass.git

## Create mosaic from Sentinel-2 images and import projected seagrass data

In [None]:
from seagrass.raster import open_and_match_rasters

In [None]:
#  Scenes of interest are numbers 7, 8, 11, 12 & 13, particularly 11 and 12.
#  The scene number is written as a six-digit, zero-padded string because it is
#  substituted verbatim into the Sentinel-2 filename below.
scene_number = '000011'
s2_filepath = f'/content/drive/MyDrive/Bathymetry/BancDarguin_s2cldmdn_{scene_number}.tif'
#  Combined seagrass GeoTIFF stored on the shared drive (same file for every scene).
seagrass_filepath = '/content/drive/Shareddrives/1_Satellite_Derived_Bathymetry & coastal veg/Banc dArguin bathymetry & seagrass/seagrass_geotiff/seagrass_combined.tif'

In [None]:
# Open the Sentinel-2 scene and the seagrass raster together.
# NOTE(review): the helper's name suggests the two rasters are aligned to a
# common grid -- confirm in seagrass.raster.open_and_match_rasters.
s2, seagrass = open_and_match_rasters(s2_filepath, seagrass_filepath)

## Mask out land pixels

In [None]:
#  Normalised Difference Water Index (NDWI)
def ndwi(s2):
    """Compute the Normalised Difference Water Index of a Sentinel-2 raster.

    The index is ``(green - nir) / (green + nir)``, where the green band is
    read from ``s2[4]`` and the near-infrared band from ``s2[9]``.

    Parameters
    ----------
    s2 : indexable raster
        Band-indexable object; entries 4 and 9 must support ``-``, ``+`` and ``/``.

    Returns
    -------
    The element-wise index, of whatever type the band arithmetic produces.
    There is no guard for pixels where ``green + nir`` is zero.
    """
    green_band, nir_band = s2[4], s2[9]
    difference = green_band - nir_band
    total = green_band + nir_band
    return difference / total

#  Creates a land pixel mask using the NDWI as a threshold.
def land_mask(s2):
    """Return a boolean array that is True where the NDWI is negative.

    Negative NDWI is used as the threshold for land. The comparison is made
    on ``.values`` of the NDWI result, so the mask is a plain array rather
    than a labelled raster object.
    """
    water_index = ndwi(s2)
    return water_index.values < 0

In [None]:
# Keep seagrass values only where the pixel is NOT land; land pixels are
# overwritten with the no-data value -9999.
seagrass = seagrass.where(~land_mask(s2), -9999)

## Plot mosaic and seagrass images

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from rasterio.plot import show
import numpy as np

In [None]:
def normalize(array):
    """Min-max scale an array into the range 0.0 - 1.0.

    Parameters
    ----------
    array : array-like
        Object providing ``.min()``, ``.max()`` and element-wise arithmetic
        (e.g. a numpy array).

    Returns
    -------
    Array of the same shape with the minimum mapped to 0.0 and the maximum
    mapped to 1.0. If every element is equal, an array of zeros is returned
    instead of the NaNs that dividing by a zero range would produce.
    """
    array_min, array_max = array.min(), array.max()
    value_range = array_max - array_min
    if value_range == 0:
        # Constant input: (array - min) is already all zeros; multiplying by
        # 0.0 keeps the shape and container type while forcing a float result,
        # matching the dtype the division below would give.
        return (array - array_min) * 0.0
    return (array - array_min) / value_range

def make_composite(band_1, band_2, band_3):
    """Combine three raster bands into a single normalised composite image.

    The bands are stacked along the third axis with ``np.dstack`` and the
    stack is then passed through ``normalize`` as a whole, so one shared
    min/max is used for all three bands rather than one per band.
    """
    stacked = np.dstack([band_1, band_2, band_3])
    return normalize(stacked)

def make_s2_rgb(s2_raster):
    """Build an RGB composite from a Sentinel-2 raster.

    Entries 5, 4 and 3 of ``s2_raster`` are used as the red, green and blue
    channels respectively and handed to ``make_composite``.
    """
    blue, green, red = s2_raster[3], s2_raster[4], s2_raster[5]
    return make_composite(red, green, blue)

In [None]:
# RGB composite of the Sentinel-2 scene, built for visual inspection.
rgb = make_s2_rgb(s2)

In [None]:
# Show the RGB composite on a large (15 x 15 inch) figure.
plt.figure(figsize=(15,15))
plt.imshow(rgb)

# Last expression of the cell, so the composite's array shape is echoed as
# the cell output.
rgb.shape

In [None]:
# Plot the seagrass raster. Cells equal to the no-data value -9999 fail the
# `.where` condition and are replaced by its default fill value
# (presumably NaN, so they render blank -- TODO confirm for this raster type).
plt.figure(figsize=(15,15))
show(seagrass.where(seagrass!=-9999))

## Define features and targets for machine learning

In [None]:
from seagrass.prepare import create_training_data
from seagrass.utils import save_training_data

In [None]:
%%time

# Build the features X and targets y from the raw Sentinel-2 values and the
# land-masked seagrass values, using Sentinel-2 band indices 3 to 9.
# NOTE(review): pixels equal to no_data_value (-9999) are presumably excluded
# from the output -- confirm in seagrass.prepare.create_training_data.
X, y = create_training_data(s2.values, seagrass.values, no_data_value=-9999, s2_bands=[3,4,5,6,7,8,9])

In [None]:
# Inspect the features and targets, followed by their shapes.
display(X, y, X.shape, y.shape)

## Save training data to Modulos-compatible tar file

In [None]:
# Directory on the shared drive where the training archive is written.
training_dir = '/content/drive/Shareddrives/1_Satellite_Derived_Bathymetry & coastal veg/Banc dArguin bathymetry & seagrass/seagrass_training_data'
# Alternative: uncomment to write to the current working directory instead.
# training_dir = '.'

# Optional column header labels: 14 feature labels followed by the target
# label 'seagrass'.
# NOTE(review): the order is assumed to match the columns produced by
# create_training_data -- confirm, including the meaning of the '_g' suffix.
cols = ['b', 'g', 'r', 're1', 're2', 're3', 'nir', 'b_g', 'g_g', 'r_g', 're1_g', 're2_g', 're3_g', 'nir_g', 'seagrass']

In [None]:
from datetime import date

# Tag the archive name with the scene number and today's date (ISO format,
# YYYY-MM-DD) so runs on different days do not overwrite each other.
timestamp = date.today().isoformat()
train_filepath = (
    f'{training_dir}/banc_d_arguin_seagrass_train_{scene_number}_{timestamp}.tar'
)

# Echo the resulting path as the cell output.
train_filepath

In [None]:
# Write the features, targets and column labels to the archive at train_filepath.
save_training_data(train_filepath, X, y, column_labels=cols)