# Module 2: Read Data and Detect Objects
Read GeoTIFF data and perform object detection using K-Means clustering.

Package installation requirements:
- Matplotlib (`matplotlib`): https://pypi.org/project/matplotlib/
- NumPy (`numpy`): https://pypi.org/project/numpy/
- Rasterio (`rasterio`): https://pypi.org/project/rasterio/
- scikit-learn (`scikit-learn`): https://pypi.org/project/scikit-learn/
- Xarray (`xarray`): https://pypi.org/project/xarray/
- Rasterio Xarray Extension (`rioxarray`): https://pypi.org/project/rioxarray/

***
## User Input
Define the key inputs for the module here:
1. Path to the dataset.
2. K-means clustering details.

### Path to Data

In [None]:
# define the path to the tiff output data from module 1 (or tiffs defined elsewhere)
# Probe for `req_id`: evaluating an unbound name raises NameError, which is used
# here to detect whether the variable already exists in the session.
try:
    req_id
except NameError:
    var_exists = False
    path_to_tiff = ''  # reset to empty, so the prompt below is always shown
else:
    var_exists = True

if var_exists:
    print('Found request ID generated from Module 1: '+req_id)
    # NOTE(review): `f` is not defined anywhere in this cell, so unless it exists
    # elsewhere in the session this line raises NameError. Presumably an f-string
    # building the GeoTIFF path from `req_id` was truncated -- restore the
    # intended path expression and confirm against Module 1.
    path_to_tiff = f
else:
    if path_to_tiff == '':
        path_to_tiff = input('Please define the path to the GeoTIFF:')
    else:
        # NOTE(review): unreachable as written -- this point is only reached after
        # the NameError handler above has just set path_to_tiff to ''.
        print('Path to GeoTIFF has been defined.')

### K-Means Setup
The number of clusters used for K-means clustering is defined here, separately for the normalised difference vegetation index (NDVI) and the floating debris index (FDI).

In [None]:
# K-means cluster counts, one per index: (NDVI, FDI)
n_clusters_ndvi, n_clusters_fdi = 3, 3

***
## Load Packages and Define Functions

In [None]:
import rasterio
import matplotlib.pyplot as plt
import numpy as np
from rasterio.plot import show, show_hist
import xarray as xr
import rioxarray
from sklearn.cluster import KMeans

## Load Data

In [None]:
# open the GeoTIFF twice: once as a rasterio dataset (`data`) and once
# through rioxarray (`xda`)
data = rasterio.open(path_to_tiff)
xda = rioxarray.open_rasterio(path_to_tiff)
print(f'GeoTIFF opened, {len(xda.band)} bands found.')
xda  # bare expression so the notebook renders the object summary

In [None]:
# key dataset details, printed one per line
print(
    xda.rio.crs,       # coordinate reference system
    xda.rio.nodata,    # nodata value
    xda.rio.bounds(),  # spatial bounds of the raster
    xda.rio.width,     # width of the image
    xda.rio.height,    # height of the image
    sep='\n',
)

In [None]:
# define the relevant Sentinel-2 bands
# NOTE(review): the indices assume the GeoTIFF stores the Sentinel-2 bands in
# order starting from B01 at index 0 -- confirm against the Module 1 output.
BLU = xda.data[1] # blue band (B02)
GRN = xda.data[2] # green band (B03)
RED = xda.data[3] # red band (B04)
RE2 = xda.data[5] # second red edge band (B06)
NIR = xda.data[7] # near infrared (NIR) band (B08)
SWIR1 = xda.data[11] # first short wave infrared (SWIR) band (B11)
SWIR2 = xda.data[12] # second short wave infrared (SWIR) band (B12)

# define the spatial extent in the (left, right, bottom, top) order expected by
# imshow. With the default origin='upper' the first image row is drawn at
# `top`, so `top` must be the largest y value for the axis ticks to be correct
# (assumes a north-up raster whose first row has the largest y coordinate).
lon = xda.x.data
lat = xda.y.data
extent = [np.min(lon), np.max(lon), np.min(lat), np.max(lat)]

# plot example RGB data, one panel per band, reusing the axes created by
# plt.subplots instead of re-selecting them with plt.subplot
fig, ax = plt.subplots(1, 3, figsize=(21, 7))
cols = ('Blues', 'Greens', 'Reds') # colour maps for plotting
for i, cmap_name in enumerate(cols, start=1):
    ax[i-1].imshow(xda.data[i], cmap=cmap_name, extent=extent) # plot the appropriate band
    ax[i-1].set_title(cmap_name[:-1] + ' channel') # drop the trailing 's' of the colour map name

## K-Means Clustering (NDVI)
Clustering across the normalised difference vegetation index (NDVI).

In [None]:
# normalised difference vegetation index (NDVI)
# The bands are cast to float first: if they are stored as unsigned integers,
# NIR-RED would otherwise wrap around wherever RED > NIR.
nir = NIR.astype(float)
red = RED.astype(float)
with np.errstate(divide='ignore', invalid='ignore'): # zero denominators are handled below
    ndvi = (nir - red) / (nir + red)

# check for nodata (NaN) and division artefacts (inf); K-means rejects both
nd = ~np.isfinite(ndvi)
ndvi[nd] = np.mean(ndvi[~nd]) # rough mean filter to allow for K-means clustering

# reshape to one sample per pixel with a single feature, after the fill so the
# clustering input never depends on view aliasing
ndvi_clust = ndvi.reshape((-1, 1))

# perform K-means clustering
# random_state is fixed so the cluster numbering is reproducible between runs;
# a later cell selects clusters by number.
kmeans_ndvi = KMeans(n_clusters=n_clusters_ndvi, random_state=0)
kmeans_ndvi.fit(ndvi_clust)
centroids_ndvi = kmeans_ndvi.cluster_centers_
# reshape returns a new view, leaving kmeans_ndvi.labels_ itself untouched
labels_ndvi = kmeans_ndvi.labels_.reshape(ndvi.shape)

# map every pixel to the centroid value of its cluster (image-shaped)
debris_ndvi = centroids_ndvi[labels_ndvi, 0]

# plot results
fig, ax = plt.subplots(figsize=(8,8))
ax.set_title('NDVI Classification')
pos = ax.imshow(labels_ndvi, cmap=plt.get_cmap('viridis', n_clusters_ndvi), extent=extent)
cbar = fig.colorbar(pos, ticks=np.arange(0,n_clusters_ndvi), orientation='horizontal')
# one tick label per cluster, so the colour bar follows n_clusters_ndvi
cbar.ax.set_xticklabels([str(k) for k in range(n_clusters_ndvi)])  # horizontal colourbar
cbar.ax.set_xlabel('Cluster Number')
ax.set_ylabel('Latitude [degrees]')
ax.set_xlabel('Longitude [degrees]')
plt.show()

## K-Means Clustering (FDI)
Clustering across the floating debris index (FDI), which is defined in: Biermann, L., Clewley, D., Martinez-Vicente, V., and Topouzelis, K. (2020). Finding Plastic Patches in Coastal Waters using Optical Satellite Data. *Scientific Reports*, 10:5364. https://doi.org/10.1038/s41598-020-62298-z

In [None]:
# floating debris index (FDI): NIR minus a baseline interpolated between the
# second red edge and first SWIR bands (formula from the reference cited above
# this cell). The bands are cast to float first: if they are stored as unsigned
# integers, the differences would otherwise wrap around.
nir = NIR.astype(float)
re2 = RE2.astype(float)
swir1 = SWIR1.astype(float)
NIRprime = re2 + (swir1 - re2) * ((833 - 665) / (1610.4 - 665)) * 10
fdi = nir - NIRprime # floating debris index (FDI)

# check for nodata (NaN) or other non-finite values; K-means rejects both
nd = ~np.isfinite(fdi)
fdi[nd] = np.mean(fdi[~nd]) # rough mean filter to allow for K-means clustering

# reshape to one sample per pixel with a single feature, after the fill so the
# clustering input never depends on view aliasing
fdi_clust = fdi.reshape((-1, 1))

# perform K-means clustering
# random_state is fixed so the cluster numbering is reproducible between runs;
# a later cell selects clusters by number.
kmeans_fdi = KMeans(n_clusters=n_clusters_fdi, random_state=0)
kmeans_fdi.fit(fdi_clust)
centroids_fdi = kmeans_fdi.cluster_centers_
# reshape returns a new view, leaving kmeans_fdi.labels_ itself untouched
labels_fdi = kmeans_fdi.labels_.reshape(fdi.shape)

# map every pixel to the centroid value of its cluster (image-shaped)
debris_fdi = centroids_fdi[labels_fdi, 0]

# plot results
fig, ax = plt.subplots(figsize=(8,8))
ax.set_title('FDI Classification')
pos = ax.imshow(labels_fdi, cmap=plt.get_cmap('viridis', n_clusters_fdi), extent=extent)
cbar = fig.colorbar(pos, ticks=np.arange(0,n_clusters_fdi), orientation='horizontal')
# one tick label per cluster, so the colour bar follows n_clusters_fdi
cbar.ax.set_xticklabels([str(k) for k in range(n_clusters_fdi)])  # horizontal colorbar
ax.set_ylabel('Latitude [degrees]')
ax.set_xlabel('Longitude [degrees]')
cbar.ax.set_xlabel('Cluster Number')
plt.show()

## Overlap NDVI-FDI Results
Produce a map showing the pixels that fall in the selected cluster of **both** the NDVI and the FDI clustering.

In [None]:
# define the clusters to select for analysis
ndvi_clust = 1
fdi_clust = 2

In [None]:
# flatten both label maps to one column each (one row per pixel)
res_fdi = labels_fdi.reshape((-1, 1))
res_ndvi = labels_ndvi.reshape((-1, 1))

# reassign pixels: 1.0 where the pixel belongs to the selected FDI cluster AND
# the selected NDVI cluster, 0.0 everywhere else. Vectorised equivalent of a
# per-pixel loop; the result is float64 with the same shape as the FDI image.
res = ((res_fdi == fdi_clust) & (res_ndvi == ndvi_clust)).astype(float).reshape(fdi.shape)

In [None]:
# draw the binary debris map with a two-level grey colour map
fig, ax = plt.subplots(figsize=(8,8))
pos = ax.imshow(res, extent=extent, cmap=plt.get_cmap('Greys', 2))
ax.set_title('Combined Classification of Floating Objects')
ax.set_xlabel('Longitude [degrees]')
ax.set_ylabel('Latitude [degrees]')

# horizontal colour bar with one named tick per class
cbar = fig.colorbar(pos, orientation='horizontal', ticks=[0,1])
cbar.ax.set_xticklabels(['Not Debris', 'Debris'])
cbar.ax.set_xlabel('Classification')
plt.show()

## Mask Land

In [None]:
# flag a pixel as land wherever the first SWIR band exceeds the blue band
land = np.greater(SWIR1,BLU)
# blank out flagged pixels with a real NaN instead of the string "NaN", which
# only worked through implicit string-to-float conversion
res[land] = np.nan

# re-plot the combined classification with the flagged pixels removed
fig, ax = plt.subplots(figsize=(8,8))
ax.set_title('Combined Classification of Floating Objects')
pos = ax.imshow(res, cmap=plt.get_cmap('Greys', 2), extent=extent)
cbar = fig.colorbar(pos, ticks=[0,1], orientation='horizontal')
cbar.ax.set_xticklabels(['Not Debris', 'Debris'])  # horizontal colorbar
ax.set_ylabel('Latitude [degrees]')
ax.set_xlabel('Longitude [degrees]')
cbar.ax.set_xlabel('Classification')
plt.show()

## Export Classification Raster

In [None]:
# export raster
with rasterio.open(r'C:\Users\uqdcowl2\DATA\OHW24_ocean_object_detection\classified.tiff', 
                   'w',driver='GTiff',height=res.shape[0],width=res.shape[1],
                   count=1,dtype=res.dtype,crs=xda.rio.crs,nodata="NaN",transform=data.transform) as dst:dst.write(res, 1)