# Image segmentation

Testing algorithms for extracting image objects of cropping areas over Africa from Sentinel-2 images.

Requirements:
- Works well on small-scale agriculture
- Works well on large-scale agriculture
- Fast and/or works in parallel
- Scalable to very large AEZs



`pathos` and `rios` are required for the parallel version of the Shepherd segmentation, which is available on the `unstable` sandbox. Install them with:

    !pip install pathos
    !pip install git+https://github.com/ubarsc/rios.git

    

In [None]:
!pip install pathos
!pip install git+https://github.com/ubarsc/rios.git

In [None]:
import rsgislib
import gdal
import xarray as xr
import numpy as np
import datacube
import sys
from skimage.segmentation import quickshift
from datacube.utils.cog import write_cog
import matplotlib.pyplot as plt
from sklearn.cluster import MeanShift, estimate_bandwidth
import tiledSegParallel
from rsgislib.segmentation import segutils, meanImage, tiledsegsingle

sys.path.append('../Scripts')
from deafrica_datahandling import load_ard
from deafrica_bandindices import calculate_indices
from deafrica_classificationtools import HiddenPrints

### load data

In [None]:
# Connect to the datacube under the app name 'image_seg'
dc = datacube.Datacube(app='image_seg')

# Centre point of the area of interest and the buffer applied on each side.
# Alternative values kept from the original inline notes:
#   lat=-34.263, lon=19.603, lon_buffer=0.0175, lat_buffer=0.004
lat, lon = -0.1760, 36.154
lat_buffer, lon_buffer = 2.0, 2.0

# (min, max) extents obtained by stepping one buffer either side of the centre
lat_range = (lat - lat_buffer, lat + lat_buffer)
lon_range = (lon - lon_buffer, lon + lon_buffer)

# Time selection for the analysis (a single string, not a tuple)
years_range = '2018-12'

In [None]:
# Query reused for loading: spatial extent, time, bands and output grid
query = dict(
    y=lat_range,
    x=lon_range,
    time=years_range,
    measurements=['red', 'nir'],
    resolution=(-30, 30),
    output_crs='epsg:6933',
)

# Load the Landsat 8 product named below using the query above
ds = dc.load(product='ga_ls8c_gm_2_annual', **query)

print(ds)

In [None]:
# Compute NDVI from the loaded dataset.
# NOTE(review): collection='s2' is passed although the product loaded above
# is a Landsat 8 one - confirm this is the intended collection setting.
ndvi = calculate_indices(ds, index='NDVI', drop=True, collection='s2')

# Pixels that are NaN at every time step are replaced with 0
mask = ndvi['NDVI'].isnull().all(dim='time')
ndvi = ndvi.where(~mask, other=0)

In [None]:
# Collapse the time dimension by taking the per-pixel maximum NDVI.
# NOTE: the variable and the output file are called "mean" although they
# hold the maximum; the names are kept because the mean-shift cells read
# `ndvi_mean` and the segmentation cells read 'meanNDVI.tif'.
ndvi_mean = ndvi.NDVI.max('time')

# Export the composite as a cloud-optimised GeoTIFF, replacing any old copy
write_cog(ndvi_mean,
          'meanNDVI.tif', overwrite=True)

## RSGISlib Shepherd Seg

In [None]:
# --- File names --------------------------------------------------------
# GeoTIFF that is exported and then segmented
tiff_to_segment = 'meanNDVI.tif'
# KEA copy of that GeoTIFF
kea_file = 'meanNDVI.kea'
# KEA file holding the segments
segmented_kea_file = 'meanNDVI_segmented.kea'
# GeoTIFF of the segments attributed with the zonal mean of the input
segments_zonal_mean = 'segments_zonal_mean_shepherdSeg.tif'
# Folder for temporary files created during segmentation
temp = 'tmps/'

# --- Processing options --------------------------------------------------
# Number of CPUs to run on
ncpus = 14
# Fraction of a tile that must contain valid data; tiles below this
# threshold are merged with a neighbour
validDataTileFraction = 0.3
# Tile size in pixels
width = height = 4000


In [None]:
# Convert the GeoTIFF into a KEA file, tagging the output with EPSG:6933
kea_dataset = gdal.Translate(destName=kea_file,
                             srcDS=tiff_to_segment,
                             format='KEA',
                             outputSRS='EPSG:6933')

# gdal.Translate returns None on failure when GDAL exceptions are not enabled
if kea_dataset is None:
    raise RuntimeError(f'Could not convert {tiff_to_segment} to {kea_file}')

# Dropping the last reference closes the dataset, which flushes the KEA file
# to disk before the segmentation cells open it
kea_dataset = None

### single cpu, no tiles

In [None]:
%%time
# Segment the whole KEA image in a single call (no tiling). The call writes
# both the segments file and the zonal-mean image named in the parameters.
shepherd_options = dict(numClusters=60, minPxls=10)
segutils.runShepherdSegmentation(
    inputImg=kea_file,
    outputClumps=segmented_kea_file,
    outputMeanImg=segments_zonal_mean,
    **shepherd_options,
)


In [None]:
# Open the zonal-mean image (the file passed as `outputMeanImg` to the
# segmentation) so it can be inspected and plotted
result = xr.open_rasterio(segments_zonal_mean)
# Full-extent plot, currently disabled:
# result.plot(vmin=0.1, vmax=1.0, figsize=(8, 8), cmap='gist_earth_r')

In [None]:
# Display the summary of the opened zonal-mean raster
result

In [None]:
# Plot only a sub-window of the raster: x pixels 750-999, y pixels 1200-1499
zoom_window = dict(x=range(750, 1000), y=range(1200, 1500))
result.isel(**zoom_window).plot(vmin=0.1, vmax=1.0, figsize=(10, 10), cmap='gist_earth_r')

### single cpu, tiled

In [None]:
%time
# #run the segmentation
with HiddenPrints():
    tiledsegsingle.performTiledSegmentation(kea_file,
                                    segmented_kea_file,
                                    tmpDIR=temp,
                                    numClusters=60,
                                    validDataThreshold=validDataTileFraction, 
                                    tileWidth=width,
                                    tileHeight=height,
                                    minPxls=9)

In [None]:
# Attribute each segment with the zonal mean of the input image and write
# the result as a 32-bit float GeoTIFF. The positional arguments are, in
# order: the image to average, the segments file, the output path, the
# output format and the output data type (presumed from the variable
# names - confirm against the rsgislib documentation).
meanImage(tiff_to_segment, segmented_kea_file, segments_zonal_mean, "GTIFF",rsgislib.TYPE_32FLOAT)

### n cpus, tiled

In [None]:
# %time
# Tiled segmentation using the parallel implementation. The options are
# gathered first so the call itself stays short; `ncpus` sets how many
# CPUs the run uses.
parallel_seg_options = dict(
    tmpDIR=temp,
    numClusters=60,
    validDataThreshold=validDataTileFraction,
    tileWidth=width,
    tileHeight=height,
    minPxls=9,
    ncpus=ncpus,
)

# HiddenPrints wraps the call to silence its printed output
with HiddenPrints():
    tiledSegParallel.performTiledSegmentation(kea_file,
                                              segmented_kea_file,
                                              **parallel_seg_options)

In [None]:
# Attribute each segment with the zonal mean of the input image and write
# the result as a 32-bit float GeoTIFF. The positional arguments are, in
# order: the image to average, the segments file, the output path, the
# output format and the output data type (presumed from the variable
# names - confirm against the rsgislib documentation).
meanImage(tiff_to_segment, segmented_kea_file, segments_zonal_mean, "GTIFF",rsgislib.TYPE_32FLOAT)

## Mean-shift

In [None]:
%%time
# Estimate the mean-shift bandwidth from the data.
# estimate_bandwidth expects an (n_samples, n_features) array, so the 2-D
# image is reshaped to one single-feature sample per pixel; passing the
# image unchanged would treat every row as one sample with one feature per
# column.
# NOTE(review): the estimate can be slow on large images; consider the
# `n_samples` argument to subsample if this cell takes too long.
bandwidth = estimate_bandwidth(ndvi_mean.values.reshape(-1, 1), quantile=0.5, n_jobs=-1)

In [None]:
%%time
# Remember the image shape so the flat label vector can be folded back later
original_shape = ndvi_mean.shape

# One row per pixel, a single column holding that pixel's value
samples = ndvi_mean.values.reshape(-1, 1)

# NOTE(review): no bandwidth is passed to MeanShift here, so the value
# computed in the previous cell is not used by this clustering.
clf = MeanShift(bin_seeding=True, min_bin_freq=10, n_jobs=-1)

# Cluster the pixels, then restore the 2-D image layout of the labels
labels = clf.fit_predict(samples)
labels = labels.reshape(original_shape)

plt.imshow(labels)
plt.show()