# Computes NDWI-based land-water masks from Sentinel-2 data and compares them with SAR masks from Sentinel-1

In [None]:
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
from osgeo import gdal
from rasterio.warp import reproject, Resampling
from rasterio.mask import mask

from skimage.filters import threshold_otsu
import pandas as pd
from IPython.display import HTML, display

## Import NDWI images
- These are computed on Google Earth Engine as yearly medians, using only images with less than 20% cloud cover

In [None]:
# Folder that holds the Sentinel-2 NDWI GeoTIFFs.
sentinel_ndwi_dir = '/Users/bvarugu/Documents/Belomonte/Xingu_Sentinel2/NDWI/'

# Every file in that folder whose name matches NDWI*_UTM.tif
# (glob returns them in arbitrary order).
ndwi_pattern = os.path.join(sentinel_ndwi_dir, 'NDWI*_UTM.tif')
ndwi_image_list = glob.glob(ndwi_pattern)


## This cell resamples the waterFrequency raster to the same resolution as the NDWI image

In [None]:
waterFreq_raster_file = '/Users/bvarugu/Documents/Belomonte/GIS_layers/open_water_frequency_Xingu_clip.tif'

# Fail with a readable message instead of an IndexError when the glob found nothing.
if not ndwi_image_list:
    raise FileNotFoundError("No NDWI images found; cannot pick a reference grid.")
sample_ndwi_file = ndwi_image_list[0]

# Read the reference grid (CRS, affine transform, size) from one NDWI image.
with rasterio.open(sample_ndwi_file) as src_ndwi:
    ndwi_data = src_ndwi.read(1)  # Assuming single band data
    ndwi_profile = src_ndwi.profile
    ndwi_crs = src_ndwi.crs
    ndwi_transform = src_ndwi.transform
    ndwi_width = src_ndwi.width
    ndwi_height = src_ndwi.height

with rasterio.open(waterFreq_raster_file) as src_waterFreq:
    # Warp directly onto the NDWI grid. Deriving the transform from the
    # water-frequency raster's own bounds (calculate_default_transform) only
    # lines up with the NDWI pixels when both rasters cover exactly the same
    # extent; reusing the NDWI transform guarantees that waterFreq[r, c] and
    # the NDWI pixel [r, c] refer to the same ground location.
    dst_transform, dst_width, dst_height = ndwi_transform, ndwi_width, ndwi_height

    # Create an empty array to store the resampled data
    waterFreq = np.empty((dst_height, dst_width), dtype=src_waterFreq.dtypes[0])

    # Perform the resampling
    rasterio.warp.reproject(
        source=rasterio.band(src_waterFreq, 1),
        destination=waterFreq,
        src_transform=src_waterFreq.transform,
        src_crs=src_waterFreq.crs,
        dst_transform=dst_transform,
        dst_crs=ndwi_crs,
        resampling=Resampling.nearest,  # Adjust as needed
    )

## This cell computes a histogram of the NDWI values at the selected waterFrequency pixels to get the optimal (Otsu) NDWI threshold for creating land-water masks.
- Saves the land-water masks as GeoTIFFs

In [None]:
#get optimal NDWI threshold for land water masks
def get_land_water_mask_otsu(ndwi_image_path, waterFreq, max_freq, min_freq, save=None, output_mask_path=None):
    """Build a binary land-water mask from an NDWI image with Otsu's threshold.

    The threshold is estimated only from pixels whose water frequency is
    either >= ``max_freq`` or <= ``min_freq``, and whose NDWI value is valid
    (> -1).

    Parameters:
    - ndwi_image_path (str): Path to the NDWI raster; band 1 is read.
    - waterFreq (numpy.ndarray): Water-frequency array on the same pixel grid
      (same shape) as the NDWI raster.
    - max_freq: Pixels with ``waterFreq >= max_freq`` are used as samples.
    - min_freq: Pixels with ``waterFreq <= min_freq`` are used as samples.
    - save (bool, optional): When truthy, write the mask to ``output_mask_path``.
    - output_mask_path (str, optional): Destination GeoTIFF; required if ``save``.

    Returns:
    - binary_mask (numpy.ndarray, uint8): 1 where NDWI exceeds the threshold
      (water under the usual NDWI convention), 0 elsewhere, 255 where the
      NDWI value is invalid (<= -1 or NaN).
    - otsu_threshold: The threshold returned by ``threshold_otsu``.
    - ndwi_values (numpy.ndarray): The NDWI samples the threshold was fit on.

    Raises:
    - ValueError: ``save`` is set without ``output_mask_path``, the shapes of
      ``waterFreq`` and the NDWI band differ, or no sample pixel is available.
    - Exception: The NDWI image cannot be opened or the output cannot be created.
    """
    if save and not output_mask_path:
        raise ValueError("output_mask_path must be provided when save is set")

    # Read NDWI image
    dataset = gdal.Open(ndwi_image_path)
    if dataset is None:
        raise Exception(f"Failed to open image: {ndwi_image_path}")

    try:
        # NDWI is expected in the first band
        ndwi_data = dataset.GetRasterBand(1).ReadAsArray()

        if np.shape(waterFreq) != ndwi_data.shape:
            raise ValueError(
                f"waterFreq shape {np.shape(waterFreq)} does not match "
                f"NDWI shape {ndwi_data.shape}")

        # Values <= -1 are treated as missing data throughout this function
        valid_mask = ndwi_data > -1

        # Sample pixels: frequency extremes given by the caller. The arguments
        # are honoured here; they used to be overwritten with 90 and 1.
        sample_mask = ((waterFreq >= max_freq) | (waterFreq <= min_freq)) & valid_mask
        ndwi_values = ndwi_data[sample_mask]
        if ndwi_values.size == 0:
            raise ValueError(
                "No valid NDWI pixels fall inside the requested water-frequency range")

        otsu_threshold = threshold_otsu(ndwi_values)

        # Apply Otsu's threshold to create a binary mask; invalid pixels get 255
        binary_mask = np.zeros(ndwi_data.shape, dtype=np.uint8)
        binary_mask[(ndwi_data > otsu_threshold) & valid_mask] = 1
        binary_mask[~valid_mask] = 255

        # Optionally save the binary mask as a GeoTIFF on the NDWI image's grid
        if save:
            driver = gdal.GetDriverByName("GTiff")
            dset = driver.Create(
                output_mask_path, binary_mask.shape[1], binary_mask.shape[0],
                1, gdal.GDT_Byte, ["COMPRESS=LZW"])
            if dset is None:
                raise Exception(f"Failed to create image: {output_mask_path}")
            dset.SetGeoTransform(dataset.GetGeoTransform())
            dset.SetProjection(dataset.GetProjection())
            out_band = dset.GetRasterBand(1)
            out_band.WriteArray(binary_mask)
            out_band.SetNoDataValue(255)  # 255 marks invalid pixels in the mask
            dset.FlushCache()  # Ensure data is written to disk
            out_band = None
            dset = None
    finally:
        dataset = None  # Release the GDAL dataset even when an error occurred

    return binary_mask, otsu_threshold, ndwi_values
#land_water_mask, otsu_threshold, ndwi_values = get_land_water_mask_otsu(sample_ndwi_file,waterFreq,90,1,save=False,output_mask_path=None);
#years = ['2021','2022','2023'];
years = ['2016','2017','2018','2019','2020','2021','2022','2023']

# Build and save one land-water mask per year, printing the threshold used.
for year in years:
    output_path = os.path.join(sentinel_ndwi_dir, 'NDWI_land_water_mask_' + year + '.tif')

    # Name the missing year explicitly instead of failing with a bare IndexError.
    year_matches = glob.glob(os.path.join(sentinel_ndwi_dir, 'NDWI*_' + year + '_UTM.tif'))
    if not year_matches:
        raise FileNotFoundError(f"No NDWI image found for year {year} in {sentinel_ndwi_dir}")
    ndwi_image_path = year_matches[0]

    land_water_mask, otsu_threshold, ndwi_values = get_land_water_mask_otsu(
        ndwi_image_path, waterFreq, 90, 1, save=True, output_mask_path=output_path)
    print(year, ' otsu_threshold:', otsu_threshold)

    

   

## Compute difference between SAR and optical masks

In [None]:
from osgeo import gdal, osr
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

def compute_difference_map(mask1_path, mask2_path, output_path=None, nodata_value=255):
    """
    Compute the difference map between two binary masks.

    The second mask is resampled (nearest neighbour) onto the pixel grid of
    the first mask before subtracting, so both rasters may differ in extent
    and resolution as long as they share a CRS.

    Parameters:
    - mask1_path (str): Path to the first binary mask (defines the output grid).
    - mask2_path (str): Path to the second binary mask.
    - output_path (str, optional): Path to save the difference map.
    - nodata_value (optional): Value marking missing pixels in both masks and
      in the result. Defaults to 255.

    Returns:
    - mask1_array (numpy.ndarray): Band 1 of the first mask as float64.
    - mask2_aligned (numpy.ndarray): Band 1 of the second mask resampled onto
      the first mask's grid (float64, ``nodata_value`` outside its coverage).
    - difference_map (numpy.ndarray): ``mask1 - mask2`` on the first mask's
      grid, ``nodata_value`` wherever either input is missing.

    Raises:
    - ValueError: The two masks have different CRS.
    """
    # Read everything needed while each dataset is still open.
    with rasterio.open(mask1_path) as mask1_ds:
        mask1_array = mask1_ds.read(1).astype(np.float64)
        mask1_transform = mask1_ds.transform
        mask1_crs = mask1_ds.crs
        mask1_meta = mask1_ds.meta.copy()

    with rasterio.open(mask2_path) as mask2_ds:
        mask2_array = mask2_ds.read(1).astype(np.float64)
        mask2_transform = mask2_ds.transform
        mask2_crs = mask2_ds.crs

    # Ensure both masks have the same CRS
    if mask1_crs != mask2_crs:
        raise ValueError("Masks have different CRS.")

    # Resample mask2 onto mask1's exact grid (transform and shape) so the
    # arrays can be compared pixel by pixel. Pre-filling with nodata leaves
    # pixels that mask2 does not cover flagged as missing.
    mask2_aligned = np.full(mask1_array.shape, nodata_value, dtype=np.float64)
    reproject(
        source=mask2_array,
        destination=mask2_aligned,
        src_transform=mask2_transform,
        src_crs=mask2_crs,
        src_nodata=nodata_value,
        dst_transform=mask1_transform,
        dst_crs=mask1_crs,
        dst_nodata=nodata_value,
        resampling=Resampling.nearest)

    # Compute the difference map, propagating nodata from either input
    missing = (mask1_array == nodata_value) | (mask2_aligned == nodata_value)
    difference_map = mask1_array - mask2_aligned
    difference_map[missing] = nodata_value

    # Save the difference map if an output path is provided
    if output_path:
        mask1_meta.update(dtype=rasterio.float32, count=1, nodata=nodata_value)
        with rasterio.open(output_path, 'w', **mask1_meta) as dst:
            dst.write(difference_map.astype(rasterio.float32), 1)

    return mask1_array, mask2_aligned, difference_map



## Compute confusion matrix

In [None]:
year = '2020'
NDWI_Mask_raster_file = os.path.join(sentinel_ndwi_dir,'NDWI_land_water_mask_'+year+'.tif')
SAR_mask_file = '/Users/bvarugu/Documents/Belomonte/Xingu_68_604_599_merged/VV_tifs/subset_tifs/LW_mask__'+year+'10_VV.tif'
LW_SAR_mask, LW_optical_mask, difference_map = compute_difference_map(SAR_mask_file, NDWI_Mask_raster_file)

# Exactly three panels are drawn (SAR mask, optical mask, difference), so the
# figure is created with three axes rather than one per entry in `years`.
fig, axes = plt.subplots(1, 3, figsize=(10, 5), sharex=True, sharey=True)
panels = [
    (LW_SAR_mask, 'Blues', 0, 2),
    (LW_optical_mask, 'Blues', 0, 2),
    (difference_map, 'RdBu', -1, 1),
]
for panel_ax, (panel_data, panel_cmap, panel_vmin, panel_vmax) in zip(axes, panels):
    # Hide nodata (255) pixels so they do not distort the colour scale.
    panel_ax.imshow(np.ma.masked_where(panel_data == 255, panel_data),
                    cmap=panel_cmap, vmin=panel_vmin, vmax=panel_vmax)
    panel_ax.set_title(str(year))
    panel_ax.axis('off')

from sklearn.metrics import confusion_matrix
def compute_confusion_matrix(LW_SAR_mask, LW_optical_mask, labels, nodata_value=255):
    """Compare the SAR and optical masks on the pixels where both are valid.

    Both masks are flattened, pixels equal to ``nodata_value`` in either mask
    are dropped, and the remaining values are passed to ``confidence_vals``.

    NOTE(review): ``confidence_vals`` and ``indices`` are not defined in this
    part of the notebook and must already exist in the session. ``labels`` is
    accepted but not forwarded; the helper always receives ``[0, 1]``.

    Returns whatever ``confidence_vals`` returns.
    """
    sar_flat = LW_SAR_mask.flatten()
    optical_flat = LW_optical_mask.flatten()

    # A pixel is kept only when neither mask flags it as nodata.
    flagged = (sar_flat == nodata_value) | (optical_flat == nodata_value)
    keep = ~flagged

    sar_valid = sar_flat[keep]
    optical_valid = optical_flat[keep]

    # Bhuvan's method is used here; sklearn's
    # confusion_matrix(sar_valid, optical_valid, labels=labels) is the alternative.
    return confidence_vals(optical_valid, sar_valid, [0, 1], indices)
# Build the SAR-vs-optical comparison table, rounded to two decimals.
# `indices` supplies both the row and column labels; it is not defined in this
# cell and must already exist in the session.
matrix = compute_confusion_matrix(LW_SAR_mask, LW_optical_mask, [1, 0], nodata_value=255)
df = pd.DataFrame(matrix, index=indices, columns=indices).round(2)

# #pd.set_option('display.precision', 2)
# NOTE: the Styler returned here is not kept, so the font size does not reach
# the HTML rendered below.
df.style.set_properties(**{'font-size': '20pt'})

table_html = df.to_html()
display(HTML(table_html))