In [1]:
import csv
import os
import re
import time
from io import BytesIO

import cupy
import cupy as cp
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import rasterio
import requests
from cupyx.scipy.ndimage import binary_dilation as gpu_binary_dilation
from cupyx.scipy.ndimage import convolve
from matplotlib.colors import ListedColormap
from PIL import Image
from rasterio.errors import RasterioIOError
from rasterio.warp import calculate_default_transform, reproject, Resampling
from scipy.ndimage import binary_dilation
from skimage import io
from sklearn.cluster import KMeans

In [None]:
# --- Scene selection ----------------------------------------------------------
location = "Isla_Vista_Kelp"
tile = "11SKU"

# --- Processing switches ------------------------------------------------------
# Granules whose ocean cloud fraction (0-1) exceeds this value are skipped.
cloud_cover_threshold = 0.5
# Upper bound on the number of granules handled by the preparation loop.
num_iterations = 20
save_mask = True
save_classification = False
# When False, granules that already have an `_rf_ready.tif` output are skipped.
remask = False

# --- Locations on disk --------------------------------------------------------
# Granule folders live under <imagery>/<location>/<tile>.
path = os.path.join("/mnt/c/Users/attic/HLS_Kelp/imagery", location, tile)
save_to = "/mnt/c/Users/attic/HLS_Kelp/imagery/rf_prepped_v3"

In [None]:
# Build the land / deep-ocean masks for this tile by warping the DEM onto the
# grid of one reference granule. The resulting `land_mask` is reused by the
# granule loop further down.

# --- Pick a reference granule -------------------------------------------------
# Any granule directory will do: it only supplies the grid (CRS, transform,
# shape) that the DEM is warped onto.
granules = os.listdir(path)
granule = next((g for g in granules if os.path.isdir(os.path.join(path, g))), None)
if granule is None:
    raise FileNotFoundError(f"No granule directories found in {path}")
img_path = os.path.join(path, granule)
files = os.listdir(img_path)

# The second dot-separated field of the granule name is the sensor code.
file_data = granule.split('.')
sensor = file_data[1]
# B06/B07 (L30) and B11/B12 (S30) are intentionally left out for now.
if sensor == 'L30':
    sensor_bands = ['B02', 'B03', 'B04', 'B05']
else:
    # 'B8A' (not 'B08A') so the pattern agrees with the granule loop below.
    sensor_bands = ['B02', 'B03', 'B04', 'B8A']

pattern = re.compile(r'\.(' + '|'.join(sensor_bands) + r')\.tif$')
img_files = [f for f in files if re.search(pattern, f)]
if not img_files:
    raise FileNotFoundError(f"No band files matching {sensor_bands} in {img_path}")

geotiff_path = os.path.join(img_path, img_files[0])
dem_path = r'/mnt/c/Users/attic/HLS_Kelp/imagery/Socal_DEM.tiff'

# --- Warp the DEM onto the reference grid -------------------------------------
# Both datasets are closed on exit; everything needed later (pixels, CRS,
# transform) is copied out while they are still open.
with rasterio.open(geotiff_path) as dst, rasterio.open(dem_path) as dem:
    hls = dst.read()
    ref_transform = dst.transform
    ref_crs = dst.crs
    # Always resample: even with identical CRSs the DEM is not guaranteed to
    # share the granule's extent and pixel grid.
    reprojected_dem = np.zeros((hls.shape[1], hls.shape[2]), dtype=hls.dtype)
    reproject(
        source=dem.read(1),
        destination=reprojected_dem,
        src_transform=dem.transform,
        src_crs=dem.crs,
        dst_transform=ref_transform,
        dst_crs=ref_crs,
        resampling=Resampling.bilinear)
hls_flat = np.squeeze(hls, axis=0)

# An all-zero result means the warp produced nothing usable.
if not reprojected_dem.any():
    raise RuntimeError(
        f"Reprojected DEM is empty; check that {dem_path} overlaps {geotiff_path}")

# --- Threshold and grow the masks ---------------------------------------------
struct = np.ones((5, 5))
land_mask = binary_dilation(reprojected_dem > 0, structure=struct)      # elevation above 0
ocean_mask = binary_dilation(reprojected_dem < -60, structure=struct)   # elevation below -60
full_mask = land_mask | ocean_mask

plt.figure(figsize=(6, 6))
plt.imshow(land_mask, cmap='gray')
plt.show()

# --- Optionally persist the combined mask -------------------------------------
if save_mask:
    mask_path = os.path.join(path, f'{tile}_fullmask.tif')
    transform = ref_transform
    height, width = full_mask.shape
    profile = {
        'driver': 'GTiff',
        'width': width,
        'height': height,
        'count': 1,  # one band
        'dtype': rasterio.uint8,  # binary mask stored as 0/1
        'crs': ref_crs,
        'transform': transform,
        'nodata': 0
    }

    # A separate name keeps the reference reader `dst` from being shadowed.
    with rasterio.open(mask_path, 'w', **profile) as mask_dst:
        mask_dst.write(full_mask.astype(rasterio.uint8), 1)

In [None]:
def get_band_index(filename, bands=None):
    """Return the position of a band file within the ordered band list.

    Parameters
    ----------
    filename : str
        File name ending in ``.<band>.tif`` (``B`` plus two digits, or ``B8A``).
    bands : list of str, optional
        Ordered band codes. Defaults to the notebook-level ``sensor_bands``.

    Returns
    -------
    int
        Index of the band in ``bands``, or -1 when the name carries no band code.

    Raises
    ------
    ValueError
        If the file names a band that is not in ``bands``.
    """
    if bands is None:
        bands = sensor_bands
    match = re.search(r'\.(B\d{2}|B8A)\.tif$', filename)
    if match:
        return bands.index(match.group(1))
    return -1


# Walk the granule folders and build a cloud/land-masked band stack for each.
# Relies on `granules` and `land_mask` from the DEM cell above.
iterations = 0
print(len(granules))
for item in granules:
    if iterations >= num_iterations:
        break

    ##==========Select Granule and Get File Names==========##
    img_path = os.path.join(path, item)
    if not os.path.isdir(img_path):
        continue
    if not remask and os.path.isfile(os.path.join(save_to, f'{item}_rf_ready.tif')):
        continue

    files = os.listdir(img_path)
    file_data = item.split('.')
    if len(file_data) < 2:
        print(f"Invalid granule: {item}")
        continue
    sensor = file_data[1]
    if sensor == 'L30':
        sensor_bands = ['B02', 'B03', 'B04', 'B05']
    else:
        sensor_bands = ['B02', 'B03', 'B04', 'B8A']

    # Build the pattern for THIS granule's sensor before filtering with it.
    pattern = re.compile(r'\.(' + '|'.join(sensor_bands) + r')\.tif$')
    img_files = [f for f in files if re.search(pattern, f)]
    try:
        img_files = sorted(img_files, key=get_band_index)
    except ValueError:
        print("invalid granule files")
        continue
    print(img_files)

    f_mask = [f for f in files if re.search(r'Fmask\.tif$', f)]
    if not f_mask:
        print(f"Invalid granule: {item}")
        continue
    # One file is expected per selected band.
    if len(img_files) != len(sensor_bands):
        print(f"incomplete file download: {item}")
        continue

    # Optional per-granule metadata: a header row followed by one value row.
    metadata = {}
    metadata_file = [f for f in files if re.search(r'metadata\.csv$', f)]
    if metadata_file:
        with open(os.path.join(img_path, metadata_file[0]), mode='r', newline='') as meta_fh:
            csv_reader = csv.reader(meta_fh)
            keys = next(csv_reader, [])
            values = next(csv_reader, [])
        metadata = dict(zip(keys, values))

    ##==========Fmask Cloud mask==========##
    fmask_name = f_mask[0]
    try:
        with rasterio.open(os.path.join(img_path, fmask_name)) as fmask:
            qa_band = fmask.read(1)
    except RasterioIOError as e:
        print(f"Error reading file {fmask_name} in granule {item}: {e}")
        continue  # Skip to the next granule if a file cannot be read

    # Each flag is a single bit of the QA byte: shift it down to bit 0 and test it.
    qa_cloud_mask = ((qa_band >> 1) & 1) == 1              # bit 1: cloud
    qa_adjacent_to_cloud_mask = ((qa_band >> 2) & 1) == 1  # bit 2: cloud adjacent
    qa_cloud_shadow = ((qa_band >> 3) & 1) == 1            # bit 3
    qa_ice = ((qa_band >> 4) & 1) == 1                     # bit 4
    # Aerosol is flagged only when bits 6 and 7 are both set.
    qa_aerosol = (((qa_band >> 6) & 1) == 1) & (((qa_band >> 7) & 1) == 1)
    # Masking the cloud-adjacent pixels may not be necessary.
    cloud_mask = qa_cloud_mask | qa_adjacent_to_cloud_mask | qa_cloud_shadow | qa_ice | qa_aerosol
    cloud_mask_2D = cloud_mask.reshape(-1).T

    ##========== Determine percentage of ocean covered by clouds ==========##
    cloud_land_mask = cloud_mask | land_mask
    cloud_but_not_land_mask = cloud_mask & ~land_mask
    num_pixels_cloud_not_land = np.count_nonzero(cloud_but_not_land_mask)
    num_pixels_not_land = np.count_nonzero(~land_mask)
    if num_pixels_not_land == 0:
        print(f"No ocean pixels in granule: {item}")
        continue
    # Despite the name this is a fraction in [0, 1], like cloud_cover_threshold.
    percent_cloud_covered = num_pixels_cloud_not_land / num_pixels_not_land
    if percent_cloud_covered > cloud_cover_threshold:
        continue
    print(f'Percent cloud covered: {percent_cloud_covered}')

    ##==========Create stacked np array, Apply landmask==========##
    img_bands = []
    try:
        for file in img_files:
            with rasterio.open(os.path.join(img_path, file)) as src:
                # Cloud and land pixels are zeroed in every band.
                img_bands.append(np.where(cloud_land_mask, 0, src.read(1)))
    except RasterioIOError as e:
        print(f"Error reading file {file} in granule {item}: {e}")
        continue  # Skip to the next granule if a file cannot be read
    img = np.stack(img_bands, axis=0)
    n_bands, height, width = img.shape

    # Count only granules that made it through every check above.
    iterations += 1

    ##========== Add masked file-folder to directory, if needed ==========##
    # TODO: writing `{item}_rf_ready.tif` into `save_to` is not implemented in
    # this cell. The disabled draft was:
    #if not os.path.isdir (save_to):
    #    os.mkdir(save_to)
    #classification_path = os.path.join(save_to,f'{item}_rf_ready.tif')
    



    

In [None]:
def calculate_local_variance(image_gpu, window_size):
    """Estimate the per-pixel variance inside a square sliding window.

    Uses ``Var[x] = E[x^2] - E[x]^2``, with both expectations obtained by
    convolving with a uniform ``window_size x window_size`` kernel. Pixels
    outside the image count as zero (``mode='constant', cval=0.0``), so values
    near the border are computed against that padding.

    Parameters
    ----------
    image_gpu : cupy.ndarray
        Image on the GPU. It is convolved with a 2-D kernel.
    window_size : int
        Side length of the averaging window, in pixels. Must be at least 1.

    Returns
    -------
    cupy.ndarray
        float32 array of local variances, never negative.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    image_f32 = image_gpu.astype(cp.float32)
    mean_kernel = cp.ones((window_size, window_size), dtype=cp.float32) / (window_size * window_size)

    local_mean_gpu = convolve(image_f32, mean_kernel, mode='constant', cval=0.0)
    mean_squared_gpu = convolve(cp.square(image_f32), mean_kernel, mode='constant', cval=0.0)

    # Subtracting two close float32 numbers can dip just below zero; a
    # variance cannot be negative, so clamp those rounding artefacts.
    local_variance_gpu = cp.clip(mean_squared_gpu - cp.square(local_mean_gpu), 0, None)

    return local_variance_gpu

In [None]:

# --- Scene to unmix -----------------------------------------------------------
# Named explicitly so the cell does not depend on whatever `item` a previous
# cell's loop happened to leave behind.
folder_path = '/mnt/c/Users/attic/HLS_Kelp/imagery/rf_classified_S30'
item = 'HLS.S30.T11SKU.2021286T184331.v2.0_kelp_classified.tif'

# --- Mask-building parameters -------------------------------------------------
ocean_dilation = cp.ones((100, 100))  # Struct for dilation (increase to enlarge non-ocean mask) larger --> takes longer

kelp_neighborhood = 5    # side of the window used to count neighbouring kelp pixels
min_kelp_count = 4       # kelp pixels with a window count <= this are dropped
kelp_dilation_size = 25  # side of the structuring element used to grow the kelp mask
num_EM = 30              # number of ocean endmembers drawn for the unmixing

variance_window_size = 15
variance_threshold = 1   # quantile (0-1) of local variance above which ocean pixels are masked

structuring_element = cp.ones((kelp_dilation_size, kelp_dilation_size))

files = os.listdir(folder_path)
if item not in files:
    raise FileNotFoundError(f"{item} not found in {folder_path}")

print(f"starting {item}")
# NOTE: this rebinds the notebook-level `path` from the configuration cell;
# re-run that cell before re-running the granule cells above.
path = os.path.join(folder_path, item)
# Build per-band kelp and ocean pixel stacks from the classified scene.
# Band 5 holds the class map (0 = kelp, 2 = clouds, 3 = land, per the
# comparisons below); bands 1-4 are the spectral bands.
time_st = time.time()
with rasterio.open(path) as imagery:
    classified_img = imagery.read(5)

    # Transfer data to GPU
    classified_img_gpu = cp.array(classified_img)
    kelp_mask = []
    ocean_mask = []

    # --- Non-ocean mask: dilated land plus clouds -----------------------------
    time_val = time.time()
    land_dilated_gpu = classified_img_gpu == 3
    clouds_dilated_gpu = classified_img_gpu == 2
    # GPU dilation: SciPy's CPU implementation does not accept CuPy arrays.
    land_dilated_gpu = gpu_binary_dilation(land_dilated_gpu, structure=ocean_dilation)
    print(f'land finished: {time.time()-time_val}')

    # True wherever a pixel must NOT be used as ocean.
    ocean_dilated_gpu = land_dilated_gpu | clouds_dilated_gpu

    # --- Kelp mask: drop isolated kelp pixels, then grow what is left ---------
    kelp_pixels_gpu = classified_img_gpu == 0
    kernel = cp.ones((kelp_neighborhood, kelp_neighborhood), dtype=cp.int32)

    time_val = time.time()
    # Number of kelp pixels in each window (the centre pixel included).
    kelp_count_gpu = convolve(kelp_pixels_gpu.astype(cp.int32), kernel, mode='constant', cval=0.0)
    print(f'kelp moving average finished: {time.time()-time_val}')

    # Keep a kelp pixel only if its window holds more than min_kelp_count kelp pixels.
    kelp_dilated_gpu = kelp_pixels_gpu & (kelp_count_gpu > min_kelp_count)
    time_val = time.time()
    kelp_dilated_gpu = gpu_binary_dilation(kelp_dilated_gpu, structure=structuring_element)  # I may not want to do this. we'll see
    print(f'kelp dilation finished: {time.time()-time_val}')

    # --- Apply both masks to each spectral band -------------------------------
    time_val = time.time()
    for i in range(4):
        band_data = imagery.read(i + 1)
        band_data_gpu = cp.array(band_data)

        # Kelp stack: band values inside the kelp mask, NaN elsewhere.
        kmask_gpu = cp.where(kelp_dilated_gpu, band_data_gpu, cp.nan)

        # Ocean stack: NaN on land/cloud and on pixels whose local variance
        # exceeds the chosen quantile.
        local_variance_gpu = calculate_local_variance(band_data_gpu, variance_window_size)
        max_local_variance = cp.percentile(local_variance_gpu, 100 * variance_threshold)  # threshold variance
        high_variance_gpu = local_variance_gpu > max_local_variance
        final_omask_gpu = cp.where(ocean_dilated_gpu | high_variance_gpu, cp.nan, band_data_gpu)

        kelp_mask.append(cp.asnumpy(kmask_gpu))
        ocean_mask.append(cp.asnumpy(final_omask_gpu))
    print(f'kBand masking and variance masking complete: {time.time()-time_val}')

kelp_mask = np.array(kelp_mask)
ocean_mask = np.array(ocean_mask)
print(f'mask building total: {time.time()-time_st}')

# --- Quick-look figure --------------------------------------------------------
# Stacked as (band 3, band 2, band 1) for red/green/blue.
# NOTE(review): assumes bands 1-3 are B02/B03/B04 as in the preparation cell - confirm.
rgb_nor = np.stack([ocean_mask[2] / 600, ocean_mask[1] / 600, ocean_mask[0] / 600], axis=-1)
rgb_nor_cropped = rgb_nor
rgb_nor_cropped = np.ma.masked_where(np.isnan(rgb_nor_cropped), rgb_nor_cropped)

image = kelp_mask[1]
plt.figure(figsize=(30, 30), dpi=200)
kelp_layer = plt.imshow(image, alpha=1)
plt.imshow(rgb_nor_cropped, alpha=1)
# The colour bar describes the single-band kelp layer; an RGB layer has no scale.
plt.colorbar(kelp_layer)
plt.show()

# Draw the ocean endmembers used by the unmixing from the clean ocean pixels.
em_seed = 42  # fixed seed so the endmember draw is reproducible between runs

# Fixed kelp endmember, one value per spectral band.
kelp_EM = [459, 556, 437, 1227]
n_bands, height, width = kelp_mask.shape

# Flatten each band to a row: (bands, pixels).
ocean_data = ocean_mask.reshape(ocean_mask.shape[0], -1)
kelp_data = kelp_mask.reshape(kelp_mask.shape[0], -1)

# Keep only pixels that are valid ocean in every band.
nan_columns = np.isnan(ocean_data).any(axis=0)
filtered_ocean = ocean_data[:, ~nan_columns]
print(filtered_ocean.shape)
if filtered_ocean.shape[1] < 1000:
    # Raise instead of sys.exit(): exiting would take the whole kernel session down.
    raise ValueError("Insufficient number of ocean pixels")

# Sample num_EM ocean pixels (with replacement); each column is one endmember.
rng = np.random.default_rng(em_seed)
em_indices = rng.integers(0, filtered_ocean.shape[1], size=num_EM)
ocean_EM = filtered_ocean[:, em_indices]

# Sanity check: the sampled endmembers should resemble the overall ocean mean.
average_val = np.nanmean(filtered_ocean, axis=1)
average_endmember = np.nanmean(ocean_EM, axis=1)
print(f"average EM Val: {average_endmember}")
print(f"average    Val: {average_val}")

# Two-endmember unmixing (one ocean endmember + the kelp endmember) repeated for
# every sampled ocean endmember; per pixel, the model with the lowest RMSE wins.
crop_rows = slice(2800, 3100)  # preview window, rows
crop_cols = slice(800, 1400)   # preview window, columns

kelp_mask = cp.asarray(kelp_mask)
ocean_EM = cp.asarray(ocean_EM)
kelp_EM = cp.asarray(kelp_EM)
kelp_data = cp.asarray(kelp_data)

height, width = kelp_mask.shape[1], kelp_mask.shape[2]
frac1 = cp.full((height, width, num_EM), cp.nan)  # ocean fraction per model
frac2 = cp.full((height, width, num_EM), cp.nan)  # kelp fraction per model
rmse = cp.full((height, width, num_EM), cp.nan)
print(rmse.shape)

for k in range(num_EM):
    # Endmember matrix B: column 0 = ocean endmember k, column 1 = kelp.
    B = cp.column_stack((ocean_EM[:, k], kelp_EM))
    # Pseudo-inverse of B through its SVD.
    U, S, Vt = cp.linalg.svd(B, full_matrices=False)
    em_inv = (Vt.T / S) @ U.T
    F = em_inv @ kelp_data  # fractions, one column per pixel
    model = B @ F           # spectra reconstructed from those fractions
    resids = (kelp_data - model) / 10000
    rmse[:, :, k] = cp.sqrt(cp.mean(resids**2, axis=0)).reshape(height, width)
    frac1[:, :, k] = F[0, :].reshape(height, width)
    frac2[:, :, k] = F[1, :].reshape(height, width)

# --- Pick the best model per pixel --------------------------------------------
minVals = cp.nanmin(rmse, axis=2)
PageIdx = cp.nanargmin(rmse, axis=2)
# Pixels outside the kelp mask are NaN in every model, so they have no meaningful
# arg-min. Pin their index to 0; their fraction is NaN and is zeroed below.
has_model = ~cp.isnan(minVals)
PageIdx = cp.where(has_model, PageIdx, 0)

Mes2 = cp.take_along_axis(frac2, PageIdx[:, :, None], axis=2)[:, :, 0]
# Mes2 is kept transposed, as before; Mes_array below undoes the transpose.
Mes2 = Mes2.T
# NOTE(review): labelled a Landsat correction, yet the scene read here is S30 - confirm.
Mes2 = -0.229 * Mes2**2 + 1.449 * Mes2 - 0.018 #Landsat mesma corrections
Mes2 = cp.clip(Mes2, 0, None)  # Ensure no negative values
# Casting NaN to an integer is undefined, so pixels without a model become 0 first.
Mes2 = cp.where(cp.isnan(Mes2), 0, Mes2)
Mes2 = cp.round(Mes2 * 100).astype(cp.int16)

# --- Preview ------------------------------------------------------------------
kelp_img = cp.asnumpy(kelp_mask)
Mes_array = cp.asnumpy(Mes2).T
# Zeros are hidden (NaN) so only non-zero values are drawn.
Mes_vis = np.where(Mes_array == 0, np.nan, Mes_array)
kelp_vis = np.where(kelp_img == 0, np.nan, kelp_img)
print(item)
plt.figure(figsize=(20, 20), dpi=200)
plt.imshow(kelp_vis[1, crop_rows, crop_cols], cmap='Greys')
plt.imshow(Mes_vis[crop_rows, crop_cols], alpha=1)
plt.colorbar()
plt.show()
