In [None]:
import rasterio
import os
#import kelp_tools as kt
import pickle
import numpy as np
from rasterio.errors import RasterioIOError
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta
from IPython.display import clear_output

In [None]:
def load_processed_img(path, file, cp=False, bands=None, just_data=False, geo_info=False, cloud_coverage=True, crop=False):
    """Read a processed raster together with its TIDE/CURRENT/CLOUD_COVERAGE tags.

    Parameters
    ----------
    path : str
        Directory that contains the raster.
    file : str
        File name of the raster inside ``path``.
    cp : bool, optional
        Unused; kept so existing call sites keep working.
    bands : int or sequence of int, optional
        Band index(es) forwarded to ``src.read``. ``None`` reads every band.
    just_data : bool, optional
        Return only the pixel array.
    geo_info : bool, optional
        Return the dataset transform and CRS in addition to the tag values.
    cloud_coverage : bool, optional
        Include the CLOUD_COVERAGE tag value in the result (only consulted
        when ``geo_info`` is false).
    crop : bool, optional
        Slice the last two axes to the fixed window rows 2800:3050,
        columns 875:1300.

    Returns
    -------
    None
        If the file cannot be opened/read, or if any of the TIDE, CURRENT or
        CLOUD_COVERAGE tags is missing or not convertible to float. Note that
        this applies even when ``just_data`` is true.
    numpy.ndarray
        When ``just_data`` is true.
    tuple
        ``(data, tide, current, clouds, transform, crs)`` when ``geo_info``,
        ``(data, tide, current, clouds)`` when ``cloud_coverage``,
        otherwise ``(data, tide, current)``.
    """
    try:
        with rasterio.open(os.path.join(path, file), 'r') as src:
            # `is None` instead of `== None`: with an array-like `bands` the
            # equality test is element-wise and the `if` becomes ambiguous.
            data = src.read() if bands is None else src.read(bands)
            metadata = src.tags()
            transform = src.transform
            crs = src.crs
        if crop:
            # Ellipsis keeps the crop valid for both (bands, rows, cols)
            # arrays and 2-D single-band reads.
            data = data[..., 2800:3050, 875:1300]
    except RasterioIOError as e:
        print(f"Error reading file {file}: {e}")
        return None

    try:
        tide = float(metadata['TIDE'])
        current = float(metadata['CURRENT'])
        clouds = float(metadata['CLOUD_COVERAGE'])
    except (KeyError, TypeError, ValueError):
        # Only the failures a missing / malformed tag can produce are handled;
        # anything else (e.g. KeyboardInterrupt) now propagates.
        print(f'{file} has no TIDE or CURRENT metadata')
        return None

    if just_data:
        return data
    if geo_info:
        return data, tide, current, clouds, transform, crs
    if cloud_coverage:
        return data, tide, current, clouds
    return data, tide, current
    
def get_sensor(granule):
    """Return the second dot-separated field of a granule name."""
    return granule.split('.')[1]

def extract_date(filename):
    """Parse the fourth dot-separated field of ``filename`` as a timestamp.

    The field must look like ``YYYYDDDTHHMMSS`` (year, day of year, time).
    """
    stamp = filename.split('.')[3]
    return datetime.strptime(stamp, '%Y%jT%H%M%S')

def group_by_date(filenames, max_days=4, max_pair_size = 100):
    """Group file names whose acquisition dates lie close together.

    Files are ordered by the date returned by ``extract_date``. A group keeps
    growing while each new file is at most ``max_days`` whole days after the
    group's first file. When a group reaches ``max_pair_size`` members it is
    emitted and the next group is seeded with the file that closed it, so
    consecutive groups share that file.

    Parameters
    ----------
    filenames : iterable of str
        Names understood by ``extract_date``.
    max_days : int, optional
        Largest allowed gap, in whole days, from the first file of a group.
    max_pair_size : int, optional
        Group size at which a group is emitted. This argument used to be
        overwritten with 2 inside the function; it is now honoured.

    Returns
    -------
    list of (int, list of str)
        ``(group_size, file_names)`` tuples, largest groups first. Groups
        with a single member are not emitted unless ``max_pair_size`` is 1.
    """
    dates_and_files = sorted((extract_date(filename), filename) for filename in filenames)

    neighborhood = []
    neighbors = []
    last_date = None  # date of the file that anchors the current group

    for date, filename in dates_and_files:
        if last_date is None:
            last_date = date
        if (date - last_date).days <= max_days:
            neighbors.append(filename)
            if len(neighbors) >= max_pair_size:
                neighborhood.append((len(neighbors), neighbors))
                # Re-seed with the same file so it can also pair forward.
                neighbors = [filename]
                last_date = date
        else:
            if len(neighbors) > 1:
                neighborhood.append((len(neighbors), neighbors))
            neighbors = [filename]
            last_date = date

    if len(neighbors) > 1:
        neighborhood.append((len(neighbors), neighbors))
    neighborhood.sort(key=lambda x: x[0], reverse=True)
    return neighborhood

def get_mesma_pixel_sums(path, file1, file2, mesma_residuals=False, crop=False, bands=[5,6], only_overlap=False):
    """Summarise the MESMA and class layers of two images against each other.

    Both files are read with ``load_processed_img``. The first requested band
    is used as a class layer (0 is counted as kelp, 2 is treated as cloud)
    and the second as the MESMA layer.

    For each image the MESMA layer is zeroed where the value is below 10 or
    above 150, and then either zeroed where the *other* image is cloud
    (default) or kept only where the other image is class 0
    (``only_overlap=True``).

    Parameters
    ----------
    path, file1, file2 : str
        Directory and the two file names to compare.
    mesma_residuals : bool, optional
        Also return the residual array and both masked MESMA arrays.
    crop : bool, optional
        Forwarded to ``load_processed_img``.
    bands : sequence of int, optional
        Band indexes to read; the default list is never modified.
    only_overlap : bool, optional
        Switches the MESMA masking rule as described above.

    Returns
    -------
    None
        If either image could not be loaded.
    list
        ``[file1, f_sum, f_kelp_pixels, f_current, f_tide, f_clouds,
        file2, s_sum, s_kelp_pixels, s_current, s_tide, s_clouds]`` where
        both ``*_clouds`` entries are counts of class-2 pixels.
    tuple
        ``(row, f_mesma - s_mesma, f_mesma, s_mesma)`` when
        ``mesma_residuals`` is true.
    """
    f_data = load_processed_img(path, file1, bands=bands, geo_info=False, cloud_coverage=True, crop=crop)
    if f_data is None:
        return None
    s_data = load_processed_img(path, file2, bands=bands, geo_info=False, cloud_coverage=True, crop=crop)
    if s_data is None:
        return None
    # The CLOUD_COVERAGE tag values are not used: both cloud figures reported
    # below are pixel counts taken from the class layer.
    f_img, f_tide, f_current, _ = f_data
    s_img, s_tide, s_current, _ = s_data

    def _summarise(own, other):
        # Masked MESMA layer, its sum, the class-0 pixel count and the
        # class-2 pixel count of `own`, masked with the class layer of `other`.
        mesma = np.array(own[1])
        mesma = np.where(mesma < 10, 0, mesma)
        mesma = np.where(mesma > 150, 0, mesma)
        if only_overlap:
            mesma = np.where(other[0] == 0, mesma, 0)
        else:
            mesma = np.where(other[0] == 2, 0, mesma)
        kelp = np.where(own[0] == 0, 1, 0)
        kelp = np.where(other[0] == 2, 0, kelp)
        cloud_pixels = np.sum(np.where(own[0] == 2, 1, 0))
        return mesma, np.sum(mesma), np.sum(kelp), cloud_pixels

    f_mesma, f_sum, f_kelp_pixels, f_clouds = _summarise(f_img, s_img)
    # Previously written as `s_clouds + np.sum(...)`, an expression whose
    # result was thrown away, leaving s_clouds on a different scale.
    s_mesma, s_sum, s_kelp_pixels, s_clouds = _summarise(s_img, f_img)

    data = [file1, f_sum, f_kelp_pixels, f_current, f_tide, f_clouds,
            file2, s_sum, s_kelp_pixels, s_current, s_tide, s_clouds]
    if mesma_residuals:
        # NOTE(review): if the MESMA band is stored as an unsigned integer
        # type this subtraction can wrap around - confirm the band dtype.
        mesma_res = f_mesma - s_mesma
        return data, mesma_res, f_mesma, s_mesma
    return data

def get_mesma_residuals(path, file1, file2, crop=False, only_overlap=False):
    """Return the per-pixel MESMA difference between two images.

    Bands 5 and 6 of each file are read with ``load_processed_img``; the
    first is used as a class layer (2 is treated as cloud, 0 as the overlap
    class) and the second as the MESMA layer. MESMA values below 10 or above
    150 are zeroed, then each image is masked with the other image's class
    layer: cloud pixels are zeroed by default, or only class-0 pixels are
    kept when ``only_overlap`` is true.

    Parameters
    ----------
    path, file1, file2 : str
        Directory and the two file names to compare.
    crop : bool, optional
        Slice both images to rows 2800:3050, columns 850:1600 after loading.
        NOTE(review): this window is wider than the one applied by
        ``load_processed_img(crop=True)`` - confirm which is intended.
    only_overlap : bool, optional
        Switches the masking rule as described above.

    Returns
    -------
    None
        If either image could not be loaded.
    tuple of numpy.ndarray
        ``(f_mesma - s_mesma, f_mesma, s_mesma)``.
    """
    bands = [5, 6]
    # just_data=True makes the loader return the bare pixel array; the
    # previous code tried to unpack it as (img, tide, current) and raised.
    f_img = load_processed_img(path, file1, bands=bands, just_data=True)
    if f_img is None:
        return None
    s_img = load_processed_img(path, file2, bands=bands, just_data=True)
    if s_img is None:
        return None
    if crop:
        f_img = f_img[:, 2800:3050, 850:1600]
        s_img = s_img[:, 2800:3050, 850:1600]

    def _masked_mesma(own, other):
        # Threshold own MESMA layer, then mask it with the other class layer.
        mesma = np.array(own[1])
        mesma = np.where(mesma < 10, 0, mesma)
        mesma = np.where(mesma > 150, 0, mesma)
        if only_overlap:
            return np.where(other[0] == 0, mesma, 0)
        return np.where(other[0] == 2, 0, mesma)

    f_mesma = _masked_mesma(f_img, s_img)
    s_mesma = _masked_mesma(s_img, f_img)

    # NOTE(review): with an unsigned integer MESMA band this subtraction can
    # wrap around - confirm the band dtype.
    mesma_res = f_mesma - s_mesma
    return mesma_res, f_mesma, s_mesma

def get_col_keys():
    """Column names for a pair summary row: first-image fields, then second-image fields."""
    first_image = ['img1', 'f_mesma', 'f_kelp_pixels', 'f_current', 'f_tide', 'f_clouds']
    second_image = ['img2', 's_mesma', 's_kelp_pixels', 's_current', 's_tide', 's_clouds']
    return first_image + second_image

def plot_pair_values(df):
    """Scatter first-image against second-image values for every pair in ``df``.

    The left panel compares the MESMA sums (``f_mesma`` vs ``s_mesma``) with
    a red y = x reference line; the right panel compares the classified
    pixel counts (``f_kelp_pixels`` vs ``s_kelp_pixels``). Points in both
    panels are coloured by ``f_clouds + s_clouds``. The slope and intercept
    of a first-degree least-squares fit of ``s_mesma`` on ``f_mesma`` are
    printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``f_mesma``, ``s_mesma``, ``f_kelp_pixels``,
        ``s_kelp_pixels``, ``f_clouds`` and ``s_clouds``.

    Returns
    -------
    None
        An empty frame is reported and skipped instead of failing inside
        ``np.polyfit``.
    """
    if len(df) == 0:
        print('plot_pair_values: no image pairs to plot')
        return

    f_mesma = df['f_mesma'].astype(int)
    s_mesma = df['s_mesma'].astype(int)
    f_kelp = df['f_kelp_pixels'].astype(int)
    s_kelp = df['s_kelp_pixels'].astype(int)
    clouds = df['s_clouds'].astype(float) + df['f_clouds'].astype(float)

    # End points of the y = x reference line span both value ranges.
    min_val = min(f_mesma.min(), s_mesma.min())
    max_val = max(f_mesma.max(), s_mesma.max())
    x = np.linspace(min_val, max_val, 100)
    y = x
    slope, intercept = np.polyfit(f_mesma, s_mesma, 1)
    print(slope, intercept)

    plt.figure(figsize=(18,6))
    plt.subplot(1, 2, 1)
    scatter_1 = plt.scatter(f_mesma, s_mesma, c=clouds, alpha=0.7)
    plt.plot(x, y, color='red', label='y = x')
    plt.colorbar(scatter_1, label='Clouds')
    plt.legend()
    plt.xlabel('Image 1')
    plt.ylabel('Image 2')
    plt.title('Mesma Pixel Summation Comparison')

    plt.subplot(1,2,2)
    scatter_2 = plt.scatter(f_kelp, s_kelp, c=clouds, alpha=0.7)
    plt.colorbar(scatter_2, label='Fractional Cloud Coverage')
    # No legend here: this panel has no labelled artists to list.
    plt.xlabel('Image 1')
    plt.ylabel('Image 2')
    plt.title('Classified Pixel Count Comparison')
    plt.show()

def view_rgb(path, file1, file2, crop=False,  title_1='rgb1', title_2='rgb2'):
    """Show two images side by side as an RGB composite plus bands 6 and 5.

    Bands 1, 2, 3, 5 and 6 of each file are read with
    ``load_processed_img``. The figure has three rows and one column per
    image: a composite stacked as (band 3, band 2, band 1), then band 6,
    then band 5.

    Parameters
    ----------
    path, file1, file2 : str
        Directory and the two file names to display.
    crop : bool, optional
        Restrict every panel to rows 2800:3200, columns 800:1600.
    title_1, title_2 : str, optional
        Titles used for every panel of the first / second image.

    Returns
    -------
    None
        Nothing is drawn if either image could not be loaded; the loader has
        already reported the failure in that case.
    """
    img_1 = load_processed_img(path, file1, bands=[1,2,3,5,6], just_data=True)
    img_2 = load_processed_img(path, file2, bands=[1,2,3,5,6], just_data=True)
    if img_1 is None or img_2 is None:
        return None

    def _panels(img):
        # Indexes follow the order of the requested band list [1, 2, 3, 5, 6].
        rgb = np.stack([img[2], img[1], img[0]], axis=-1)
        band5 = img[3]
        band6 = img[4]
        if crop:
            rgb = rgb[2800:3200, 800:1600]
            band5 = band5[2800:3200, 800:1600]
            band6 = band6[2800:3200, 800:1600]
        # Row order of the figure: composite, band 6, band 5.
        return rgb, band6, band5

    columns = ((_panels(img_1), title_1), (_panels(img_2), title_2))

    plt.figure(figsize=(15, 15))
    for row in range(3):
        for col, (panels, title) in enumerate(columns):
            plt.subplot(3, 2, 2 * row + col + 1)
            plt.imshow(panels[row])
            plt.title(title)
    plt.show()

def plot_four(plot1, plot2, plot3, plot4=None, title1='plot 1', title2 = 'plot 2', title3='plot 3', title4='plot4'):
    """Show three or four images with colorbars on a 2 x 2 grid.

    The fourth panel is drawn only when ``plot4`` is given.
    """
    panels = [(plot1, title1), (plot2, title2), (plot3, title3)]
    if plot4 is not None:
        panels.append((plot4, title4))

    plt.figure(figsize=(20,10))
    for slot, (image, caption) in enumerate(panels, start=1):
        plt.subplot(2, 2, slot)
        plt.imshow(image)
        plt.colorbar()
        plt.title(caption)
    plt.show()
def plot_tide(df):
    """Scatter the change in MESMA sum against the tide difference of each pair.

    For every row the image with the larger tide value is the baseline. The
    y value is ``(low_tide_mesma - high_tide_mesma) / high_tide_mesma`` as a
    percentage, matching both the axis label and ``plot_tide_current``; the
    x value is the absolute tide difference. Rows whose baseline sum is 0
    have no defined relative change and become NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric ``f_tide``, ``s_tide``, ``f_mesma`` and ``s_mesma``
        columns.
    """
    tide_diff = df['f_tide'] - df['s_tide']

    mesma_ht = np.where(tide_diff > 0, df['f_mesma'], df['s_mesma']).astype(float)
    mesma_lt = np.where(tide_diff <= 0, df['f_mesma'], df['s_mesma']).astype(float)
    mesma_ht[mesma_ht == 0] = np.nan  # avoid dividing by a zero baseline
    mesma_diff = (mesma_lt - mesma_ht) / mesma_ht * 100
    tide_diff = abs(tide_diff)

    plt.figure()
    plt.scatter(tide_diff, mesma_diff)
    plt.title("Water Height difference vs Kelp Detection")
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Water Height")
    plt.show()
def plot_current(df):
    """Scatter the change in MESMA sum against the current difference of each pair.

    For every row the image with the larger current value is the baseline.
    The y value is ``(low_current_mesma - high_current_mesma) /
    high_current_mesma`` as a percentage; the x value is the absolute
    current difference, mirroring ``plot_tide``. Rows whose baseline sum is
    0 have no defined relative change and become NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric ``f_current``, ``s_current``, ``f_mesma`` and
        ``s_mesma`` columns.
    """
    current_diff = df['f_current'] - df['s_current']

    mesma_hc = np.where(current_diff > 0, df['f_mesma'], df['s_mesma']).astype(float)
    mesma_lc = np.where(current_diff <= 0, df['f_mesma'], df['s_mesma']).astype(float)
    mesma_hc[mesma_hc == 0] = np.nan  # avoid dividing by a zero baseline
    # The baseline used to be `mesma_ht`, which does not exist in this
    # function and raised NameError on every call.
    mesma_diff = (mesma_lc - mesma_hc) / mesma_hc * 100
    # The absolute difference was previously stored in an unused variable.
    current_diff = abs(current_diff)

    plt.figure()
    plt.scatter(current_diff, mesma_diff)
    plt.title("Current Magnitude difference vs Kelp Detection")
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Current Magnitude")
    plt.show()

def plot_tide_current(df):
    """Plot the MESMA response to tide (left) and to current (right).

    Each panel scatters, per image pair, the percent change of the MESMA sum
    from the image with the larger driver value (tide or current) to the
    image with the smaller one, against the absolute difference of that
    driver. Rows whose baseline sum is 0 have no defined relative change and
    become NaN.

    The right panel used to reuse the tide-based change, because the
    current-based values were overwritten before plotting.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs numeric ``f_tide``, ``s_tide``, ``f_current``, ``s_current``,
        ``f_mesma`` and ``s_mesma`` columns.
    """
    def _response(driver_diff):
        # Percent change from the high-driver image to the low-driver image.
        high = np.where(driver_diff > 0, df['f_mesma'], df['s_mesma']).astype(float)
        low = np.where(driver_diff <= 0, df['f_mesma'], df['s_mesma']).astype(float)
        high[high == 0] = np.nan  # avoid dividing by a zero baseline
        return abs(driver_diff), (low - high) / high * 100

    tide_diff, tide_change = _response(df['f_tide'] - df['s_tide'])
    current_diff, current_change = _response(df['f_current'] - df['s_current'])

    plt.figure(figsize=(15,6))
    plt.subplot(1, 2, 1)
    plt.title("Water Height difference vs Kelp Detection")
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Water Height")
    plt.scatter(tide_diff, tide_change)
    plt.subplot(1,2,2)
    plt.title("Current Magnitude difference vs Kelp Detection")
    plt.scatter(current_diff, current_change)
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Current Magnitude")
    plt.show()

def analyze_mesma_pixel(path, file1, file2, bands=[5,6], crop=False, residuals=False):
    """Compare the MESMA layers of two images pixel by pixel.

    Both files are read with ``load_processed_img(..., just_data=True)``.
    In each image the second requested band is zeroed where it is below 10,
    above 120, or where the first band of the *other* image equals 2.

    Returns ``None`` if either image fails to load. Otherwise returns
    ``(sf_count, s_sum, f_sum, s_count, f_count, s_mesma, f_mesma)`` where
    the counts are numbers of non-zero pixels (``sf_count``: non-zero in
    both) and the sums are totals of the masked layers. With
    ``residuals=True`` the tuple is extended by ``f_mesma - s_mesma`` and
    the sum of its absolute values.
    """
    f_img = load_processed_img(path, file1, bands=bands, just_data=True, crop=crop)
    if f_img is None:
        return None
    s_img = load_processed_img(path, file2, bands=bands, just_data=True, crop=crop)
    if s_img is None:
        return None

    def _masked_layer(own, other):
        # Keep values in [10, 120] that are not class 2 in the other image.
        layer = np.array(own[1])
        layer = np.where(layer < 10, 0, layer)
        layer = np.where(layer > 120, 0, layer)
        return np.where(other[0] == 2, 0, layer)

    f_mesma = _masked_layer(f_img, s_img)
    s_mesma = _masked_layer(s_img, f_img)
    f_sum = np.sum(f_mesma)
    s_sum = np.sum(s_mesma)

    # 0/1 presence masks and their intersection.
    f_present = np.where(f_mesma > 0, 1, 0)
    s_present = np.where(s_mesma > 0, 1, 0)
    both_present = np.where(s_present, f_present, 0)

    summary = (np.sum(both_present), s_sum, f_sum,
               np.sum(s_present), np.sum(f_present), s_mesma, f_mesma)
    if not residuals:
        return summary

    resids = f_mesma - s_mesma
    return summary + (resids, np.sum(abs(resids)))




In [None]:
# Directory holding the processed rasters. This is an absolute path on a
# local drive, so it has to be edited before running on another machine.
path = r'H:\HLS_data\imagery\Isla_vista_kelp\processed_v2\11SKU'
# Every directory entry is handed to group_by_date, so each name must be
# parseable by extract_date.
filenames = os.listdir(path)
# Group files acquired within 3 days of each other, requesting groups of two.
pairs = group_by_date(filenames, max_days=3, max_pair_size=2)
print(f'Pairs found: {len(pairs)}')

In [None]:
# Collect one summary row per image pair. Pairs for which
# get_mesma_pixel_sums returned None (a file could not be loaded) are skipped.
data_array = []
for pair in pairs:
    files = pair[1]
    data = get_mesma_pixel_sums(path, files[0], files[1], crop=True)
    if data is not None:
        data_array.append(data)

# Build the frame straight from the row lists. Going through np.stack turned
# every value (file names and numbers alike) into a string and raised when no
# pair could be loaded; this keeps the numeric columns numeric and yields an
# empty frame with the right columns when there are no rows.
df = pd.DataFrame(data_array, columns=get_col_keys())


In [None]:
# Give the summary columns their numeric types.
for int_column in ('f_kelp_pixels', 's_kelp_pixels', 'f_mesma', 's_mesma'):
    df[int_column] = df[int_column].astype(int)
for float_column in ('f_tide', 's_tide', 'f_current', 's_current'):
    df[float_column] = df[float_column].astype(float)

# Symmetric relative difference between the two MESMA sums of a pair.
mesma_gap = (df['f_mesma'] - df['s_mesma']).abs()
mesma_total = df['s_mesma'] + df['f_mesma']
df['percent_change'] = 2 * mesma_gap / mesma_total

# Keep pairs where both MESMA sums are at most 50000 and both kelp pixel
# counts are at most 6000.
mesma_in_range = (df['f_mesma'] <= 50000) & (df['s_mesma'] <= 50000)
kelp_in_range = (df['f_kelp_pixels'] <= 6000) & (df['s_kelp_pixels'] <= 6000)
filtered_df = df[mesma_in_range & kelp_in_range]

In [None]:
# Split the pairs at a relative MESMA difference of 0.5. Both comparisons are
# kept explicit so rows with a NaN percent_change land in neither frame.
percent_change = filtered_df['percent_change']
outlier_df = filtered_df[percent_change > .5]
uniform_df = filtered_df[percent_change <= .5]

for pair_subset in (outlier_df, uniform_df):
    plot_pair_values(pair_subset)



In [None]:
# Per-pair pixel comparison for the pairs that agree within the threshold.
for i, pair in uniform_df.iterrows():
    print(pair['img1'], pair['img2'])
    packet = analyze_mesma_pixel(path, pair['img1'], pair['img2'], crop=True, residuals=True)
    if packet is None:
        # analyze_mesma_pixel returns None when either image fails to load;
        # skip the pair instead of failing on the unpacking below.
        continue
    sf_count, s_sum, f_sum, s_count, f_count, s_mesma, f_mesma, resids, resids_sum = packet
    print(sf_count, f_count, s_count, s_sum, f_sum, resids_sum)
    plot_four(s_mesma, f_mesma, resids)

In [None]:
# Visual check of every uniform pair, titled with the file names.
for i, pair in uniform_df.iterrows():
    first_img, second_img = pair['img1'], pair['img2']
    print(first_img, second_img)
    view_rgb(path, first_img, second_img, crop=True, title_1=first_img, title_2=second_img)

In [None]:
# Tide and current panels for the pairs whose percent_change is at most 0.5.
plot_tide_current(uniform_df)

In [None]:
# Pair comparison over all range-filtered pairs (outlier and uniform together).
plot_pair_values(filtered_df)