In [None]:
import rasterio
import os
#import kelp_tools as kt
import pickle
import numpy as np
from rasterio.errors import RasterioIOError
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta
from IPython.display import clear_output
import matplotlib.dates as mdates
import re

In [None]:
def load_processed_img(path, file, bands=None, just_data=False, geo_info=False, cloud_coverage=True, crop=False, date_return=False):
    """Read a processed raster and return its pixels plus selected tag metadata.

    Parameters
    ----------
    path, file : str
        Directory and file name; joined with ``os.path.join``.
    bands : int or list of int, optional
        Band index(es) forwarded to ``src.read``. ``None`` reads every band.
    just_data : bool
        When True, return only the pixel array and skip all tag parsing.
    geo_info : bool
        When True, include the dataset transform and CRS in the result.
    cloud_coverage : bool
        When True, append the ``CLOUD_COVERAGE`` tag as the last element.
    crop : bool
        When True, slice the array to ``[:, 2800:3050, 875:1300]``
        (requires a 3-D array, i.e. ``bands`` given as a list or ``None``).
    date_return : bool
        When True, include a day number and the trimmed ``TIMESTAMP`` string.

    Returns
    -------
    numpy.ndarray, list or None
        * ``None`` if the file cannot be opened, or (unless ``just_data``) if the
          TIDE / CURRENT / CLOUD_COVERAGE tags are missing or not numeric.
        * The pixel array alone when ``just_data`` is True.
        * Otherwise a list in this order:
          ``[data, (day_num, date), tide, current, (transform, crs), (clouds)]``
          where the parenthesised items appear only when the matching flag is set.
    """
    try:
        with rasterio.open(os.path.join(path, file), 'r') as src:
            data = src.read() if bands is None else src.read(bands)
            metadata = src.tags()
            transform = src.transform
            crs = src.crs
        if crop:
            data = data[:, 2800:3050, 875:1300]
    except RasterioIOError as e:
        print(f"Error reading file {file}: {e}")
        return None

    if just_data:
        return data

    try:
        tide = float(metadata['TIDE'])
        current = float(metadata['CURRENT'])
        clouds = float(metadata['CLOUD_COVERAGE'])
    except (KeyError, ValueError):
        # Only "tag missing" and "tag not numeric" are expected here; anything
        # else is a real error and should surface instead of being swallowed.
        print(f'{file} has no TIDE or CURRENT metadata')
        return None

    return_vals = [data]
    if date_return:
        date = metadata['TIMESTAMP']
        date = date.rstrip('Z')
        # Truncate to at most microsecond precision so strptime's %f accepts it.
        date = date[:26]
        date_obj = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
        # NOTE(review): date_obj is naive, so .timestamp() interprets it in the
        # machine's local timezone even though the tag carried a trailing 'Z'.
        day_num = date_obj.timestamp() / 86400
        return_vals.append(day_num)
        return_vals.append(date)
    return_vals.append(tide)
    return_vals.append(current)
    if geo_info:
        return_vals.append(transform)
        return_vals.append(crs)
    if cloud_coverage:
        return_vals.append(clouds)
    return return_vals
    
def get_sensor(granule):
    """Return the second dot-separated field of a granule or file name.

    A later cell compares this value against ``'S30'`` to skip files.
    Raises ``IndexError`` when the name contains no dot.
    """
    return granule.split('.')[1]

def extract_date(filename):
    """Parse the fourth dot-separated field of ``filename`` as ``%Y%jT%H%M%S``.

    Returns ``None`` when the name has fewer than four fields. A field that is
    present but malformed raises ``ValueError`` from ``strptime``.

    NOTE: a second ``extract_date`` is defined further down in this same cell
    and replaces this one once the cell has run; that version keeps only the
    date part and returns ``None`` for any non-matching name.
    """
    parts = filename.split('.')
    if len(parts) < 4:
        return None
    return datetime.strptime(parts[3], '%Y%jT%H%M%S')

def group_by_date(filenames, max_days=4, max_pair_size=2):
    """Group files whose acquisition dates lie within ``max_days`` of each other.

    Parameters
    ----------
    filenames : iterable of str
        Names understood by ``extract_date``. Names with no parseable date
        (``extract_date`` returns ``None``) are ignored instead of breaking the
        sort and the date arithmetic.
    max_days : int
        Largest whole-day gap, measured from the first file of the current
        group, for a file to join that group.
    max_pair_size : int
        Group size at which a group is emitted. The original body overwrote
        this argument with 2, so 2 is kept as the default to preserve existing
        results while letting callers actually change it.

    Returns
    -------
    list of (int, list of str)
        ``(group_size, filenames)`` tuples, largest group first. When a group
        is emitted because it reached ``max_pair_size``, its last file also
        starts the next group, so consecutive groups can share a file.
    """
    dates_and_files = [(extract_date(filename), filename) for filename in filenames]
    dates_and_files = [(date, filename) for date, filename in dates_and_files if date is not None]
    dates_and_files.sort()  # chronological; ties broken by file name

    neighborhood = []
    neighbors = []
    last_date = None

    for date, filename in dates_and_files:
        if last_date is None:
            last_date = date
        if (date - last_date).days <= max_days:
            neighbors.append(filename)
            if len(neighbors) >= max_pair_size:
                neighborhood.append((len(neighbors), neighbors))
                # Chain groups: the newest file seeds the next group.
                neighbors = [filename]
                last_date = date
        else:
            if len(neighbors) > 1:
                neighborhood.append((len(neighbors), neighbors))
            neighbors = [filename]
            last_date = date

    if len(neighbors) > 1:
        neighborhood.append((len(neighbors), neighbors))
    neighborhood.sort(key=lambda x: x[0], reverse=True)
    return neighborhood

def get_mesma_pixel_sums(path, file1, file2, mesma_residuals=False, crop=False, bands=[5,6], only_overlap=False, kelp_map=None):
    """Summarise the MESMA layer and kelp-class pixel count for a pair of images.

    Each file is loaded through ``load_processed_img`` with ``bands``; layer 0
    of the result is used as the class mask (0 counted as kelp, 2 as cloud) and
    layer 1 as the MESMA values. MESMA values below 5 or above 200 are zeroed.
    Pixels that are cloud in the *other* image are zeroed; with
    ``only_overlap`` only pixels that are class 0 in both images are kept.
    ``kelp_map``, when given, further restricts both layers to its truthy pixels.

    Returns ``None`` if either file fails to load. Otherwise returns a list laid
    out like ``get_col_keys()``; when ``mesma_residuals`` is True returns
    ``(that_list, f_mesma - s_mesma, f_mesma, s_mesma)`` instead.
    The cloud counts in the list are pixel counts of class 2, not the
    CLOUD_COVERAGE tag.
    """
    first = load_processed_img(path, file1, bands=bands, geo_info=False, cloud_coverage=True, crop=crop)
    if first is None:
        return None
    f_img, f_tide, f_current, _ = first

    second = load_processed_img(path, file2, bands=bands, geo_info=False, cloud_coverage=True, crop=crop)
    if second is None:
        return None
    s_img, s_tide, s_current, _ = second

    def _mask_layers(own, other):
        # MESMA layer with out-of-range values removed.
        fraction = np.array(own[1])
        fraction = np.where(fraction < 5, 0, fraction)
        fraction = np.where(fraction > 200, 0, fraction)
        if only_overlap:
            fraction = np.where(other[0] == 0, fraction, 0)
            fraction = np.where(own[0] == 0, fraction, 0)
        else:
            fraction = np.where(other[0] == 2, 0, fraction)

        # Binary kelp-class mask, minus pixels clouded in the other image.
        kelp = np.where(own[0] == 0, 1, 0)
        kelp = np.where(other[0] == 2, 0, kelp)

        if kelp_map is not None:
            fraction = np.where(kelp_map, fraction, 0)
            kelp = np.where(kelp_map, kelp, 0)
        return fraction, kelp

    f_mesma, f_kelp = _mask_layers(f_img, s_img)
    s_mesma, s_kelp = _mask_layers(s_img, f_img)

    f_clouds = np.sum(np.where(f_img[0] == 2, 1, 0))
    s_clouds = np.sum(np.where(s_img[0] == 2, 1, 0))

    data = [
        file1, np.sum(f_mesma), np.sum(f_kelp), f_current, f_tide, f_clouds,
        file2, np.sum(s_mesma), np.sum(s_kelp), s_current, s_tide, s_clouds,
    ]
    if not mesma_residuals:
        return data
    return data, f_mesma - s_mesma, f_mesma, s_mesma

def get_mesma_residuals(path, file1, file2, crop=False, only_overlap=False):
    """Return per-pixel MESMA differences between two images.

    Bands 5 and 6 of each file are loaded with ``just_data=True``, so the
    loader returns the bare array. The original code tried to unpack that array
    into ``(img, tide, current)`` and therefore always failed.

    MESMA values (layer 1) below 5 or above 200 are zeroed. With
    ``only_overlap`` a pixel is kept only where the other image's layer 0 is 0;
    otherwise pixels where the other image's layer 0 is 2 are zeroed.

    Returns ``None`` if either file fails to load, else
    ``(f_mesma - s_mesma, f_mesma, s_mesma)``.
    """
    bands = [5, 6]
    f_img = load_processed_img(path, file1, bands=bands, just_data=True)
    if f_img is None:
        return None
    s_img = load_processed_img(path, file2, bands=bands, just_data=True)
    if s_img is None:
        return None
    if crop:
        # NOTE(review): this window (850:1600) is wider than the one used by
        # load_processed_img's own crop (875:1300) - confirm which is intended.
        s_img = s_img[:, 2800:3050, 850:1600]
        f_img = f_img[:, 2800:3050, 850:1600]

    f_mesma = np.array(f_img[1])
    f_mesma = np.where(f_mesma < 5, 0, f_mesma)
    f_mesma = np.where(f_mesma > 200, 0, f_mesma)
    if only_overlap:
        f_mesma = np.where(s_img[0] == 0, f_mesma, 0)
    else:
        f_mesma = np.where(s_img[0] == 2, 0, f_mesma)

    s_mesma = np.array(s_img[1])
    s_mesma = np.where(s_mesma < 5, 0, s_mesma)
    s_mesma = np.where(s_mesma > 200, 0, s_mesma)
    if only_overlap:
        s_mesma = np.where(f_img[0] == 0, s_mesma, 0)
    else:
        s_mesma = np.where(f_img[0] == 2, 0, s_mesma)

    # NOTE(review): if the raster dtype is unsigned this subtraction wraps
    # around instead of going negative - confirm the band dtype.
    mesma_res = f_mesma - s_mesma
    return mesma_res, f_mesma, s_mesma

def get_col_keys():
    """Column names for one image-pair record, in the order ``get_mesma_pixel_sums`` emits them."""
    first_image = ['img1', 'f_mesma', 'f_kelp_pixels', 'f_current', 'f_tide', 'f_clouds']
    second_image = ['img2', 's_mesma', 's_kelp_pixels', 's_current', 's_tide', 's_clouds']
    return first_image + second_image

def plot_pair_values(df, color_basis='', color_title='', single_color_var=False, vmin=None, vmax=None):
    """Scatter image-1 against image-2 values for every pair in ``df``.

    Left panel: MESMA sums with a ``y = x`` reference line. Right panel:
    classified kelp pixel counts. The slope and intercept of a first-degree
    ``np.polyfit`` of ``s_mesma`` on ``f_mesma`` are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``f_mesma``, ``s_mesma``, ``f_kelp_pixels``, ``s_kelp_pixels``
        plus whatever columns the colouring option reads.
    color_basis : str
        ``''`` colours by ``f_clouds - s_clouds``. Otherwise, with
        ``single_color_var`` the column ``color_basis`` itself is used; without
        it the sum ``s_<color_basis> + f_<color_basis>``.
    color_title : str
        Colour-bar label.
    vmin, vmax : float, optional
        Colour limits; default to the min / max of the colour values.
    """
    f_mesma = df['f_mesma'].astype(int)
    s_mesma = df['s_mesma'].astype(int)
    f_kelp = df['f_kelp_pixels'].astype(int)
    s_kelp = df['s_kelp_pixels'].astype(int)

    if single_color_var and not color_basis == '':
        colors = df[color_basis].astype(float)
    elif color_basis == '':
        colors = df['f_clouds'].astype(float) - df['s_clouds'].astype(float)
    else:
        colors = df[f's_{color_basis}'].astype(float) + df[f'f_{color_basis}'].astype(float)

    # Identity checks: `== None` misbehaves for array-like limits.
    if vmin is None:
        vmin = np.min(colors)
    if vmax is None:
        vmax = np.max(colors)

    min_val = min(f_mesma.min(), s_mesma.min())
    max_val = max(f_mesma.max(), s_mesma.max())
    x = np.linspace(min_val, max_val, 100)
    y = x
    slope, intercept = np.polyfit(f_mesma, s_mesma, 1)
    print(slope, intercept)

    plt.figure(figsize=(18,6))
    plt.subplot(1, 2, 1)
    scatter_1 = plt.scatter(f_mesma, s_mesma, c=colors, vmin=vmin, vmax=vmax, alpha=1)
    plt.plot(x, y, color='red', label='y = x')
    plt.colorbar(scatter_1, label=color_title)
    plt.legend()
    plt.xlabel('Image 1')
    plt.ylabel('Image 2')
    plt.title('Mesma Pixel Summation Comparison')

    plt.subplot(1,2,2)
    scatter_2 = plt.scatter(f_kelp, s_kelp, c=colors, vmin=vmin, vmax=vmax, alpha=1)
    plt.colorbar(scatter_2, label=color_title)
    # No legend here: this panel has no labelled artists, so legend() would
    # only emit a warning.
    plt.xlabel('Image 1')
    plt.ylabel('Image 2')
    plt.title('Classified Pixel Count Comparison')
    plt.show()

def view_rgb(path, file1, file2, crop=False,  title_1='rgb1', title_2='rgb2'):
    """Show two images side by side in a 3x2 grid.

    Bands ``[1, 2, 3, 5, 6]`` are loaded for each file. Row 1 is a colour
    composite stacked as layers 2, 1, 0; row 2 is the band-6 layer; row 3 is
    the band-5 layer. The left column is titled ``title_1`` and the right
    column ``title_2`` on every row.
    """
    band_list = [1,2,3,5,6]
    img_1 = load_processed_img(path,file1, bands=band_list, just_data=True, crop=crop)
    img_2 = load_processed_img(path,file2, bands=band_list, just_data=True, crop=crop)

    def _rows(img):
        # Layers in display order: composite, then index 4 (band 6), then index 3 (band 5).
        composite = np.stack([img[2], img[1], img[0]], axis=-1)
        return composite, img[4], img[3]

    paired_rows = list(zip(_rows(img_1), _rows(img_2)))

    plt.figure(figsize=(15, 15))
    slot = 0
    for left, right in paired_rows:
        for layer, title in ((left, title_1), (right, title_2)):
            slot += 1
            plt.subplot(3, 2, slot)
            plt.imshow(layer)
            plt.title(title)
    plt.show()

def plot_four(plot1, plot2, plot3, plot4=None, title1='plot 1', title2 = 'plot 2', title3='plot 3', title4='plot4'):
    """Show up to four images in a 2x2 grid with colour bars.

    ``plot1`` and ``plot2`` share an upper colour limit (the largest value
    found in ``plot1``..``plot3``). ``plot3`` is drawn with limits symmetric
    about zero (``-v_max`` .. ``v_max``). ``plot4`` is optional and is
    autoscaled on its own.
    """
    # The original also computed a shared minimum but never used it.
    v_max = np.max([np.max(plot1), np.max(plot2), np.max(plot3)])

    plt.figure(figsize=(20,10))
    plt.subplot(2, 2, 1)
    plt.imshow(plot1, vmax = v_max)
    plt.colorbar()
    plt.title(title1)
    plt.subplot(2,2,2)
    plt.imshow(plot2, vmax = v_max)
    plt.colorbar()
    plt.title(title2)
    plt.subplot(2, 2, 3)
    plt.imshow(plot3, vmax = v_max, vmin=-v_max)
    plt.colorbar()
    plt.title(title3)
    if plot4 is not None:
        plt.subplot(2, 2, 4)
        plt.imshow(plot4)
        plt.colorbar()
        plt.title(title4)
    plt.show()
def plot_tide(df):
    """Scatter relative MESMA change against the tide difference of each pair.

    For every row the MESMA sum of the higher-tide image is the reference;
    y is ``(low_tide_mesma - high_tide_mesma) / high_tide_mesma`` and x is the
    absolute tide difference.
    """
    delta = df['f_tide'] - df['s_tide']

    at_high_tide = np.where(delta > 0, df['f_mesma'], df['s_mesma'])
    at_low_tide = np.where(delta <= 0, df['f_mesma'], df['s_mesma'])
    relative_change = (at_low_tide - at_high_tide) / at_high_tide
    height_gap = abs(delta)

    plt.figure()
    plt.scatter(height_gap, relative_change)
    plt.title("Water Height difference vs Kelp Detection")
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Water Height")
    plt.show()
def plot_current(df):
    """Scatter relative MESMA change against the current difference of each pair.

    For every row the MESMA sum of the higher-current image is the reference;
    y is ``(low_current_mesma - high_current_mesma) / high_current_mesma`` and
    x is the absolute current difference, mirroring ``plot_tide``.

    The original divided by an undefined ``mesma_ht`` (NameError), stored the
    absolute difference in an unused variable, and reused the tide plot title.
    """
    current_diff = df['f_current'] - df['s_current']

    mesma_hc = np.where(current_diff > 0, df['f_mesma'], df['s_mesma'])
    mesma_lc = np.where(current_diff <= 0, df['f_mesma'], df['s_mesma'])
    # The sign of the difference is already folded into the high/low split,
    # so plot its magnitude.
    current_diff = abs(current_diff)
    mesma_diff = (mesma_lc - mesma_hc) / mesma_hc

    plt.figure()
    plt.scatter(current_diff, mesma_diff)
    plt.title("Current Magnitude difference vs Kelp Detection")
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Current Magnitude")
    #plt.ylim(0, 5)
    plt.show()

def plot_tide_current(df):
    """Plot relative MESMA change against tide and current differences side by side.

    Left panel: change from the higher-tide to the lower-tide image against the
    absolute tide difference. Right panel: change from the higher-current to
    the lower-current image against the absolute current difference. Both y
    axes are scaled to percent.

    The original overwrote the current-based change with the tide-based one
    before plotting, so the right panel showed tide-based values against the
    current difference.
    """
    def _low_vs_high(key):
        # Relative change from the image with the larger `key` to the one with the smaller.
        diff = df[f'f_{key}'] - df[f's_{key}']
        mesma_high = np.where(diff > 0, df['f_mesma'], df['s_mesma'])
        mesma_low = np.where(diff <= 0, df['f_mesma'], df['s_mesma'])
        return abs(diff), (mesma_low - mesma_high) / mesma_high

    tide_diff, tide_change = _low_vs_high('tide')
    current_diff, current_change = _low_vs_high('current')

    plt.figure(figsize=(15,6))
    plt.subplot(1, 2, 1)
    plt.title("Water Height difference vs Kelp Detection")
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Water Height")
    plt.scatter(tide_diff, tide_change*100)
    plt.subplot(1,2,2)
    plt.title("Current Magnitude difference vs Kelp Detection")
    plt.scatter(current_diff, current_change*100)
    plt.ylabel("Percent Change in Kelp Biomass Detection")
    plt.xlabel("Difference in Current Magnitude")
    plt.show()

def analyze_mesma_pixel(path, file1, file2, bands=[5,6], crop=False, residuals=False, kelp_map=None):
    """Compare the MESMA layers of two images pixel by pixel.

    Layer 1 of each loaded array is the MESMA layer; values below 5 or above
    200 are zeroed, as are pixels where the *other* image's layer 0 equals 2
    (cloud, per the original comments). ``kelp_map``, when given, keeps only
    its truthy pixels.

    Returns ``None`` if either file fails to load. Otherwise returns
    ``(sf_count, s_sum, f_sum, s_count, f_count, s_mesma, f_mesma)`` where the
    counts are numbers of non-zero pixels (``sf_count`` = non-zero in both).
    With ``residuals`` the tuple is extended by ``f_mesma - s_mesma`` and the
    sum of its absolute values.
    """
    f_img = load_processed_img(path,file1, bands=bands, just_data=True, crop=crop)
    if f_img is None:
        return None
    s_img = load_processed_img(path,file2, bands=bands, just_data=True, crop=crop)
    if s_img is None:
        return None

    def _clean(own, other):
        layer = np.array(own[1])
        layer = np.where(layer < 5, 0, layer)        # drop values below 5
        layer = np.where(layer > 200, 0, layer)      # drop values above 200
        layer = np.where(other[0] == 2, 0, layer)    # drop pixels clouded in the other image
        if kelp_map is not None:
            layer = np.where(kelp_map, layer, 0)
        return layer

    f_mesma = _clean(f_img, s_img)
    f_sum = np.sum(f_mesma)
    s_mesma = _clean(s_img, f_img)
    s_sum = np.sum(s_mesma)

    s_present = np.where(s_mesma > 0, 1, 0)
    f_present = np.where(f_mesma > 0, 1, 0)
    both_present = np.where(s_present, f_present, 0)

    summary = (
        np.sum(both_present), s_sum, f_sum,
        np.sum(s_present), np.sum(f_present),
        s_mesma, f_mesma,
    )
    if not residuals:
        return summary

    resids = f_mesma - s_mesma
    return summary + (resids, np.sum(abs(resids)))

def get_mesma_EMs(file='EM_reformatted_dict_v4.pkl', path=r'C:\Users\attic\HLS_Kelp\python_objects'):
    """Load and return the pickled endmember object stored at ``path``/``file``.

    A later cell indexes the result by granule name.
    NOTE: ``pickle.load`` can execute arbitrary code - only point this at
    files you created yourself.
    """
    with open(os.path.join(path, file), 'rb') as handle:
        return pickle.load(handle)

def get_granule(filename):
    """Strip the ``_processed.tif`` suffix from ``filename``.

    Returns everything before the suffix, or prints a message and returns
    ``None`` when the name does not end with that suffix.
    """
    found = re.match(r'^(.*)_processed\.tif$', filename)
    if found is None:
        print("invalid file name")
        return None
    return found.group(1)
    
def get_image_pixel_sums(path, file, crop=False, bands=[5,6], kelp_map=None , cloud_correction=False ):
    """Summarise one image: MESMA sum, kelp-class pixel count and tag metadata.

    Layer 0 of the loaded array is the class mask (0 counted as kelp, 2 as
    cloud) and layer 1 the MESMA values; MESMA values below 5 or above 200 are
    zeroed. With ``kelp_map`` both layers are restricted to its truthy pixels
    and a cloud correction factor is computed as
    ``sum(kelp_map) / (sum(kelp_map) - sum(kelp_map where class == 2))``.
    The MESMA sum is multiplied by that factor only when ``cloud_correction``
    is True.

    Returns ``None`` if the file fails to load, else
    ``[file, day_num, date, mesma_sum, kelp_pixels, current, tide, clouds,
    cloud_correction_factor]`` (the factor is ``None`` without ``kelp_map``).
    """
    loaded = load_processed_img(path, file, bands=bands, geo_info=False, cloud_coverage=True, crop=crop, date_return=True)
    if loaded is None:
        return None
    f_img, day_num, date, f_tide, f_current, f_clouds = loaded

    mesma = np.array(f_img[1])
    mesma = np.where(mesma < 5, 0, mesma)
    mesma = np.where(mesma > 200, 0, mesma)
    kelp_mask = np.where(f_img[0] == 0, 1, 0)

    cloud_correction_factor = None
    if kelp_map is not None:
        mesma = np.where(kelp_map, mesma, 0)
        kelp_mask = np.where(kelp_map, kelp_mask, 0)
        # Share of the mapped kelp area hidden by class-2 pixels in this image.
        hidden = np.sum(np.where(f_img[0] == 2, kelp_map, 0))
        mapped = np.sum(kelp_map)
        cloud_correction_factor = mapped/(mapped - hidden).astype(float)

    f_kelp_pixels = np.sum(kelp_mask)
    f_sum = np.sum(mesma)
    if cloud_correction and cloud_correction_factor is not None:
        f_sum = f_sum * cloud_correction_factor

    return [file, day_num, date, f_sum, f_kelp_pixels, f_current, f_tide, f_clouds, cloud_correction_factor]

def extract_date(filename):
    """Return the acquisition date encoded as ``.YYYYDDDT`` in ``filename``.

    Looks for a dot, seven digits (year + day of year) and a ``T``; the time
    of day after the ``T`` is ignored. Returns ``None`` when the pattern is
    absent. NOTE: this redefines the ``extract_date`` declared earlier in the
    same cell.
    """
    match = re.search(r'\.(\d{7})T', filename)
    if match:
        return datetime.strptime(match.group(1), '%Y%j')
    return None

def sort_filenames_by_date(filenames):
    """Return ``filenames`` ordered by ``extract_date``, oldest first.

    Names without a parseable date (``extract_date`` returns ``None``) used to
    break the sort with a ``TypeError``; they are now kept, in their original
    order, after all dated names.
    """
    date_filename_pairs = [(extract_date(filename), filename) for filename in filenames]
    dated = sorted(
        (pair for pair in date_filename_pairs if pair[0] is not None),
        key=lambda pair: pair[0],
    )
    undated = [filename for date, filename in date_filename_pairs if date is None]
    return [filename for _, filename in dated] + undated

In [None]:
# Processing version of the imagery; also selects the input directory below.
version = 4
# Interpolate `version` (the original hard-coded the literal 4 in the f-string,
# so changing `version` had no effect on the path).
path = rf'H:\HLS_data\imagery\Isla_vista_kelp\processed_v{version}\11SKU'
filenames = os.listdir(path)
# Pair up scenes acquired at most two days apart.
pairs = group_by_date(filenames, max_days=2, max_pair_size=2)
print(f'Pairs found: {len(pairs)}')

In [None]:
# Build one record per image pair, restricted to the cropped kelp map.
data_array = []
# Raw string: the original plain literal relied on invalid escape sequences
# (\H, \i, \I, \p), which newer Python versions warn about.
kelp_map = load_processed_img(r"H:\HLS_data\imagery\Isla_vista_kelp\processed_v4", 'kelp_map.tif', bands=[1], crop=True, just_data=True)
kelp_map = kelp_map[0]  # drop the band axis -> 2-D mask
for pair in pairs:
    files = pair[1]
    data = get_mesma_pixel_sums(path, files[0], files[1], crop=True, only_overlap=False, kelp_map=kelp_map)
    if data is not None:
        data_array.append(data)
        #print(data)

# np.stack on mixed str/number records yields a string array; later cells cast
# the numeric columns back explicitly.
data_array =np.stack(data_array)
df = pd.DataFrame(data_array, columns=get_col_keys())


In [None]:
# Spot check: load every band of the first listed file. With date_return=True the
# result is [data, day_num, date, tide, current, clouds] (or None on failure).
load_processed_img(path,filenames[0], date_return=True)

In [None]:

# --- Build a full-frame kelp persistence map and save it as kelp_map.tif ---
# Count, per pixel, in how many scenes band 5 equals 0 (the class value the
# helper functions count as kelp).
length = len(filenames)
summed_image = None
for i,file in enumerate(filenames):
    data = load_processed_img(path,file,bands=[5],just_data=True)
    if data is None:
        continue
    kelp = np.where(data==0, 1,0)
    # Keep a running sum instead of stacking every scene in memory.
    summed_image = kelp if summed_image is None else summed_image + kelp
    if i % 10 == 0:
        print(f'{i}/{length}')

if summed_image is None:
    raise RuntimeError(f'no readable images found in {path}')

# A pixel belongs to the map when it was classified as kelp in more than 10 scenes.
# Stored under its own name so the cropped 2-D `kelp_map` used by other cells
# is not replaced by this full-frame (1, rows, cols) array.
kelp_map_full = np.where(summed_image > 10, 1,0)
print(kelp_map_full.shape)

# Borrow georeferencing from the first .tif that loads with complete metadata.
files = os.listdir(path)
packet = None
for file in files:
    if re.match(r'^.*\.tif$', file) is None:
        continue
    packet = load_processed_img(path,file,bands=[1],geo_info=True)
    if packet is not None:
        img_file = file
        break
if packet is None:
    raise RuntimeError(f'no .tif with usable metadata found in {path}')
print(img_file)

# With geo_info=True and cloud_coverage=True the loader returns
# [data, tide, current, transform, crs, clouds] - the original unpacked these
# in the wrong order, which put the transform into `clouds`.
data, tide, current, transform, crs, clouds = packet

# rasterio arrays are (bands, rows, cols), i.e. (count, height, width).
bands,height,width = data.shape

data_type = rasterio.int8
profile = {
    'driver': 'GTiff',
    'width': width,
    'height': height,
    'count': 1,  # single band: the binary kelp map
    'dtype': data_type,
    'crs': crs,
    'transform': transform,
    'nodata': -1,  # map values are 0/1, so -1 never collides with data
}
out_path = os.path.join(path,'kelp_map.tif')
try:
    with rasterio.open(out_path, 'w', **profile) as dst:
        dst.write((kelp_map_full[0]).astype(data_type), 1)
        # Dataset tags are written through update_tags, not the creation profile.
        dst.update_tags(VERSION=version)
    print(f"saved to: {out_path} ")
except RasterioIOError as e:
    print(f"Error writing file {out_path}: {e}")

# NOTE(review): the pair-analysis cell reads kelp_map.tif from the parent
# 'processed_v4' directory, while this cell writes it into `path` - confirm.
plt.figure(figsize=(25,25))
plt.imshow(kelp_map_full[0,2500:3600,200:1750])
plt.show()
plt.figure(figsize=(25,25))
plt.imshow(summed_image[0,2500:3600,200:1750])
plt.show()

In [None]:
# Debug aid: dump the directory listing collected when `path` was configured.
print(filenames)

In [None]:
# Sanity check: reload band 1 of the kelp_map.tif stored in `path`, cropped to the
# loader's fixed window, and display it.
test_kelp_image =load_processed_img(path, 'kelp_map.tif', bands=[1], crop=True, just_data=True)

plt.figure()
plt.imshow(test_kelp_image[0])  # index 0 drops the band axis
plt.show()

In [None]:
#Create a time series plot of kelp: 
# Collect one summary record per file, in date order, restricted to `kelp_map`.
files = os.listdir(path)
sorted_files = sort_filenames_by_date(files)
image_data_list =[]
for file in sorted_files:
    packet = get_image_pixel_sums(path,file,crop=True,kelp_map=kelp_map)
    # None means the file could not be read or lacked the required tags.
    if packet is not None:
        image_data_list.append(packet)
# Stacking mixed str/number records gives a string array; the next cell casts
# the numeric columns back.
image_data_list = np.stack(image_data_list)

In [None]:
# Turn the stacked per-image records into a typed DataFrame and filter it.
keys = ['file','day_num', 'date', 'mesma', 'kelp_pixels','current','tide','clouds', 'cloud_correction_factor']
ts_df = pd.DataFrame(image_data_list, columns=keys)
# The stacked records arrive as strings, so restore numeric dtypes in one pass.
ts_df = ts_df.astype({
    'kelp_pixels': int,
    'day_num': float,
    'mesma': float,
    'tide': float,
    'clouds': float,
    'current': float,
    'cloud_correction_factor': float,
})
ts_df['date'] = [datetime.strptime(stamp[:26], "%Y-%m-%dT%H:%M:%S.%f") for stamp in ts_df['date']]
# Keep images with a MESMA sum of at most 10,000,000 and a cloud correction factor below 1.5.
ts_df_filtered = ts_df[ts_df['mesma'] <= 10000000]
ts_df_filtered = ts_df_filtered[ts_df_filtered['cloud_correction_factor'] < 1.5]


In [None]:
# Line plot of the filtered MESMA sum over acquisition date (all sensors).
plt.figure()
plt.plot(ts_df_filtered['date'],ts_df_filtered['mesma'])#), c=ts_df['clouds'], cmap='Blues')

plt.show()

In [None]:
#Create a time series plot of kelp: 
# Same collection as the all-sensor series, but files whose second
# dot-separated name field is 'S30' are skipped.
files = os.listdir(path)
sorted_files = sort_filenames_by_date(files)
image_data_list_l30 =[]
for file in sorted_files:
    if get_sensor(file) == 'S30':
        continue
    packet = get_image_pixel_sums(path,file,crop=True,kelp_map=kelp_map)
    # None means the file could not be read or lacked the required tags.
    if packet is not None:
        image_data_list_l30.append(packet)
image_data_list_l30= np.stack(image_data_list_l30)



In [None]:
# Typed DataFrame for the non-S30 series, filtered like the all-sensor series.
keys = ['file','day_num', 'date', 'mesma', 'kelp_pixels','current','tide','clouds', 'cloud_correction_factor']
ts_df_l30 = pd.DataFrame(image_data_list_l30, columns=keys)
ts_df_l30['kelp_pixels'] = ts_df_l30['kelp_pixels'].astype(int)
ts_df_l30['day_num'] = ts_df_l30['day_num'].astype(float)
ts_df_l30['mesma'] = ts_df_l30['mesma'].astype(float)
ts_df_l30['tide'] = ts_df_l30['tide'].astype(float)
ts_df_l30['clouds'] = ts_df_l30['clouds'].astype(float)
ts_df_l30['current'] = ts_df_l30['current'].astype(float)
ts_df_l30['cloud_correction_factor'] = ts_df_l30['cloud_correction_factor'].astype(float)
ts_df_l30['date'] = [datetime.strptime(date_str[:26], "%Y-%m-%dT%H:%M:%S.%f") for date_str in ts_df_l30['date']]
ts_df_l30_filtered = ts_df_l30[(ts_df_l30['mesma'] <= 10000000) ]
# The original line compared the 'clouds' column against a missing threshold
# (`< )`), which is a syntax error. Until a cloud threshold is chosen, apply
# the same cloud-correction filter as the all-sensor series so the two curves
# plotted together are filtered identically.
# TODO(review): replace with the intended 'clouds' cutoff if one was planned.
ts_df_l30_filtered = ts_df_l30_filtered[(ts_df_l30_filtered['cloud_correction_factor'] <1.5)]


In [None]:
# Overlay the two filtered series: non-S30 files in red, all files in blue.
plt.figure()
plt.plot(ts_df_l30_filtered['date'],ts_df_l30_filtered['mesma'], color='Red')#), c=ts_df['clouds'], cmap='Blues')
plt.plot(ts_df_filtered['date'],ts_df_filtered['mesma'], color='Blue')
plt.show()

In [None]:
# Attach endmember statistics for both images of every pair to `df`.
endmembers = get_mesma_EMs()
# List-valued columns start as None (object dtype) so each cell can hold a list.
df['f_mean'] = [None] * len(df)
df['f_stddev'] = [None] * len(df)
df['s_mean'] = [None] * len(df)
df['s_stddev'] = [None] * len(df)
# Scalar-valued columns start as NaN.
df['s_avg_stddev'] = np.nan
df['f_avg_stddev'] = np.nan
df['fs_avg_stddev'] = np.nan
df[ 'em_mean_squared_var'] = np.nan
for i, pair in df.iterrows():
    f_img = pair['img1']
    s_img = pair['img2']
    # Endmembers are keyed by the file name without its '_processed.tif' suffix.
    # NOTE(review): get_granule returns None for unexpected names, which would
    # raise KeyError here.
    f_ems = endmembers[get_granule(f_img)]
    s_ems = endmembers[get_granule(s_img)]

    # Statistics are taken along axis 1 of each endmember array.
    # NOTE(review): presumably axis 1 runs over endmember samples - confirm.
    f_mean = np.mean(f_ems, axis=1)
    f_stddev = np.std(f_ems, axis=1)
    s_mean = np.mean(s_ems, axis=1)
    s_stddev = np.std(s_ems, axis=1)
    s_avg_stddev = np.mean(s_stddev)
    f_avg_stddev = np.mean(f_stddev)
    df.at[i, 'f_mean'] = f_mean.tolist()
    df.at[i, 'f_stddev'] = f_stddev.tolist()
    df.at[i, 's_mean'] = s_mean.tolist()
    df.at[i, 's_stddev'] = s_stddev.tolist()
    # Mean squared difference between the two images' endmember means.
    df.at[i, 'em_mean_squared_var'] = np.mean((f_mean-s_mean)**2)
    df.at[i, 's_avg_stddev'] = s_avg_stddev
    df.at[i, 'f_avg_stddev'] = f_avg_stddev
    df.at[i, 'fs_avg_stddev'] = f_avg_stddev - s_avg_stddev

In [None]:

# Restore numeric dtypes on the pair table (it was built from a string array).
df['f_kelp_pixels'] = df['f_kelp_pixels'].astype(int)
df['s_kelp_pixels'] = df['s_kelp_pixels'].astype(int)
df['f_mesma'] = df['f_mesma'].astype(int)
df['s_mesma'] = df['s_mesma'].astype(int)
df['f_tide'] = df['f_tide'].astype(float)
df['s_tide'] = df['s_tide'].astype(float)
df['f_current'] = df['f_current'].astype(float)
df['s_current'] = df['s_current'].astype(float)
# Symmetric relative difference: |f - s| divided by the mean of f and s.
df['percent_change'] = 2 *(abs(df['f_mesma'] - df['s_mesma'])) / (df['s_mesma'] + df['f_mesma'])

# Keep pairs whose MESMA sums are at most 100,000 and kelp pixel counts at most 6,000.
filtered_df = df[(df['f_mesma'] <= 100000) & (df['s_mesma'] <= 100000)]
filtered_df = filtered_df[(filtered_df['f_kelp_pixels'] <= 6000) & (filtered_df['s_kelp_pixels'] <= 6000)]

In [None]:
# Split pairs at a relative difference of 0.5: above is an outlier, at or below is uniform.
outlier_df = filtered_df[(filtered_df['percent_change'] > .5)]
uniform_df = filtered_df[(filtered_df['percent_change'] <= .5)]

#print(outlier_df['percent_change'])
# Colour both scatter plots by the endmember mean squared difference, capped at 2000.
plot_pair_values(outlier_df,color_basis='em_mean_squared_var', color_title='endmember mean squared variance' ,  vmax=2000, single_color_var=True)
plot_pair_values(filtered_df,color_basis='em_mean_squared_var', color_title='endmember mean squared variance', vmax=2000, single_color_var=True)



In [None]:
# Keep uniform pairs whose kelp pixel counts lie between 200 and 2000 in both images.
quality_df = uniform_df[(uniform_df['f_kelp_pixels'] <= 2000) & (uniform_df['s_kelp_pixels'] <= 2000)]
# Build the mask from quality_df itself: the original used a mask computed on
# uniform_df, which pandas has to re-align to the already filtered frame.
quality_df = quality_df[(quality_df['f_kelp_pixels'] >= 200) & (quality_df['s_kelp_pixels'] >=200)]
sum_clouds = quality_df['f_clouds'].astype(float) +quality_df['s_clouds'].astype(float)
# plot_tide_current opens its own figure, so the stray plt.figure() that used
# to precede this call (and rendered as an empty figure) is gone.
plot_tide_current(quality_df)
#plt.scatter(quality_df['percent_change'],sum_clouds)
#plt.ylim([0,10000])


In [None]:
# Display band 7 (cropped) of the first image of every pair.
# NOTE(review): load_processed_img returns None on a read error, which would
# make the indexing below fail.
for i, pair in df.iterrows():
    file = pair['img1']
    img = load_processed_img(path,file,bands=[7],just_data=True, crop=True)
    print(file)
    plt.figure()
    plt.imshow(img[0,:,:])
    plt.colorbar()
    plt.show()

In [None]:
# Keep pairs whose summed absolute MESMA residual is below 30 % of the mean of
# the two MESMA sums.
low_resids_list = []
for i, pair in df.iterrows():
    print(pair['img1'], pair['img2'])
    packet = analyze_mesma_pixel(path, pair['img1'], pair['img2'], crop=True, kelp_map=kelp_map, residuals=True)
    # analyze_mesma_pixel returns None when either image cannot be loaded;
    # skip such pairs instead of failing on the unpack below.
    if packet is None:
        continue
    sf_count, s_sum, f_sum, s_count, f_count, s_mesma, f_mesma, resids, resids_sum = packet
    if (2*resids_sum / (s_sum + f_sum)).astype(float) <.3:
        low_resids_list.append(pair)
    #print(sf_count, f_count, s_count, s_sum, f_sum, resids_sum)
    #plot_four(s_mesma, f_mesma, resids)

low_resids_df = pd.DataFrame(low_resids_list)

In [None]:
# Tide / current comparison restricted to the low-residual pairs selected from `df`.
plot_tide_current(low_resids_df)

In [None]:
# For every uniform pair, compare the mean squared MESMA residual before and
# after rescaling the second image so that its sum matches the first.
data_array=[]
for i,pair in uniform_df.iterrows():
    packet = get_mesma_pixel_sums(path, pair['img1'], pair['img2'], mesma_residuals=True, crop=True,only_overlap=True )
    # NOTE(review): packet is None when an image fails to load, which would
    # break this unpack.
    data , mesma_res, f_mesma, s_mesma = packet
    file1, f_sum, f_kelp_pixels,f_current,f_tide,f_clouds, file2,s_sum,s_kelp_pixels,s_current,s_tide, s_clouds = data
    # np.mean without an axis gives a scalar, so the np.sum below leaves it unchanged.
    sma_resids = np.mean(mesma_res**2)
    sma_count = np.sum(sma_resids)
    # Ratio of the two MESMA sums; multiplying s_mesma by it equalises the totals.
    scale_factor = float(f_sum/s_sum)

    resids_scaled = f_mesma - s_mesma * scale_factor
    sma_resids_scaled = np.mean(resids_scaled**2)
    sma_count_scaled = np.sum(sma_resids_scaled)
    data_array.append([f_sum,s_sum,sma_count,sma_count_scaled])
    # `i` is the DataFrame index label, not a running counter.
    if i % 10 == 0:
        print(i)
# Columns: f_sum, s_sum, mean squared residual, mean squared residual after scaling.
data_new = np.stack(data_array)
print(data_new)

In [None]:
# Average ratio of the scaled to the unscaled mean squared residual across pairs.
print(np.mean(data_new[:,3]/data_new[:,2]))

In [None]:

# Mean squared residual against the second image's MESMA sum:
# blue = unscaled, red = after rescaling. The y axis is clipped to 0..5.
plt.scatter(data_new[:,1], data_new[:,2], color='Blue')
plt.scatter(data_new[:,1], data_new[:,3], color='Red')
plt.ylim(0,5)


In [None]:
# Visual inspection of every outlier pair, with each panel titled by its file name.
for i, pair in outlier_df.iterrows():
    print(pair['img1'], pair['img2'])
    view_rgb(path, pair['img1'], pair['img2'], title_1=pair['img1'], title_2=pair['img2'], crop=True)

In [None]:
# Tide / current comparison for the pairs with a relative difference of at most 0.5.
plot_tide_current(uniform_df)

In [None]:
# Default colouring: difference of the two images' cloud pixel counts (f_clouds - s_clouds).
plot_pair_values(filtered_df)