# Image Generator
This notebook was created to ease the image generation process, i.e., turning the netCDF data into images that the OF algorithms can take as input and saving those images to the hard drive.

## Importing necessary libraries and notebooks

In [None]:
import xarray as xr
import os
import cv2
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from datetime import datetime, timedelta
from matplotlib import ticker
from IPython.display import Image, display, HTML

# Import the other notebooks without running their cells
from ii_Data_Manipulation import visualize_4
from iii_GOES_average import time_list, visualize_aggregate, calculate_median
from iv_Image_Processing import collect_times, save_aggregate, crop_image, process_dates, binarize_image, bilateral_image, process_directory, save_image, process_dates_2

## Antilles

## ABI_Averages_Antilles
We're going to average and process all the ABI-GOES images and save them to the directories `ABI_Averages_Antilles` (raw averages) and `ABI_Averages_Antilles_Processed` (processed images) on the hard drive "ballena". Running this block might take a while. To speed it up, we could try to parallelize this process using the GPU.

In [None]:
if __name__ == '__main__':
    # Render the ABI-GOES images for the Antilles region, then post-process them.
    # Dates are strings that look like 'YYYYMMDD'.
    start_date = '20221121'
    end_date = '20221231'
    # Input netCDF directory and the directory that receives the rendered images
    directory = '/media/yahia/ballena/CLS/abi-goes-global-hr'
    output_directory = '/media/yahia/ballena/ABI_Averages_Antilles'
    # Bounding box handed to process_dates; presumably (min, max) in degrees — confirm
    latitude_range = (12, 17)
    longitude_range = (-67, -60)

    # Per the original note, this calculates the 1-day averages and saves them.
    # process_dates is imported from iv_Image_Processing; its implementation is not visible here.
    process_dates(start_date, end_date, directory, output_directory, latitude_range, longitude_range, color="viridis")

    # Paths: the source is the directory written by the call above
    source_directory = '/media/yahia/ballena/ABI_Averages_Antilles'
    destination_directory = '/media/yahia/ballena/ABI_Averages_Antilles_Processed'

    # Process the directory. Bilateral filtering and binarization are both switched off
    # here (bilateral=False, binarize=False), so presumably only the remaining steps of
    # process_directory (e.g. cropping) are applied — confirm against its definition.
    process_directory(source_directory, destination_directory, threshold=180, bilateral=False, binarize=False)

In [None]:
# Binarized and bilateral images
if __name__ == '__main__':
    # Paths
    # NOTE(review): this reads from '.../ballena/ABI/ABI_Averages_Antilles', whereas the
    # averaging cell writes to '.../ballena/ABI_Averages_Antilles' (no 'ABI/' level).
    # Confirm the images were moved there, or align the two paths.
    source_directory = '/media/yahia/ballena/ABI/ABI_Averages_Antilles'
    destination_directory = '/media/yahia/ballena/ABI/ABI_Averages_Antilles_Binarized_Bilateral'

    # Process the directory with both the bilateral filter and binarization enabled.
    # threshold=100 is presumably the binarization cut-off — confirm against process_directory.
    process_directory(source_directory, destination_directory, threshold=100, bilateral=True, binarize=True)

In [None]:
# Binarized and bilateral images (negative)
if __name__ == '__main__':
    # Paths
    # NOTE(review): this reads from '.../ballena/ABI/ABI_Averages_Antilles', whereas the
    # averaging cell writes to '.../ballena/ABI_Averages_Antilles' (no 'ABI/' level).
    # Confirm the images were moved there, or align the two paths.
    source_directory = '/media/yahia/ballena/ABI/ABI_Averages_Antilles'
    destination_directory = '/media/yahia/ballena/ABI/ABI_Averages_Antilles_Binarized_Bilateral_Negative'

    # Same processing as the binarized + bilateral cell, with negative=True added.
    # Presumably negative=True inverts the resulting image — confirm against process_directory.
    process_directory(source_directory, destination_directory, threshold=100, bilateral=True, binarize=True, negative=True)

## MODIS_Images
The function **process_dates** we previously defined is only adapted to ABI-GOES images, so we need to write a function that does the same for MODIS and OLCI images (**process_dates2** below). We also need an equivalent of **save_aggregate** for these images (**save_image** below).

In [None]:
def save_image(file_path, lat_range=None, lon_range=None, color="viridis", vmax=0.1, output_filepath=None):
    """
    Plot the algae index stored in a netCDF file on a map, then show or save the figure.

    The plotted variable is 'fai_anomaly' when the substring "abi" occurs in
    ``file_path`` and 'nfai_mean' otherwise.

    NOTE: this definition replaces the ``save_image`` imported from
    iv_Image_Processing in the import cell once this cell has been run.

    Parameters:
    - file_path (str): Path of the netCDF file to open.
    - lat_range (tuple, optional): Bounds unpacked into ``slice(*lat_range)`` to
      select along the 'latitude' coordinate. No selection when falsy.
    - lon_range (tuple, optional): Bounds unpacked into ``slice(*lon_range)`` to
      select along the 'longitude' coordinate. No selection when falsy.
    - color (str): Colormap name passed to the plot call.
    - vmax (float): Upper bound of the colour scale (the lower bound is fixed at -0.01).
    - output_filepath (str, optional): Where to save the figure. When falsy, the
      figure is shown interactively with gridlines, a colorbar label and a title.

    Returns:
    - None
    """
    # Pick the variable and the colorbar label from the instrument named in the path
    is_abi = "abi" in file_path
    index_key = 'fai_anomaly' if is_abi else 'nfai_mean'
    colorbar_label = ('Floating Algae Index Anomaly (FAI)' if is_abi
                      else 'Normalized Floating Algae Index (NFAI)')

    interactive = not output_filepath

    # The context manager closes the netCDF file even when plotting or saving fails
    with xr.open_dataset(file_path) as dataset:
        # If ranges are specified, apply them to select the desired subset
        subset = dataset
        if lat_range:
            subset = subset.sel(latitude=slice(*lat_range))
        if lon_range:
            subset = subset.sel(longitude=slice(*lon_range))

        # Extract relevant data (NFAI or FAI anomaly)
        index_data = subset[index_key]

        # Replace every value that is not strictly positive with -0.1. That value lies
        # below vmin (-0.01), so those pixels fall in the "under" range of the colour scale.
        index_data = xr.where(index_data > 0, index_data, -0.1)

        # Set up a plot with geographic projections
        fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={'projection': ccrs.PlateCarree()})
        try:
            # Customize the map with coastlines and features
            ax.coastlines(resolution='10m', color='black')
            ax.add_feature(cfeature.BORDERS, linestyle=':')
            ax.add_feature(cfeature.LAND, facecolor='lightgray')

            # Show gridlines only when visualizing interactively, not when saving the output
            if interactive:
                gl = ax.gridlines(draw_labels=True, linewidth=1, color='gray', alpha=0.5, linestyle='--')
                gl.top_labels = False
                gl.right_labels = False

            # Plot the data on a colour scale spanning [-0.01, vmax].
            # NOTE(review): the colorbar is drawn for saved images too, exactly as before;
            # confirm whether the downstream cropping relies on this layout before removing it.
            im = index_data.plot(ax=ax, x='longitude', y='latitude', transform=ccrs.PlateCarree(),
                                 cmap=color, add_colorbar=True, extend='both',
                                 vmin=-0.01, vmax=vmax,
                                 cbar_kwargs={'shrink': 0.35})

            if interactive:
                # Label the colorbar with the index that was actually plotted
                im.colorbar.set_label(colorbar_label)
                plot_date = subset.attrs.get('date', 'Unknown Date')
                ax.set_title(f"Algae Distribution on {plot_date}")
                plt.show()
            else:
                fig.savefig(output_filepath)
        finally:
            # When saving in a loop, always release the figure, even on failure,
            # so that figures do not accumulate in memory
            if not interactive:
                plt.close(fig)

In [None]:
def process_dates2(start_date, end_date, directory, output_dir, lat_range=None, lon_range=None, color="viridis"):
    """
    Render one PNG per day for every daily MODIS or OLCI netCDF file in a date range.

    The instrument is inferred from ``directory``: it must contain the substring
    "modis" or "olci". For each day between ``start_date`` and ``end_date``
    (both inclusive), the matching netCDF file is passed to ``save_image`` and the
    result is written to ``output_dir`` as 'MODIS_<date>.png' or 'OLCI_<date>.png'.
    Missing files are skipped, and failures on individual files are printed
    without stopping the loop.

    Parameters:
    - start_date (str): First day to process, formatted 'YYYYMMDD'.
    - end_date (str): Last day to process (inclusive), formatted 'YYYYMMDD'.
    - directory (str): Directory holding the daily netCDF files.
    - output_dir (str): Directory receiving the PNG files; created if missing.
    - lat_range (tuple, optional): Latitude bounds forwarded to ``save_image``.
    - lon_range (tuple, optional): Longitude bounds forwarded to ``save_image``.
    - color (str): Colormap name forwarded to ``save_image``.

    Raises:
    - ValueError: If a date string is malformed, or if ``directory`` contains
      neither "modis" nor "olci".
    """
    # Convert the start and end dates from strings to datetime objects
    current_date = datetime.strptime(start_date, '%Y%m%d')
    last_date = datetime.strptime(end_date, '%Y%m%d')

    # Resolve the instrument once: it does not change from one day to the next
    if "modis" in directory:
        image_prefix, file_stem = 'MODIS', 'cls-modis-aqua-global-lr_1d'
    elif "olci" in directory:
        image_prefix, file_stem = 'OLCI', 'cls-olci-s3-global-lr_1d'
    else:
        # Previously this case crashed inside the loop on an unbound file_path
        raise ValueError(
            f"Cannot infer the instrument from directory {directory!r}: "
            "expected the path to contain 'modis' or 'olci'"
        )

    # Without the output directory every save would fail (and merely be printed)
    os.makedirs(output_dir, exist_ok=True)

    while current_date <= last_date:
        # Format the current date as a string in 'YYYYMMDD' format
        date_str = current_date.strftime('%Y%m%d')

        # Input netCDF file and output image for the current day
        file_path = os.path.join(directory, f"{file_stem}_{date_str}.nc")
        output_file_path = os.path.join(output_dir, f'{image_prefix}_{date_str}.png')

        # Check if the file exists before proceeding
        if not os.path.exists(file_path):
            print(f"File not found for date: {date_str}, skipping...")
        else:
            try:
                save_image(file_path, lat_range, lon_range, color=color, output_filepath=output_file_path)
            except Exception as e:
                # Best effort: report the failure and carry on with the next day
                print(f"Failed to process {file_path}: {e}")

        # Increment the current date by one day
        current_date += timedelta(days=1)

Generating the MODIS images:

In [None]:
if __name__ == '__main__':
    # Render the MODIS images for the Antilles region, then post-process them.
    # Dates are 'YYYYMMDD' strings, as parsed by process_dates2.
    start_date = '20201207'
    end_date = '20221231'
    # process_dates2 selects the MODIS file naming because this path contains "modis"
    directory = '/media/yahia/ballena/CLS/modis-aqua-global-lr'
    output_directory = '/media/yahia/ballena/MODIS_Antilles'
    # Bounding box forwarded to save_image; presumably (min, max) in degrees — confirm
    latitude_range = (12, 17)
    longitude_range = (-67, -60)

    # Render one image per daily netCDF file and save it (no averaging is done here)
    process_dates2(start_date, end_date, directory, output_directory, latitude_range, longitude_range, color="viridis")

    # Paths: the source is the directory written by the call above
    source_directory = '/media/yahia/ballena/MODIS_Antilles'
    destination_directory = '/media/yahia/ballena/MODIS_Antilles_Processed'

    # Process the directory. Bilateral filtering and binarization are both switched off
    # here (bilateral=False, binarize=False), so presumably only the remaining steps of
    # process_directory (e.g. cropping) are applied — confirm against its definition.
    process_directory(source_directory, destination_directory, threshold=180, bilateral=False, binarize=False)

## OLCI_Images

Generating the OLCI images:

In [None]:
if __name__ == '__main__':
    # Render the OLCI images for the Antilles region, then post-process them.
    # Dates are 'YYYYMMDD' strings, as parsed by process_dates2.
    start_date = '20201207'
    end_date = '20240122'
    # process_dates2 selects the OLCI file naming because this path contains "olci"
    directory = '/media/yahia/ballena/CLS/olci-s3-global-lr'
    output_directory = '/media/yahia/ballena/OLCI_Antilles'
    # Bounding box forwarded to save_image; presumably (min, max) in degrees — confirm
    latitude_range = (12, 17)
    longitude_range = (-67, -60)

    # Render one image per daily netCDF file and save it (no averaging is done here)
    process_dates2(start_date, end_date, directory, output_directory, latitude_range, longitude_range, color="viridis")

    # Paths: the source is the directory written by the call above
    source_directory = '/media/yahia/ballena/OLCI_Antilles'
    destination_directory = '/media/yahia/ballena/OLCI_Antilles_Processed'

    # Process the directory. Bilateral filtering and binarization are both switched off
    # here (bilateral=False, binarize=False), so presumably only the remaining steps of
    # process_directory (e.g. cropping) are applied — confirm against its definition.
    process_directory(source_directory, destination_directory, threshold=180, bilateral=False, binarize=False)