Key functionality: downloads only the IDs already present in the marked_images folder

## Download data

In [1]:
import os
import requests
import pandas as pd
import math

# Download one GMRT image tile per seamount listed in the CSV, restricted to
# the PEAKIDs that already have a "marked_<PEAKID>.png" file in
# marked_images_folder.


def _peak_key(value):
    """Return a comparison key for a PEAKID.

    pandas reads an ID column as float when it cannot be stored as integers,
    so str() of the ID yields e.g. "26000.0" while a file name may carry
    "26000". A purely-zero fractional part is dropped so that both spellings
    map to the same key; any other text is returned stripped but unchanged.
    """
    text = str(value).strip()
    whole, dot, fraction = text.partition(".")
    if dot and fraction and set(fraction) == {"0"} and whole.lstrip("-").isdigit():
        return whole
    return text


# Parameters
csv_file = "seamounts.csv"  # Path to the .csv file
marked_images_folder = "marked_images"  # Folder containing already marked images
output_folder = "seamounts_seg"  # Folder to save downloaded images
tile_pixels = 800  # Width of the requested image in pixels
tile_width_deg = 1.0  # Fixed width in degrees
api_base_url = "https://www.gmrt.org/services/ImageServer"
request_timeout_s = 60  # Without a timeout a stalled connection blocks the loop forever

# Ensure output folder exists
os.makedirs(output_folder, exist_ok=True)

# Collect the (normalised) PEAKIDs found in the marked_images folder
marked_prefix = "marked_"
marked_suffix = ".png"
marked_peaks = {
    _peak_key(entry[len(marked_prefix):-len(marked_suffix)])
    for entry in os.listdir(marked_images_folder)
    if entry.startswith(marked_prefix) and entry.endswith(marked_suffix)
}

print(f"Found {len(marked_peaks)} marked images in {marked_images_folder}.")

# Load CSV file
try:
    df = pd.read_csv(csv_file)
    print(f"Loaded {len(df)} records from {csv_file}.")
except Exception as e:
    print(f"Error loading .csv file: {e}")
    # SystemExit instead of exit(): exit() is a site-module helper, and a
    # non-zero status signals the failure to whatever launched the script.
    raise SystemExit(1)

# Process each record
for index, row in df.iterrows():
    try:
        # PEAKID as written in the CSV; also used as the output file name
        file_name = str(row["PEAKID"])

        # Compare on the normalised key so "26000.0" matches "marked_26000.png"
        if _peak_key(file_name) not in marked_peaks:
            print(f"Skipping PEAKID {file_name} as it's not in marked_images.")
            continue  # Skip this PEAKID

        # Coordinates are only parsed for rows that will be downloaded
        center_lon = float(row["LONG"])  # Longitude from LONG
        center_lat = float(row["LAT"])  # Latitude from LAT

        # Shrink the latitude extent by cos(latitude) relative to the fixed
        # longitude extent (the original comment calls this "square tiles")
        adjusted_height_deg = tile_width_deg * math.cos(math.radians(center_lat))

        # Calculate tile bounds
        minlatitude = center_lat - (adjusted_height_deg / 2)
        maxlatitude = center_lat + (adjusted_height_deg / 2)
        minlongitude = center_lon - (tile_width_deg / 2)
        maxlongitude = center_lon + (tile_width_deg / 2)

        # Debugging: Print calculated values
        print(f"PEAKID: {file_name}")
        print(f"Center Lat: {center_lat}, Center Lon: {center_lon}")
        print(f"Adjusted Height (degrees): {adjusted_height_deg}")
        print(f"Bounds: {minlatitude} to {maxlatitude}, {minlongitude} to {maxlongitude}")

        # Prepare API parameters
        params = {
            "minlatitude": minlatitude,
            "maxlatitude": maxlatitude,
            "minlongitude": minlongitude,
            "maxlongitude": maxlongitude,
            "width": tile_pixels,
            "mask": "false",
            "download": "true",
        }

        # Construct the output file path
        output_file = os.path.join(output_folder, f"{file_name}.png")

        # Download the image
        print(f"Downloading square image for PEAKID {file_name} at {center_lat}, {center_lon}...")
        response = requests.get(api_base_url, params=params, timeout=request_timeout_s)

        if response.status_code == 200:
            # Save the image
            with open(output_file, "wb") as file:
                file.write(response.content)
            print(f"Saved: {output_file}")
        else:
            print(f"Failed to download for PEAKID {file_name} - HTTP {response.status_code}")

    except Exception as e:
        # Best effort: report the failing row and carry on with the next one
        print(f"Error processing row {index}: {e}")

print("Download complete.")


Found 499 marked images in marked_images.
Loaded 33452 records from seamounts.csv.
Skipping PEAKID 26000.0 as it's not in marked_images.
Skipping PEAKID 26157.0 as it's not in marked_images.
Skipping PEAKID 26158.0 as it's not in marked_images.
Skipping PEAKID 26228.0 as it's not in marked_images.
Skipping PEAKID 26229.0 as it's not in marked_images.
Skipping PEAKID 26297.0 as it's not in marked_images.
Skipping PEAKID 26298.0 as it's not in marked_images.
Skipping PEAKID 26407.0 as it's not in marked_images.
Skipping PEAKID 26408.0 as it's not in marked_images.
Skipping PEAKID 26653.0 as it's not in marked_images.
Skipping PEAKID 26664.0 as it's not in marked_images.
Skipping PEAKID 26665.0 as it's not in marked_images.
Skipping PEAKID 27029.0 as it's not in marked_images.
Skipping PEAKID 27045.0 as it's not in marked_images.
Skipping PEAKID 27112.0 as it's not in marked_images.
Skipping PEAKID 27347.0 as it's not in marked_images.
Skipping PEAKID 27393.0 as it's not in marked_images.

To download additional seamounts, find the highest PEAKID among the existing image file names, trim the .csv so that it contains only the seamounts after that ID, and then run the script above again.

## Filter high-res data

In [3]:
import cv2
import os
import numpy as np

def crop_fixed_border(image, crop_pixels=70):
    """Crop a fixed number of pixels from each border.

    Args:
        image: Array-like image whose first two dimensions are height and
            width; both colour (H, W, C) and grayscale (H, W) arrays work.
        crop_pixels: Number of pixels removed from each of the four sides.

    Returns:
        The cropped view of ``image``. If either dimension is not strictly
        larger than ``2 * crop_pixels``, a warning is printed and the
        original image is returned unchanged.
    """
    # Only the first two axes are needed; unpacking three values would raise
    # ValueError for 2-D (grayscale) input.
    height, width = image.shape[:2]
    if height > crop_pixels * 2 and width > crop_pixels * 2:
        return image[crop_pixels:height - crop_pixels, crop_pixels:width - crop_pixels]

    print(f"Warning: Crop size too large for image {width}x{height}. Returning original image.")
    return image

def detect_blur(image, threshold=100.0):
    """Score image sharpness by the variance of its Laplacian.

    Args:
        image: Image passed directly to ``cv2.Laplacian``.
        threshold: Variance below which the image is flagged as blurry.

    Returns:
        A ``(variance, is_blurry)`` tuple, where ``is_blurry`` is true when
        the variance is strictly less than ``threshold``.
    """
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()
    return sharpness, sharpness < threshold

def process_images(input_folder, high_res_folder, low_res_folder, crop_pixels=70, threshold=100.0):
    """Process images: crop, detect blur, and organize into folders.

    Every file in ``input_folder`` whose name ends in .png/.jpg/.jpeg/.tif
    (case-insensitive) is loaded, border-cropped, converted to grayscale and
    scored with ``detect_blur``. The original file is then moved to
    ``low_res_folder`` when flagged blurry and to ``high_res_folder``
    otherwise. A ``log.csv`` listing file name, Laplacian variance and blur
    flag is written into ``high_res_folder``.

    Args:
        input_folder: Directory scanned (non-recursively) for images.
        high_res_folder: Destination for images not flagged as blurry;
            created if missing.
        low_res_folder: Destination for images flagged as blurry; created if
            missing.
        crop_pixels: Border width forwarded to ``crop_fixed_border``.
        threshold: Blur threshold forwarded to ``detect_blur``.
    """
    import csv  # function-scope import keeps this block self-contained

    os.makedirs(high_res_folder, exist_ok=True)
    os.makedirs(low_res_folder, exist_ok=True)

    image_suffixes = ('.png', '.jpg', '.jpeg', '.tif')
    log = []
    # Sorted so that the log rows come out in a reproducible order.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith(image_suffixes):
            continue
        file_path = os.path.join(input_folder, filename)
        try:
            # Load the image
            image = cv2.imread(file_path)
            if image is None:
                print(f"Image {file_path} is corrupt or unreadable.")
                continue

            # Crop the image
            cropped_image = crop_fixed_border(image, crop_pixels)

            # Convert to grayscale for sharpness detection
            gray_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)

            # Detect blur
            lap_var, is_blurry = detect_blur(gray_image, threshold)
            log.append((filename, lap_var, is_blurry))

            # Move the original (uncropped) file to the appropriate folder
            destination = low_res_folder if is_blurry else high_res_folder
            os.rename(file_path, os.path.join(destination, filename))

        except Exception as e:
            # Best effort: report the failing file and continue with the rest
            print(f"Error processing {file_path}: {e}")

    # Save log to a CSV file; csv.writer quotes file names that contain
    # commas or quotes, which hand-built lines would silently corrupt.
    log_path = os.path.join(high_res_folder, 'log.csv')
    with open(log_path, 'w', newline='', encoding='utf-8') as log_file:
        writer = csv.writer(log_file, lineterminator='\n')
        writer.writerow(['Filename', 'Laplacian Variance', 'Blurry'])
        for name, lap_var, is_blurry in log:
            writer.writerow([name, f"{lap_var:.2f}", is_blurry])

# Example usage: sort the images in ./seamounts_galore into its high_res and
# low_res subfolders, using a blur threshold of 150 after a 70-pixel border crop.
input_folder = './seamounts_galore'
high_res_folder = './seamounts_galore/high_res'
low_res_folder = './seamounts_galore/low_res'
process_images(
    input_folder=input_folder,
    high_res_folder=high_res_folder,
    low_res_folder=low_res_folder,
    crop_pixels=70,
    threshold=150.0,
)
