Key functionality: downloads shifted tiles so that the seamount is not always in the center of the image.

## Download data

In [1]:
import os
import requests
import pandas as pd
import math
import random

# Parameters
seamounts_csv = "seamounts.csv"  # Path to the .csv file with seamount data
marked_images_folder = "marked_images"  # Folder containing already marked images
existing_tiles_folder = "seamounts_seg"  # Folder with existing downloaded tiles
output_folder = "seamounts_moved"  # Folder to save newly positioned images
bbox_csv = "merged_pixel_coordinates.csv"  # CSV file with bounding box coordinates
tile_pixels = 800  # Width and height of the image in pixels
tile_width_deg = 1.0  # Fixed tile width in degrees of longitude
api_base_url = "https://www.gmrt.org/services/ImageServer"

# Correction fractions for bounding box shifts: the pixel shift applied to a
# bounding box is the nominal shift multiplied by (1 - correction).
mod_w_pct = 0.3  # Reduce width shift by this fraction of the calculated shift
mod_h_pct = 0.3  # Reduce height shift by this fraction of the calculated shift

# Ensure output folder exists
os.makedirs(output_folder, exist_ok=True)

# Load seamounts CSV
try:
    seamounts_df = pd.read_csv(seamounts_csv)
    print(f"Loaded {len(seamounts_df)} records from {seamounts_csv}.")
except Exception as e:
    print(f"Error loading seamounts CSV: {e}")
    # Nothing below can work without this table. Raise SystemExit directly
    # (instead of the interactive-only exit() helper) and report failure with
    # a non-zero status.
    raise SystemExit(1)

# Load bounding box CSV
try:
    bbox_df = pd.read_csv(bbox_csv)
    print(f"Loaded {len(bbox_df)} bounding box records from {bbox_csv}.")
except Exception as e:
    print(f"Error loading bounding box CSV: {e}")
    # Same reasoning as above: stop with a failure status.
    raise SystemExit(1)

# Function to calculate new bounding box coordinates
def adjust_bounding_box(bbox, width_shift_pct, height_shift_pct, mod_w_pct, mod_h_pct, tile_size=None):
    """
    Translate a pixel bounding box to follow a shift of the tile center.

    Args:
        bbox: Mapping with the keys "x_min", "y_min", "x_max" and "y_max"
            (pixel coordinates). It is not modified.
        width_shift_pct: Horizontal shift as a fraction of the tile size.
        height_shift_pct: Vertical shift as a fraction of the tile size.
        mod_w_pct: Fraction by which the horizontal shift is reduced; the
            applied shift is ``width_shift_pct * (1 - mod_w_pct)``.
        mod_h_pct: Fraction by which the vertical shift is reduced; the
            applied shift is ``height_shift_pct * (1 - mod_h_pct)``.
        tile_size: Tile edge length in pixels. When omitted, the module-level
            ``tile_pixels`` value is used, which matches the previous behavior.

    Returns:
        A new dict with the four shifted coordinates. Any other keys present
        in ``bbox`` are not carried over.
    """
    if tile_size is None:
        # Backward-compatible default: fall back to the script-wide tile size.
        tile_size = tile_pixels

    # Apply corrections to shifts, then convert the fractions to pixels
    shift_x = width_shift_pct * (1 - mod_w_pct) * tile_size
    shift_y = height_shift_pct * (1 - mod_h_pct) * tile_size

    # Pure translation: both corners of each axis move by the same amount
    return {
        "x_min": bbox["x_min"] + shift_x,
        "y_min": bbox["y_min"] + shift_y,
        "x_max": bbox["x_max"] + shift_x,
        "y_max": bbox["y_max"] + shift_y,
    }

# Seconds to wait for the image server before giving up on a request; without
# a timeout a stalled connection would block the whole run indefinitely.
request_timeout_s = 60

# Process each existing tile.
# NOTE: bbox_df is rebound to a new, longer frame inside the loop, but the
# iterator below was created from the originally loaded frame, so the rows
# appended during this run are not iterated again.
for index, row in bbox_df.iterrows():
    # Fallback label so the error handler always has something to print, even
    # if reading "image_name" itself fails.
    file_name = f"<row {index}>"
    try:
        # Extract data
        file_name = row["image_name"]
        if not os.path.exists(os.path.join(existing_tiles_folder, file_name)):
            print(f"Skipping {file_name} as it is not found in {existing_tiles_folder}.")
            continue

        # Parse bounding box (pixel coordinates within the existing tile)
        bbox = {
            "x_min": row["x_min"],
            "y_min": row["y_min"],
            "x_max": row["x_max"],
            "y_max": row["y_max"],
        }

        # Match image_name with seamounts.csv to get geographical center.
        # The part of the file name before the first "." is looked up as PEAKID.
        peak_id = file_name.split(".")[0]
        seamount_row = seamounts_df[seamounts_df["PEAKID"] == float(peak_id)]
        if seamount_row.empty:
            print(f"Skipping {file_name} as it has no matching entry in {seamounts_csv}.")
            continue

        center_lon = float(seamount_row["LONG"].values[0])
        center_lat = float(seamount_row["LAT"].values[0])

        # Latitude span of the tile: the longitude width scaled by cos(latitude)
        adjusted_height_deg = tile_width_deg * math.cos(math.radians(center_lat))

        # Generate up to 4 new tile positions (shifts that push the bounding
        # box outside the tile are skipped, not retried)
        for _ in range(4):
            width_shift_pct = random.uniform(-0.15, 0.15)
            height_shift_pct = random.uniform(-0.15, 0.15)

            # Ensure shifts keep the bounding box within bounds
            new_bbox = adjust_bounding_box(bbox, width_shift_pct, height_shift_pct, mod_w_pct, mod_h_pct)
            if (
                new_bbox["x_min"] < 0 or new_bbox["x_max"] > tile_pixels or
                new_bbox["y_min"] < 0 or new_bbox["y_max"] > tile_pixels
            ):
                print(f"Skipping shift {width_shift_pct*100:.1f}%W, {height_shift_pct*100:.1f}%N as it exceeds bounds.")
                continue

            # Calculate new center
            new_center_lon = center_lon - width_shift_pct * tile_width_deg  # Subtract for west, add for east
            new_center_lat = center_lat + height_shift_pct * adjusted_height_deg

            # Geographic extent of the new tile around the shifted center
            minlatitude = new_center_lat - (adjusted_height_deg / 2)
            maxlatitude = new_center_lat + (adjusted_height_deg / 2)
            minlongitude = new_center_lon - (tile_width_deg / 2)
            maxlongitude = new_center_lon + (tile_width_deg / 2)

            params = {
                "minlatitude": minlatitude,
                "maxlatitude": maxlatitude,
                "minlongitude": minlongitude,
                "maxlongitude": maxlongitude,
                "width": tile_pixels,
                "mask": "false",
                "download": "true",
            }

            # The shift percentages are truncated to integers for the file name
            new_file_name = f"{peak_id}_{int(width_shift_pct*100)}W_{int(height_shift_pct*100)}N.png"
            output_file = os.path.join(output_folder, new_file_name)

            # Download the new tile
            print(f"Downloading shifted tile: {new_file_name}...")
            response = requests.get(api_base_url, params=params, timeout=request_timeout_s)

            if response.status_code == 200:
                with open(output_file, "wb") as file:
                    file.write(response.content)
                print(f"Saved: {output_file}")

                # Add new bounding box entry
                new_bbox["image_name"] = new_file_name
                new_bbox_df = pd.DataFrame([new_bbox])  # Create a DataFrame for the new entry
                bbox_df = pd.concat([bbox_df, new_bbox_df], ignore_index=True)

                # Save the updated bounding box CSV after each new file so that
                # an interrupted run keeps the entries written so far
                bbox_df.to_csv(bbox_csv, index=False)
                print(f"Updated bounding box CSV saved after processing {new_file_name}.")
            else:
                print(f"Failed to download {new_file_name} - HTTP {response.status_code}")

    except Exception as e:
        # Best-effort batch: report the problem (including request timeouts)
        # and carry on with the next tile.
        print(f"Error processing {file_name}: {e}")


Loaded 33452 records from seamounts.csv.
Loaded 500 bounding box records from merged_pixel_coordinates.csv.
Downloading shifted tile: 2388515_0W_9N.png...
Saved: seamounts_moved/2388515_0W_9N.png
Updated bounding box CSV saved after processing 2388515_0W_9N.png.
Downloading shifted tile: 2388515_-10W_6N.png...
Saved: seamounts_moved/2388515_-10W_6N.png
Updated bounding box CSV saved after processing 2388515_-10W_6N.png.
Downloading shifted tile: 2388515_-13W_-7N.png...
Saved: seamounts_moved/2388515_-13W_-7N.png
Updated bounding box CSV saved after processing 2388515_-13W_-7N.png.
Downloading shifted tile: 2388515_-11W_5N.png...
Saved: seamounts_moved/2388515_-11W_5N.png
Updated bounding box CSV saved after processing 2388515_-11W_5N.png.
Downloading shifted tile: 2474327_1W_6N.png...
Saved: seamounts_moved/2474327_1W_6N.png
Updated bounding box CSV saved after processing 2474327_1W_6N.png.
Downloading shifted tile: 2474327_-10W_11N.png...
Saved: seamounts_moved/2474327_-10W_11N.png
Up

To load more seamounts, find the highest number in the downloaded image file names, edit the .csv so that it only contains the seamounts after that one, and then run the script above again.

## Filter high-res data

In [3]:
import cv2
import os
import numpy as np

def crop_fixed_border(image, crop_pixels=70):
    """Crop a fixed number of pixels from each border.

    Args:
        image: Image array whose first two axes are (height, width). Both
            2-D grayscale and 3-D multi-channel arrays are accepted.
        crop_pixels: Number of pixels to remove from each of the four sides.

    Returns:
        The cropped slice of ``image``. If the image is not larger than
        ``2 * crop_pixels`` in both dimensions, a warning is printed and the
        original image is returned unchanged.
    """
    # Only the first two axes are needed, so arrays without a channel axis
    # (grayscale) no longer fail on unpacking.
    height, width = image.shape[:2]
    if height > crop_pixels * 2 and width > crop_pixels * 2:
        return image[crop_pixels:height - crop_pixels, crop_pixels:width - crop_pixels]

    print(f"Warning: Crop size too large for image {width}x{height}. Returning original image.")
    return image

def detect_blur(image, threshold=100.0):
    """Classify an image as blurry from the variance of its Laplacian.

    Args:
        image: Image array passed directly to ``cv2.Laplacian``.
        threshold: Variance below which the image is reported as blurry.

    Returns:
        A ``(variance, is_blurry)`` tuple, where ``is_blurry`` is true when
        the variance is strictly below ``threshold``.
    """
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()
    return sharpness, sharpness < threshold

def process_images(input_folder, high_res_folder, low_res_folder, crop_pixels=70, threshold=100.0):
    """Process images: crop, detect blur, and organize into folders.

    Every file in ``input_folder`` whose name ends in .png, .jpg, .jpeg or
    .tif (case-insensitive) is loaded, cropped with ``crop_fixed_border``,
    converted to grayscale and scored with ``detect_blur``. The original
    (uncropped) file is then moved to ``low_res_folder`` if it is judged
    blurry, otherwise to ``high_res_folder``.

    A ``log.csv`` with one row per scored image (file name, Laplacian
    variance, blurry flag) is written to ``high_res_folder``, replacing any
    existing file of that name.

    Args:
        input_folder: Folder to scan (not recursive).
        high_res_folder: Destination for sharp images; created if missing.
        low_res_folder: Destination for blurry images; created if missing.
        crop_pixels: Border width removed before measuring sharpness.
        threshold: Laplacian-variance threshold passed to ``detect_blur``.
    """
    # Local import keeps this function self-contained; the csv module quotes
    # file names containing commas or quotes, which the previous hand-built
    # lines did not.
    import csv

    os.makedirs(high_res_folder, exist_ok=True)
    os.makedirs(low_res_folder, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg', '.tif')
    log = []
    for filename in os.listdir(input_folder):
        if not filename.lower().endswith(image_extensions):
            continue

        file_path = os.path.join(input_folder, filename)
        try:
            # Load the image
            image = cv2.imread(file_path)
            if image is None:
                print(f"Image {file_path} is corrupt or unreadable.")
                continue

            # Sharpness is measured on the cropped, grayscale version only;
            # the file on disk is moved as-is.
            cropped_image = crop_fixed_border(image, crop_pixels)
            gray_image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)

            # Detect blur
            lap_var, is_blurry = detect_blur(gray_image, threshold)
            log.append((filename, lap_var, is_blurry))

            # Move the file to the appropriate folder
            target_folder = low_res_folder if is_blurry else high_res_folder
            os.rename(file_path, os.path.join(target_folder, filename))

        except Exception as e:
            # Best-effort batch: report and continue with the next file
            print(f"Error processing {file_path}: {e}")

    # Save log to a CSV file
    with open(os.path.join(high_res_folder, 'log.csv'), 'w', newline='') as log_file:
        writer = csv.writer(log_file, lineterminator='\n')
        writer.writerow(['Filename', 'Laplacian Variance', 'Blurry'])
        for name, variance, blurry in log:
            writer.writerow([name, f"{variance:.2f}", str(blurry)])

# Example usage: sort the tiles in ./seamounts_galore into its high_res and
# low_res subfolders, using a blur threshold of 150.
input_folder = './seamounts_galore'
high_res_folder = f'{input_folder}/high_res'
low_res_folder = f'{input_folder}/low_res'
process_images(
    input_folder,
    high_res_folder,
    low_res_folder,
    crop_pixels=70,
    threshold=150.0,
)
