Preprocessing

In [1]:
import os
import cv2

# Configuration for the border-cropping pass
crop_pixels = 65  # Width, in pixels, of the border stripped from every side

image_folder = "seamount_images_equal_tiles"  # Directory the source tiles are read from
output_folder = "seamount_images_equal_tiles_cropped"  # Directory the cropped tiles are written to

# Create the destination directory up front; an already existing directory is fine
os.makedirs(name=output_folder, exist_ok=True)

# Function to crop borders from an image
def crop_fixed_border(image, crop_pixels=5):
    """Remove a border of ``crop_pixels`` pixels from every side of an image.

    Parameters
    ----------
    image : array-like with a ``shape`` attribute
        Image whose first two axes are (rows, columns). Any further axes
        (e.g. colour channels) are left untouched, so both grayscale (2-D)
        and colour (3-D) arrays are accepted.
    crop_pixels : int, default 5
        Number of pixels to strip from each of the four sides.

    Returns
    -------
    The cropped slice of ``image``, or ``image`` itself (unchanged) when it is
    not strictly larger than ``2 * crop_pixels`` in both height and width.

    Raises
    ------
    ValueError
        If ``crop_pixels`` is negative.
    """
    if crop_pixels < 0:
        # A negative margin would turn the slice bounds below into
        # end-relative indices and return an unrelated corner of the image.
        raise ValueError("crop_pixels must be non-negative")

    # Only the first two axes matter; unpacking three values would fail on 2-D input.
    height, width = image.shape[:2]

    if height > crop_pixels * 2 and width > crop_pixels * 2:
        return image[crop_pixels:height - crop_pixels, crop_pixels:width - crop_pixels]

    # Image too small to crop: hand it back untouched.
    return image

# Preprocess images (border cropping only; no resizing is performed here)
print("Preprocessing images...")

# Loop through all files in the image folder (sorted so every run visits them in the same order)
for filename in sorted(os.listdir(image_folder)):
    # Only handle files with a recognised image extension
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    image_path = os.path.join(image_folder, filename)

    # cv2.imread signals an unreadable/corrupt file by returning None rather than raising
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: unable to read {image_path}; skipping")
        continue

    # Crop the image
    cropped_image = crop_fixed_border(image, crop_pixels)

    # Save the cropped image under the same filename in the output folder
    output_image_path = os.path.join(output_folder, filename)
    if not cv2.imwrite(output_image_path, cropped_image):
        # cv2.imwrite reports failure through its boolean return value
        print(f"Warning: unable to write {output_image_path}")

print("Preprocessing complete.")


Preprocessing images...
Preprocessing complete.


Matching

In [3]:
import cv2
import os
import pandas as pd

# Helper: derive the dictionary key (PEAKID) from a large-tile filename
def _peak_id_from_large_tile_name(large_tile_name):
    """Return the PEAKID key for a large-tile filename, or None if it is not numeric.

    The text before the first '.' is parsed as a float and converted back to a
    string, so '1001431.0.png' and '1001431.png' both map to '1001431.0', the
    form used as key for the small tiles.
    """
    prefix = large_tile_name.split('.')[0]
    try:
        return str(float(prefix))
    except ValueError:
        # e.g. '.DS_Store', 'Thumbs.db' or a sub-directory name
        return None


# Helper: try every scale of the small tile against the centre of the large tile
def _best_centered_match(large_tile, small_tile, scale_factors):
    """Score the small tile, at each scale, against the window centred on the large tile.

    For every usable scale the small tile is resized and compared with the
    equally sized region around the centre of ``large_tile`` using
    ``cv2.TM_CCOEFF_NORMED``. Because the region and the template have the same
    size, each comparison yields a single score.

    Returns
    -------
    (best_score, best_top_left, best_resized_tile)
        ``best_score`` starts at 0; the other two stay None when no scale
        produced a score above 0 (or no scale fitted inside the large tile).
    """
    large_height, large_width = large_tile.shape[:2]
    center_x = large_width // 2
    center_y = large_height // 2

    best_score = 0
    best_top_left = None
    best_resized_tile = None

    for scale_factor in scale_factors:
        if scale_factor <= 0:  # Skip non-positive scale factors
            continue

        new_width = int(small_tile.shape[1] * scale_factor)
        new_height = int(small_tile.shape[0] * scale_factor)
        if new_width <= 0 or new_height <= 0:
            continue  # Scaled tile would be empty

        # Top-left corner that centres the resized tile on the large tile
        top_left_x = center_x - new_width // 2
        top_left_y = center_y - new_height // 2

        # Check the bounds before resizing so oversized scales cost nothing
        if (top_left_x < 0 or top_left_y < 0
                or top_left_x + new_width > large_width
                or top_left_y + new_height > large_height):
            continue

        resized_small_tile = cv2.resize(small_tile, (new_width, new_height))
        roi = large_tile[top_left_y:top_left_y + new_height, top_left_x:top_left_x + new_width]

        # Perform template matching
        result = cv2.matchTemplate(roi, resized_small_tile, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)

        # Debug: print the result of template matching for each scale
        print(f"Scale factor {scale_factor}: Max value: {max_val}, Location: {max_loc}")

        if max_val > best_score:
            best_score = max_val
            best_top_left = (top_left_x + max_loc[0], top_left_y + max_loc[1])
            best_resized_tile = resized_small_tile

    return best_score, best_top_left, best_resized_tile


# Function to find smaller map tile in the corresponding larger map tile using the floating window approach
def find_and_mark_tiles(large_tile_folder, small_tile_folder, output_csv, start_scale=0.1, scale_step=0.1, max_scale=5, manual_correction_x=0, manual_correction_y=0, match_threshold=0.8, marked_folder='marked_images'):
    """Locate each small tile inside its large tile and record the bounding box.

    Large and small tiles are paired through the PEAKID in their filenames
    (large: ``[PEAKID].png``, small: ``[PEAKID]_1.png``). For every pair the
    small tile is rescaled over a range of scale factors and compared with the
    centre of the large tile; if the best score exceeds ``match_threshold`` a
    green rectangle is drawn on the large tile, the marked image is saved to
    ``marked_folder`` and the box corners are collected.

    Parameters
    ----------
    large_tile_folder, small_tile_folder : str
        Directories containing the large and small tiles.
    output_csv : str
        Path of the CSV that receives one row per match. It is only written
        when at least one match was found in this call.
    start_scale, scale_step, max_scale : float
        Scale factors tried are ``frange(start_scale, max_scale + scale_step, scale_step)``.
    manual_correction_x, manual_correction_y : int
        Pixels by which the box is grown on each side horizontally/vertically.
    match_threshold : float, default 0.8
        Minimum score required to accept a match.
    marked_folder : str, default 'marked_images'
        Directory for the annotated large tiles; created on first use.

    Returns
    -------
    None. Results are reported through the CSV, the marked images and stdout.

    Notes
    -----
    NOTE(review): large tiles whose marked image already exists are skipped and
    are therefore absent from the rows written by this call, so a re-run
    replaces ``output_csv`` with the newly processed tiles only.
    """
    # List to store results
    results = []

    # Get list of files in both folders (sorted: os.listdir order is arbitrary)
    large_tiles = sorted(os.listdir(large_tile_folder))
    small_tiles = sorted(os.listdir(small_tile_folder))

    # Debug: print the list of files in both folders
    print("Large tiles:", large_tiles)
    print("Small tiles:", small_tiles)

    # Index the small tiles by PEAKID (text before the first '_'); if several
    # files share a PEAKID the last one in sorted order wins.
    small_tiles_dict = {}
    for small_tile_name in small_tiles:
        peak_id = small_tile_name.split('_')[0]
        small_tiles_dict[peak_id] = small_tile_name

    # Debug: print the small tiles dictionary
    print("Small tiles dictionary:", small_tiles_dict)

    # Iterate through each large tile
    for large_tile_name in large_tiles:
        peak_id = _peak_id_from_large_tile_name(large_tile_name)
        if peak_id is None:
            # Stray entries must not abort the whole run
            print(f"Skipping {large_tile_name}: filename does not start with a numeric PEAKID")
            continue

        # Debug: print the current large tile being processed
        print(f"Processing large tile: {large_tile_name}, PEAKID: {peak_id}")

        # Check if there is a corresponding small tile for this PEAKID
        if peak_id not in small_tiles_dict:
            print(f"No corresponding small tile found for PEAKID: {peak_id}")
            continue

        small_tile_name = small_tiles_dict[peak_id]
        large_tile_path = os.path.join(large_tile_folder, large_tile_name)
        small_tile_path = os.path.join(small_tile_folder, small_tile_name)

        # Debug: print the paths of the current large and small tiles
        print(f"Found corresponding small tile: {small_tile_name}")
        print(f"Large tile path: {large_tile_path}")
        print(f"Small tile path: {small_tile_path}")

        # Skip tiles whose marked image already exists
        marked_image_path = os.path.join(marked_folder, f"marked_{large_tile_name}")
        if os.path.exists(marked_image_path):
            print(f"Skipping already processed large tile: {large_tile_name}")
            continue

        # Read the images (cv2.imread returns None when a file cannot be loaded)
        large_tile = cv2.imread(large_tile_path, cv2.IMREAD_COLOR)
        small_tile = cv2.imread(small_tile_path, cv2.IMREAD_COLOR)

        if large_tile is None:
            print(f"Error: Unable to load large tile {large_tile_path}")
            continue
        if small_tile is None:
            print(f"Error: Unable to load small tile {small_tile_path}")
            continue

        # Floating window search: grow the small tile and score it at the centre
        max_match_val, best_match_loc, best_resized_tile = _best_centered_match(
            large_tile,
            small_tile,
            frange(start_scale, max_scale + scale_step, scale_step),
        )

        if max_match_val <= match_threshold:
            print(f"No good match found for {large_tile_name} and {small_tile_name}")
            continue

        top_left = best_match_loc
        h, w = best_resized_tile.shape[:2]
        bottom_right = (top_left[0] + w, top_left[1] + h)

        # Apply manual correction: grow the box outwards on every side
        top_left = (top_left[0] - manual_correction_x, top_left[1] - manual_correction_y)
        bottom_right = (bottom_right[0] + manual_correction_x, bottom_right[1] + manual_correction_y)

        # Draw the corrected rectangle (modifies the loaded large tile in place)
        cv2.rectangle(large_tile, top_left, bottom_right, (0, 255, 0), 2)

        # Store the coordinates of the box
        results.append({
            'large_tile': large_tile_name,
            'small_tile': small_tile_name,
            'top_left_x': top_left[0],
            'top_left_y': top_left[1],
            'bottom_right_x': bottom_right[0],
            'bottom_right_y': bottom_right[1]
        })

        # Save the marked image; cv2.imwrite reports failure via its return value
        os.makedirs(marked_folder, exist_ok=True)
        if cv2.imwrite(marked_image_path, large_tile):
            print(f"Marked image saved to: {marked_image_path}")
        else:
            print(f"Error: Unable to save marked image {marked_image_path}")

    # Debug: print the results
    print(f"Found {len(results)} matches.")

    # Convert the results to a pandas DataFrame and save to CSV
    if results:
        df = pd.DataFrame(results)
        df.to_csv(output_csv, index=False)
        print(f"Results saved to {output_csv}")
    else:
        print("No matches to save.")

# Helper function to generate a range of floating point numbers
def frange(start, stop, step):
    """Yield ``start, start + step, ...`` up to but excluding ``stop``.

    Each value is computed as ``start + index * step`` instead of by repeated
    addition, so rounding error does not build up over many steps, and a value
    that lands on ``stop`` up to floating-point noise is treated as having
    reached it. Yielded values are rounded to two decimals.

    Parameters
    ----------
    start, stop : float
        Inclusive start and exclusive end of the range.
    step : float
        Positive increment between consecutive values.

    Raises
    ------
    ValueError
        If ``step`` is not positive (raised when iteration begins, since this
        is a generator); such a step could never reach ``stop``.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Anything closer to stop than this is considered equal to stop
    tolerance = step * 1e-9

    index = 0
    while True:
        value = start + index * step
        if stop - value <= tolerance:
            break
        yield round(value, 2)  # Round to avoid floating-point precision issues
        index += 1

# Run the matcher on the cropped seamount tiles, growing each box by 5 px per side
find_and_mark_tiles(
    'seamounts_galore_cropped',
    'seamounts_bboxes_cropped',
    'output_pixel_coordinates.csv',
    start_scale=0.1,
    scale_step=0.1,
    max_scale=5,
    manual_correction_x=5,
    manual_correction_y=5,
)


Large tiles: ['1001431.0.png', '1013016.0.png', '1025041.0.png', '2002867.0.png', '2076096.0.png', '2120674.0.png', '2203770.0.png', '2325818.0.png', '2327372.0.png', '2358338.0.png', '2392192.0.png', '2392966.0.png', '2394421.0.png', '2395572.0.png', '2401816.0.png', '2414507.0.png', '2419181.0.png', '2438586.0.png', '2451447.0.png', '2474642.0.png', '2486137.0.png', '2554156.0.png', '2581063.0.png', '2593656.0.png', '2619027.0.png', '2624703.0.png', '2627248.0.png', '2657009.0.png', '2681196.0.png', '2691027.0.png', '2692198.0.png', '2698278.0.png', '2701105.0.png', '2708692.0.png', '2709245.0.png', '2711567.0.png', '2714397.0.png', '2725962.0.png', '2747848.0.png', '2754015.0.png', '2759106.0.png', '2772117.0.png', '2779873.0.png', '2798451.0.png', '2799659.0.png', '2799865.0.png', '2809584.0.png', '2819970.0.png', '2841042.0.png', '2853267.0.png', '2858689.0.png', '2859007.0.png', '2862843.0.png', '2887480.0.png', '2915152.0.png', '2938197.0.png', '2948196.0.png', '2948771.0.png', 