In [1]:
import os
import requests
import pandas as pd
import math
import cv2
import numpy as np
import time
import logging

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# GMRT image service and request settings
api_base_url = "https://www.gmrt.org/services/ImageServer"
tile_pixels = 800  # Requested image size in pixels (sent as the "width" parameter)
max_retries = 3  # Upper bound on download attempts per record

# Input catalogue
csv_file = "seamounts.csv"  # Seamount table (.csv) to read

# Output locations
output_folder = "seamounts_galore"  # Directory intended for downloaded images
annotated_folder = "seamounts_annotated"  # Directory intended for annotated images
bbox_folder = "seamounts_bboxes"  # Directory receiving the bounding box images
bbox_file = "seamounts_bboxes.csv"  # CSV receiving the bounding box coordinates

# Configure the root logger: INFO and above, each line prefixed with a
# timestamp and the level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Create every output directory up front; an already existing one is fine.
for _folder in (output_folder, annotated_folder, bbox_folder):
    os.makedirs(_folder, exist_ok=True)

# Load the CSV file, keep rows with LAT between -60 and 60 (inclusive), and
# draw a reproducible random sample of at most `sample_size` rows.
sample_size = 500
try:
    df = pd.read_csv(csv_file)
    df = df[(df["LAT"] >= -60) & (df["LAT"] <= 60)]  # Filter latitudes between -60 and 60
    # DataFrame.sample raises ValueError when n exceeds the number of rows
    # (sampling without replacement), so cap n at what is actually available.
    df = df.sample(n=min(sample_size, len(df)), random_state=22).reset_index(drop=True)
    logging.info(f"Loaded {len(df)} records from {csv_file}.")
except Exception as e:
    logging.error(f"Error loading .csv file: {e}")
    # Stop with a non-zero status; unlike exit(), this does not depend on the
    # `site` module having injected the interactive helper.
    raise SystemExit(1)

# Rows collected for the bounding box CSV: [PEAKID, x_min, y_min, x_max, y_max]
bbox_data = []

# (connect, read) timeouts in seconds. Without a timeout requests.get can
# block indefinitely on a stalled connection and hang the whole run.
request_timeout = (10, 60)
retry_delay = 2  # Seconds to wait between download attempts

# Process each record
for index, row in df.iterrows():
    # Parsing the row is deterministic, so do it once outside the retry loop:
    # a missing column or an invalid number will not fix itself on retry.
    try:
        file_name = str(row["PEAKID"])  # Use PEAKID as the file name
        center_lon = float(row["LONG"])  # Longitude from LONG
        center_lat = float(row["LAT"])  # Latitude from LAT
        area_km2 = float(row["AREA2D"])  # Area in km^2

        # NaN would otherwise flow silently into the request parameters
        if not all(math.isfinite(v) for v in (center_lon, center_lat, area_km2)):
            raise ValueError("non-finite LONG/LAT/AREA2D value")

        # Radius of the circle with the same area, converted to degrees
        # (1 degree ≈ 111 km). math.sqrt raises ValueError for a negative area.
        # NOTE(review): the same value is used for longitude and latitude, but a
        # degree of longitude is shorter than 111 km away from the equator —
        # confirm a box that is square in degrees is what is wanted.
        radius_deg = math.sqrt(area_km2 / math.pi) / 111.0
    except (KeyError, TypeError, ValueError) as e:
        logging.error(f"Error processing row {index}: {e}")
        continue

    # Calculate bounding box coordinates
    x_min = center_lon - radius_deg
    x_max = center_lon + radius_deg
    y_min = center_lat - radius_deg
    y_max = center_lat + radius_deg

    # Debugging: Print calculated values
    logging.debug(f"PEAKID: {file_name}")
    logging.debug(f"Center Lat: {center_lat}, Center Lon: {center_lon}")
    logging.debug(f"Radius (degrees): {radius_deg}")
    logging.debug(f"Bounding Box: {x_min}, {y_min}, {x_max}, {y_max}")

    # Prepare API parameters for image download (using bounding box coordinates)
    params = {
        "minlatitude": y_min,
        "maxlatitude": y_max,
        "minlongitude": x_min,
        "maxlongitude": x_max,
        "width": tile_pixels,
        "mask": "false",
        "download": "true",
    }

    # Construct file paths
    bbox_file_path = os.path.join(bbox_folder, f"{file_name}_1.png")  # Save bbox-specific image

    for attempt in range(1, max_retries + 1):
        try:
            # Download the image for the bounding box
            logging.info(f"Downloading bounding box image for PEAKID {file_name}...")
            response = requests.get(api_base_url, params=params, timeout=request_timeout)

            if response.status_code == 200:
                # Save the bounding box image
                with open(bbox_file_path, "wb") as file:
                    file.write(response.content)
                logging.info(f"Bounding box image saved: {bbox_file_path}")

                # Append bounding box data
                bbox_data.append([file_name, x_min, y_min, x_max, y_max])
                break  # Success: leave the retry loop (skips the for-else below)

            logging.error(f"Failed to download bounding box for PEAKID {file_name} - HTTP {response.status_code}")
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, and one
            # failing record must not abort the remaining downloads.
            logging.error(f"Error processing row {index}: {e}")

        # Wait before retrying, but not after the final attempt
        if attempt < max_retries:
            time.sleep(retry_delay)
    else:
        # Reached only when no attempt succeeded
        logging.error(f"Giving up on PEAKID {file_name} after {max_retries} attempts.")

# Write the collected bounding boxes to disk as a CSV table (no index column)
bbox_df = pd.DataFrame(
    bbox_data,
    columns=["PEAKID", "x_min", "y_min", "x_max", "y_max"],
)
bbox_df.to_csv(bbox_file, index=False)
logging.info(f"Bounding box data saved to {bbox_file}")

# Final marker so the end of the run is visible in the log
logging.info("Process complete.")


2024-12-22 17:18:13,371 - INFO - Loaded 500 records from seamounts.csv.
2024-12-22 17:18:13,372 - INFO - Downloading bounding box image for PEAKID 991601.0...
2024-12-22 17:18:14,964 - INFO - Bounding box image saved: seamounts_bboxes\991601.0_1.png
2024-12-22 17:18:14,965 - INFO - Downloading bounding box image for PEAKID 4525212.0...
2024-12-22 17:18:16,495 - INFO - Bounding box image saved: seamounts_bboxes\4525212.0_1.png
2024-12-22 17:18:16,496 - INFO - Downloading bounding box image for PEAKID 4417924.0...
2024-12-22 17:18:17,691 - INFO - Bounding box image saved: seamounts_bboxes\4417924.0_1.png
2024-12-22 17:18:17,692 - INFO - Downloading bounding box image for PEAKID 3048330.0...
2024-12-22 17:18:18,986 - INFO - Bounding box image saved: seamounts_bboxes\3048330.0_1.png
2024-12-22 17:18:18,987 - INFO - Downloading bounding box image for PEAKID 3403726.0...
2024-12-22 17:18:20,272 - INFO - Bounding box image saved: seamounts_bboxes\3403726.0_1.png
2024-12-22 17:18:20,273 - INFO