This script will convert a column of coordinates in d-m-s to d-d.

In [5]:
import pandas as pd
import numpy as np
import re

In [6]:
csv_file_path = '15d-01.csv'
data = pd.read_csv(csv_file_path, low_memory=False)

In [7]:
def convert_to_decimal(coordinate_str):
    try:
        print(f"Processing: {coordinate_str}")  # Debug print
        
        # Decode HTML entities
        coordinate_str = coordinate_str.replace("&#176;", "°").replace("&#697;", "'").replace("&#698;", '"').replace("ʹ", "'").replace("ʺ", '"')
        
        print(f"After HTML replacement: {coordinate_str}")  # Debug print
        
        # Check if already in decimal format
        if "." in coordinate_str:
            decimal_degrees = float(re.search(r"-?\d+\.\d+", coordinate_str).group())
        
        else:
            # Extract numerical values
            num_values = [float(x) for x in re.findall(r"\d+", coordinate_str)]
            
            print(f"Extracted numbers: {num_values}")  # Debug print
            
            # DMS to decimal with or without special characters
            if len(num_values) == 3:
                degrees, minutes, seconds = num_values
                decimal_degrees = degrees + (minutes / 60.0) + (seconds / 3600.0)
            
            # DM to decimal
            elif len(num_values) == 2:
                degrees, minutes = num_values
                decimal_degrees = degrees + (minutes / 60.0)
                
            # Only degrees
            elif len(num_values) == 1:
                degrees = num_values[0]
                decimal_degrees = degrees

            else:
                return None  # Unrecognized format

        if "S" in coordinate_str or "W" in coordinate_str:
            decimal_degrees *= -1

        return round(decimal_degrees, 4)

    except Exception as e:
        print(f"Error while converting coordinate: {e}")
        return None

def extract_bounding_box(coordinate_field):
    try:
        # Split latitude and longitude pairs
        lon_str, lat_str = coordinate_field.split("/")
        
        # Extract individual coordinates and convert to decimal
        west, east = map(convert_to_decimal, lon_str.split("--"))
        north, south = map(convert_to_decimal, lat_str.split("--"))
        
        # Check if any conversion failed
        if None in [west, east, north, south]:
            print("Conversion failed for one or more coordinates.")
            return None
        
        return f"{west},{south},{east},{north}"
    except Exception as e:
        print(f"Error while extracting bounding box: {e}")
        return None

data['Extracted Bounding Box'] = data['Coordinates'].apply(extract_bounding_box)

Processing: E 11°
After HTML replacement: E 11°
Extracted numbers: [11.0]
Processing: E 20° 
After HTML replacement: E 20° 
Extracted numbers: [20.0]
Processing:  S 4°
After HTML replacement:  S 4°
Extracted numbers: [4.0]
Processing: S 19°
After HTML replacement: S 19°
Extracted numbers: [19.0]
Processing: E 13°06'
After HTML replacement: E 13°06'
Extracted numbers: [13.0, 6.0]
Processing: E 39°26'
After HTML replacement: E 39°26'
Extracted numbers: [39.0, 26.0]
Processing: N 48°15'
After HTML replacement: N 48°15'
Extracted numbers: [48.0, 15.0]
Processing: N 33°44'
After HTML replacement: N 33°44'
Extracted numbers: [33.0, 44.0]
Processing: E 14°15'
After HTML replacement: E 14°15'
Extracted numbers: [14.0, 15.0]
Processing: E 27°55'
After HTML replacement: E 27°55'
Extracted numbers: [27.0, 55.0]
Processing: N 49°50'
After HTML replacement: N 49°50'
Extracted numbers: [49.0, 50.0]
Processing: N 41°20'
After HTML replacement: N 41°20'
Extracted numbers: [41.0, 20.0]
Processing: E 16

In [8]:
cleaned_file_path = "bbox_" + csv_file_path


# Write the cleaned data to a CSV file
data.to_csv(cleaned_file_path, index=False)