In [1]:
import re
from typing import Optional, Dict, List, Union

def extract_points(text: str) -> Optional[Dict[str, Union[List[float], str]]]:
    """
    Extract coordinates and messages from point/points XML-like tags.
    Handles both single coordinates (x="10.5") and multiple coordinates (x1="10.5" x2="9").

    Args:
        text: Input text containing point/points tags

    Returns:
        Dictionary containing coordinates and messages, or None if no match
    """
    # Match either <point> or <points> tags
    pattern = r'<point(?:s)?([^>]*)>(.*?)</point(?:s)?>'
    match = re.search(pattern, text, re.IGNORECASE)

    if not match:
        return None

    attributes = match.group(1)
    main_message = match.group(2).strip()

    # Initialize lists for coordinates
    x_coords = []
    y_coords = []
    alt_message = None

    try:
        # Extract x coordinates (both x="val" and x1="val", x2="val" formats)
        x_matches = re.finditer(r'x(\d*)="([^"]*)"', attributes)
        for x_match in x_matches:
            x_coords.append(float(x_match.group(2)))

        # Extract y coordinates (both y="val" and y1="val", y2="val" formats)
        y_matches = re.finditer(r'y(\d*)="([^"]*)"', attributes)
        for y_match in y_matches:
            y_coords.append(float(y_match.group(2)))

        # Sort coordinates based on their index
        x_coords.sort()
        y_coords.sort()

        # Extract alt message
        alt_match = re.search(r'alt="([^"]*)"', attributes)
        if alt_match:
            alt_message = alt_match.group(1)

    except ValueError as e:
        print(f"Error parsing coordinates: {e}")
        return None

    if not x_coords or not y_coords or len(x_coords) != len(y_coords):
        print("Error: Missing coordinates or mismatched number of x and y coordinates")
        return None

    return {
        "x_coordinates": x_coords,
        "y_coordinates": y_coords,
        "alt_message": alt_message,
        "main_message": main_message
    }


In [3]:

# Test the function
test_cases = [
    '<points x="10.523" y="20.3" alt="test">Hello</points>',
    '<point x="12.7" y="50.2" alt="trash can">trash can</point>',
    '<points x1="10.5" x2="9" y1="2" y2="5" alt="multiple points">Two points</points>',
    '<points x1="1" x2="2" x3="3" y1="4" y2="5" y3="6" alt="three points">Three points</points>',
    '<point x="invalid" y="50.2">bad data</point>',  # Error case
    '<points x1="1" x2="2" y1="3">missing y2</points>',  # Mismatched coordinates
    'invalid input'  # No match case
]

for test in test_cases:
    print(f"\nInput: {test}")
    result = extract_points(test)
    print(f"Output: {result}")


Input: <points x="10.523" y="20.3" alt="test">Hello</points>
Output: {'x_coordinates': [10.523], 'y_coordinates': [20.3], 'alt_message': 'test', 'main_message': 'Hello'}

Input: <point x="12.7" y="50.2" alt="trash can">trash can</point>
Output: {'x_coordinates': [12.7], 'y_coordinates': [50.2], 'alt_message': 'trash can', 'main_message': 'trash can'}

Input: <points x1="10.5" x2="9" y1="2" y2="5" alt="multiple points">Two points</points>
Output: {'x_coordinates': [9.0, 10.5], 'y_coordinates': [2.0, 5.0], 'alt_message': 'multiple points', 'main_message': 'Two points'}

Input: <points x1="1" x2="2" x3="3" y1="4" y2="5" y3="6" alt="three points">Three points</points>
Output: {'x_coordinates': [1.0, 2.0, 3.0], 'y_coordinates': [4.0, 5.0, 6.0], 'alt_message': 'three points', 'main_message': 'Three points'}

Input: <point x="invalid" y="50.2">bad data</point>
Error parsing coordinates: could not convert string to float: 'invalid'
Output: None

Input: <points x1="1" x2="2" y1="3">missing y2<

In [6]:
import re
from typing import Optional, Dict, List, Union

def extract_points(text: str) -> Optional[Dict[str, Union[List[float], str]]]:
    """
    Extract coordinates and messages from point/points XML-like tags.
    Handles both single coordinates (x="10.5") and multiple coordinates (x1="10.5" x2="9").
    Returns all valid coordinate pairs even if some coordinates are missing.

    Args:
        text: Input text containing point/points tags

    Returns:
        Dictionary containing coordinates and messages, or None if no match
    """
    # Match either <point> or <points> tags
    pattern = r'<point(?:s)?([^>]*)>(.*?)</point(?:s)?>'
    match = re.search(pattern, text, re.IGNORECASE)

    if not match:
        return None

    attributes = match.group(1)
    main_message = match.group(2).strip()

    # Initialize dictionaries for coordinates
    x_dict = {}
    y_dict = {}
    alt_message = None

    try:
        # Extract x coordinates (both x="val" and x1="val", x2="val" formats)
        x_matches = re.finditer(r'x(\d*)="([^"]*)"', attributes)
        for x_match in x_matches:
            index = x_match.group(1) if x_match.group(1) else '1'
            x_dict[int(index)] = float(x_match.group(2))

        # Extract y coordinates (both y="val" and y1="val", y2="val" formats)
        y_matches = re.finditer(r'y(\d*)="([^"]*)"', attributes)
        for y_match in y_matches:
            index = y_match.group(1) if y_match.group(1) else '1'
            y_dict[int(index)] = float(y_match.group(2))

        # Extract alt message
        alt_match = re.search(r'alt="([^"]*)"', attributes)
        if alt_match:
            alt_message = alt_match.group(1)

    except ValueError as e:
        print(f"Error parsing coordinates: {e}")
        return None

    # Find valid coordinate pairs
    x_coords = []
    y_coords = []

    # Get all indices that have both x and y coordinates
    valid_indices = sorted(set(x_dict.keys()) & set(y_dict.keys()))

    for idx in valid_indices:
        x_coords.append(x_dict[idx])
        y_coords.append(y_dict[idx])

    if not x_coords or not y_coords:
        print("Error: No valid coordinate pairs found")
        return None

    return {
        "x_coordinates": x_coords,
        "y_coordinates": y_coords,
        "alt_message": alt_message,
        "main_message": main_message,
    }


In [7]:

# Test the function
test_cases = [
    '<points x="10.5" y="20.3" alt="test">Hello</points>',
    '<point x="12.7" y="50.2" alt="trash can">trash can</point>',
    '<points x1="10.5" x2="9" y1="2" y2="5" alt="multiple points">Two points</points>',
    '<points x1="1" x2="2" x3="3" y1="4" y2="5" y3="6" alt="three points">Three points</points>',
    '<points x1="1" x2="2" y1="3">missing y2 but x1,y1 valid</points>',
    '<points x1="1" x2="2" x3="3" y2="5" y3="6">missing y1 and x4</points>',
    '<point x="invalid" y="50.2">bad data</point>',  # Error case
    'invalid input'  # No match case
]

for test in test_cases:
    print(f"\nInput: {test}")
    result = extract_points(test)
    print(f"Output: {result}")


Input: <points x="10.5" y="20.3" alt="test">Hello</points>
Output: {'x_coordinates': [10.5], 'y_coordinates': [20.3], 'alt_message': 'test', 'main_message': 'Hello'}

Input: <point x="12.7" y="50.2" alt="trash can">trash can</point>
Output: {'x_coordinates': [12.7], 'y_coordinates': [50.2], 'alt_message': 'trash can', 'main_message': 'trash can'}

Input: <points x1="10.5" x2="9" y1="2" y2="5" alt="multiple points">Two points</points>
Output: {'x_coordinates': [10.5, 9.0], 'y_coordinates': [2.0, 5.0], 'alt_message': 'multiple points', 'main_message': 'Two points'}

Input: <points x1="1" x2="2" x3="3" y1="4" y2="5" y3="6" alt="three points">Three points</points>
Output: {'x_coordinates': [1.0, 2.0, 3.0], 'y_coordinates': [4.0, 5.0, 6.0], 'alt_message': 'three points', 'main_message': 'Three points'}

Input: <points x1="1" x2="2" y1="3">missing y2 but x1,y1 valid</points>
Output: {'x_coordinates': [1.0], 'y_coordinates': [3.0], 'alt_message': None, 'main_message': 'missing y2 but x1,y1 v