# Convert Center Points and Calibration Coordinates to .csv

This script exports one .csv for shark center points and one .csv for calibration end points. Both files are saved in the same directory as the .xml file that holds the original annotations (exported from CVAT as "CVAT for Images 1.1", not COCO).

The centerpoints.csv file contains columns necessary for parsing in the DataLoader() (center_x, center_y).

In [10]:
import xml.etree.ElementTree as ET
import csv
import os

In [20]:
# Path to the CVAT annotation XML file (export the points annotations as
# 'CVAT for Images 1.1'). The generated CSV files are written to the
# directory that contains this file.
xml_file = '/mnt/class_data/group2/alexandradigiacomo/dataset/metadata/centerpoints/centerpoints.xml'

In [21]:
def parse_points(points_str):
    """Split a CVAT ``points`` attribute into stripped (x, y) text pairs.

    Args:
        points_str: Text such as ``"x1,y1;x2,y2"`` taken from a ``<points>``
            element.

    Returns:
        A list of ``(x_text, y_text)`` tuples, one per point, in file order.

    Raises:
        ValueError: If a point does not consist of exactly two
            comma-separated values.
    """
    pairs = []
    for token in points_str.split(';'):
        x_text, y_text = token.split(',')
        pairs.append((x_text.strip(), y_text.strip()))
    return pairs


def collect_annotations(root):
    """Build the CSV rows for both point labels from a parsed annotation tree.

    Args:
        root: Root element whose ``<image>`` children carry ``<points>``
            annotations.

    Returns:
        A ``(centerpoint_rows, calibration_rows)`` tuple.  Center-point rows
        are ``[filename, label, center_y, center_x, 'x,y', ...]`` and
        calibration rows are ``[filename, label, 'x,y', ...]``.  Annotations
        with any other label are ignored.
    """
    centerpoint_rows = []
    calibration_rows = []

    for image in root.findall('image'):
        image_name = image.get('name')

        for points in image.findall('points'):
            label = points.get('label')

            if label == "shark center point":
                pairs = parse_points(points.get('points'))
                x_coords = [float(x_text) for x_text, _ in pairs]
                y_coords = [float(y_text) for _, y_text in pairs]

                # The center is the mean of all points in the annotation.
                center_y = sum(y_coords) / len(y_coords)
                center_x = sum(x_coords) / len(x_coords)

                centerpoint_rows.append(
                    [image_name, label, center_y, center_x]
                    + [f'{x},{y}' for x, y in zip(x_coords, y_coords)]
                )

            elif label == "calibration end points":
                # Calibration coordinates are kept as text, only trimmed.
                pairs = parse_points(points.get('points'))
                calibration_rows.append(
                    [image_name, label] + [f'{x},{y}' for x, y in pairs]
                )

    return centerpoint_rows, calibration_rows


def header_for_rows(base_header, rows):
    """Return ``base_header`` widened so the widest row has a name per column.

    Each row holds one cell per annotated point, so a row can be wider than
    the base header (whose last entry, ``'coordinates'``, names only the
    first point).  Extra columns are named ``coordinates_2``,
    ``coordinates_3``, ...  Without them the file is ragged and readers that
    rely on the header can misalign the columns.

    Args:
        base_header: Column names ending in ``'coordinates'``.
        rows: The data rows that will be written beneath the header.

    Returns:
        A new list of column names; equal to ``base_header`` when no row is
        wider than it.
    """
    widest = max((len(row) for row in rows), default=len(base_header))
    extra = widest - len(base_header)
    return list(base_header) + [f'coordinates_{i}' for i in range(2, extra + 2)]


def write_csv(csv_path, base_header, rows):
    """Write ``rows`` to ``csv_path`` beneath a header that covers every column.

    Args:
        csv_path: Destination file path; an existing file is overwritten.
        base_header: Column names ending in ``'coordinates'``.
        rows: The data rows to write.
    """
    # Explicit UTF-8 so image names are written the same way on every
    # machine, regardless of the locale's default encoding.
    with open(csv_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header_for_rows(base_header, rows))
        writer.writerows(rows)


# In a notebook cell or when run as a script, __name__ is "__main__", so the
# export runs as before and the same global names are defined. The guard only
# keeps the helpers importable without touching the file system.
if __name__ == "__main__":
    # Parse the XML file
    tree = ET.parse(xml_file)
    root = tree.getroot()
    xml_dir = os.path.dirname(xml_file)

    # Collect the rows for both labels
    centerpoint_data, calibrationpoints_data = collect_annotations(root)

    # Path for the CSV files (same location as the XML file)
    centerpoint_csv_file = os.path.join(xml_dir, 'centerpoints.csv')
    calibrationpoints_csv_file = os.path.join(xml_dir, 'calibrationpoints.csv')

    # Write the centerpoint data (including center_y and center_x)
    write_csv(
        centerpoint_csv_file,
        ['filename', 'label', 'center_y', 'center_x', 'coordinates'],
        centerpoint_data,
    )

    # Write the calibrationpoints data
    write_csv(
        calibrationpoints_csv_file,
        ['filename', 'label', 'coordinates'],
        calibrationpoints_data,
    )

    print(f"CSV files written to: {centerpoint_csv_file} and {calibrationpoints_csv_file}")


CSV files written to: /mnt/class_data/group2/alexandradigiacomo/dataset/metadata/centerpoints/centerpoints.csv and /mnt/class_data/group2/alexandradigiacomo/dataset/metadata/centerpoints/calibrationpoints.csv
