**Pre-process o3 file to JSON format**

- NOTE: each patient's o3 file may be named differently

    (e.g.) "LIS_SJxxxxxx.o3" or "SJxxxxxx.o3"

In [None]:
import numpy as np
import os
import re
import pandas as pd
import json


In [None]:
# Path to the example o3 file that is previewed, parsed and converted to JSON
input_path = '/home/travail/Antonin_Dataset/o3_symlink/SJ0000285/2009-02-25/LIS_SJ0000285.o3'

In [None]:
# Peek at the beginning of the o3 file: show its first 29 lines, trimmed
with open(input_path, 'r', encoding='utf-8') as f:
    content = f.readlines()

head = content[:29]
for line in head:
    print(line.strip())

In [None]:
def _parse_o3_point(line):
    """
    Convert one whitespace-separated data line into a point dictionary.

    Accepted layouts are "tag x y z", "tag x y z err" and
    "tag x y z err src". Returns None when the token count is not 4-6 or a
    numeric field cannot be converted to float.
    """
    parts = line.split()
    if not 4 <= len(parts) <= 6:
        return None

    try:
        point = {
            "tag": parts[0],
            "x": float(parts[1]),
            "y": float(parts[2]),
            "z": float(parts[3]),
        }
        if len(parts) >= 5:
            point["err"] = float(parts[4])
    except ValueError:
        # Right number of tokens, but a coordinate/error is not numeric
        return None

    if len(parts) == 6:
        point["src"] = parts[5]
    return point


def parse_o3_file(file_path):
    """
    Parse an o3 file and return its objects as structured data.

    Only the text following the "DATA 3D" marker is read; it is split on
    "Objet:". For each object, the first line is taken as the object name,
    the line right after it is skipped, and every remaining line is parsed
    as a point. Blank lines are ignored; other lines that do not match a
    point layout are reported with a warning and skipped. If an object name
    occurs more than once, the last occurrence wins.

    Header fields (Dossier, Date, Source) are currently not extracted.

    Args:
        file_path: path to the o3 file (read as UTF-8).

    Returns:
        dict of the form {"Objets": {name: {"Points": [point, ...]}}} where
        each point has "tag", "x", "y", "z" and optionally "err" and "src".

    Raises:
        FileNotFoundError: if file_path does not exist.
        ValueError: if the file contains no "DATA 3D" marker.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, 'r', encoding='utf-8') as f:
        data = f.read()

    # Everything before the marker is header material
    sections = data.split("DATA 3D")
    if len(sections) < 2:
        raise ValueError(f"No 'DATA 3D' section found in: {file_path}")
    objects_section = sections[1].strip()

    objets = {}
    # The chunk before the first "Objet:" holds no object, hence [1:]
    for block in objects_section.split("Objet:")[1:]:
        lines = block.strip().splitlines()
        if not lines:
            continue  # "Objet:" with nothing after it

        name = lines[0].strip()

        points = []
        # lines[1] is skipped on purpose: point data starts two lines in
        for line in lines[2:]:
            if not line.strip():
                continue  # blank separator line, nothing to report

            point = _parse_o3_point(line)
            if point is None:
                print(f"Warning: Unexpected format in line: {line}")
                continue  # Skip malformed lines

            points.append(point)

        objets[name] = {"Points": points}

    return {"Objets": objets}

In [None]:
# Parse the example file and dump the resulting dictionary for inspection
structured_data = parse_o3_file(input_path)

print("Structure Data:", structured_data, sep="\n")

In [None]:
# save and load json file
def save_json(data, output_path):
    """
    Write data to output_path as JSON indented by 4 spaces (UTF-8 file).

    The data is serialized before the file is opened, so a serialization
    error (e.g. TypeError for an unsupported object) is raised without
    truncating or partially overwriting an existing file at output_path.
    Prints the destination path once the file has been written.
    """
    serialized = json.dumps(data, indent=4)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
    print(f"JSON file saved at: {output_path}")
    
def load_json(json_path):
    """Read the UTF-8 JSON file at json_path and return the decoded object."""
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [None]:
# Swap only the file extension for ".json". str.replace(".o3", ".json") would
# also rewrite any ".o3" inside a directory name, and would leave the path
# unchanged (so the input file gets overwritten) when ".o3" is absent.
output_path = os.path.splitext(input_path)[0] + ".json"
save_json(structured_data, output_path)

In [None]:
# Load the JSON file back and look up one labelled point
data = load_json(output_path)
vertebre_T1 = data['Objets'].get('Vertebre_T1', {})
# First point tagged 'Ped_Inf_D', or None when the object or the tag is absent
ped_inf_d = next((p for p in vertebre_T1.get('Points', []) if p['tag'] == 'Ped_Inf_D'), None)
if ped_inf_d is None:
    # Subscripting None would raise TypeError; report the absence instead
    print("Ped_Inf_D not found in Vertebre_T1")
else:
    x, y, z = ped_inf_d['x'], ped_inf_d['y'], ped_inf_d['z']
    print(f"Ped_Inf_D coordinates: x={x}, y={y}, z={z}")