# Building an OpenSim Parser from Scratch 🏗️

This notebook demonstrates how to build a comprehensive OpenSim parser with a modular architecture from the ground up. We'll create separate parsers for .osim model files and .mot motion files, then combine them into a unified interface.

## 🎯 Objectives
- Build **OpenSimModelParser** for .osim model files
- Build **OpenSimMotionParser** for .mot motion files  
- Create **OpenSimParser** unified interface
- Implement graph neural network data format conversion
- Add comprehensive data validation and export capabilities

## 🏗️ Architecture Design
```
OpenSim Parser Suite
├── Core Data Structures (Body, Joint, Coordinate, Muscle)
├── OpenSimModelParser (.osim files)
├── OpenSimMotionParser (.mot files)
├── OpenSimParser (unified interface)
├── Data Validation & Compatibility
├── Graph Format Conversion
└── Export Functionality
```

**Author:** Eric Fonseca  
**Date:** August 4, 2025  
**Framework:** Built with Scott Delp's OpenSim architectural insights

## 1. Setup Development Environment 🚀

First, let's import all required libraries and set up the development environment with proper error handling and logging.

In [None]:
# Core Python libraries
import xml.etree.ElementTree as ET
from pathlib import Path
import pandas as pd
import numpy as np
import logging
import warnings
from typing import Dict, List, Optional, Union, Any, Tuple
from dataclasses import dataclass, field
from datetime import datetime
import json
import sys

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Optional scientific libraries
try:
    import networkx as nx
    HAS_NETWORKX = True
except ImportError:
    HAS_NETWORKX = False
    print("⚠️  NetworkX not available - graph visualizations will be limited")

# Configure logging: every record carries a timestamp, the logger name and the level
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('OpenSimParser')

# Configure warnings and plotting
warnings.filterwarnings('ignore', category=FutureWarning)
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

# Project setup: the notebook's parent directory is treated as the project
# root and put first on sys.path so project modules can be imported.
project_root = Path('..').resolve()
sys.path.insert(0, str(project_root))

print("✅ Development environment setup complete!")
print(f"📁 Project root: {project_root}")
print(f"🐍 Python version: {sys.version.split()[0]}")
print(f"📊 NumPy version: {np.__version__}")
print(f"🐼 Pandas version: {pd.__version__}")
print("🎨 Matplotlib available: ✅")
print(f"🌐 NetworkX available: {'✅' if HAS_NETWORKX else '❌'}")

# Set up test data directory
test_dir = project_root / "experiments" / "motion_prediction_exp_20250803_200025"
print(f"\n🎯 Test data directory: {test_dir.name}")
print(f"📁 Directory exists: {'✅' if test_dir.exists() else '❌'}")

# Bind the file lists unconditionally: the fallback branch below promises to
# carry on with dummy data, so these names must exist (as empty lists) even
# when the directory is missing instead of raising NameError later on.
osim_files: List[Path] = []
mot_files: List[Path] = []

if test_dir.exists():
    # .osim models are looked up directly in the directory; .mot files are
    # searched for recursively in all sub-directories.
    osim_files = list(test_dir.glob("*.osim"))
    mot_files = list(test_dir.rglob("*.mot"))
    print(f"📄 Found {len(osim_files)} .osim files")
    print(f"📈 Found {len(mot_files)} .mot files")
else:
    print("⚠️  Test directory not found - will use dummy data for demonstration")

## 2. Define Core Data Structures 📊

Let's create the foundational data structures for OpenSim components. These dataclasses will represent bodies, joints, coordinates, muscles, and motion data.

In [None]:
@dataclass
class Body:
    """A body segment of an OpenSim model.

    Attributes:
        name: Identifier of the body.
        mass: Mass of the segment (defaults to 0.0).
        center_of_mass: Three-element list, zero-filled by default.
        inertia: Six-element list, zero-filled by default.
        attached_geometry: Names of attached geometry, empty by default.
    """
    name: str
    mass: float = 0.0
    # Each instance gets its own freshly built list (never a shared default).
    center_of_mass: List[float] = field(default_factory=lambda: [0.0] * 3)
    inertia: List[float] = field(default_factory=lambda: [0.0] * 6)
    attached_geometry: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Log a warning if the body was created with a negative mass."""
        mass_is_negative = self.mass < 0
        if mass_is_negative:
            # Warn only; the instance is still created with the given value.
            logger.warning(f"Body {self.name} has negative mass: {self.mass}")

@dataclass
class Coordinate:
    """A coordinate (degree of freedom) of an OpenSim model.

    Attributes:
        name: Identifier of the coordinate.
        motion_type: Expected to be 'rotational' or 'translational'.
        unit: One of "radians", "degrees" or "meters"; defaults to "radians".
        default_value: Default value of the coordinate.
        range_min: Lower bound of the allowed range (unbounded by default).
        range_max: Upper bound of the allowed range (unbounded by default).
        locked: Whether the coordinate is locked.
        prescribed_function: Optional name of a prescribed function.
    """
    name: str
    motion_type: str  # 'rotational' or 'translational'
    unit: str = "radians"  # "radians", "degrees", "meters"
    default_value: float = 0.0
    range_min: float = -np.inf
    range_max: float = np.inf
    locked: bool = False
    prescribed_function: Optional[str] = None

    def __post_init__(self):
        """Log warnings for an unknown motion type or an inverted range."""
        recognised_motion_types = ('rotational', 'translational')
        if self.motion_type not in recognised_motion_types:
            logger.warning(f"Coordinate {self.name} has unknown motion type: {self.motion_type}")

        # A range whose lower bound exceeds its upper bound is reported, not corrected.
        lower_bound, upper_bound = self.range_min, self.range_max
        if lower_bound > upper_bound:
            logger.warning(f"Coordinate {self.name} has invalid range: [{self.range_min}, {self.range_max}]")

@dataclass
class Joint:
    """A joint linking a parent body to a child body.

    Attributes:
        name: Identifier of the joint.
        joint_type: Type label of the joint.
        parent_body: Name of the parent body.
        child_body: Name of the child body.
        coordinates: Names of the coordinates belonging to this joint.
        location_in_parent: Three-element list, zero-filled by default.
        orientation_in_parent: Three-element list, zero-filled by default.
        location_in_child: Three-element list, zero-filled by default.
        orientation_in_child: Three-element list, zero-filled by default.
    """
    name: str
    joint_type: str
    parent_body: str
    child_body: str
    coordinates: List[str] = field(default_factory=list)
    # Every transform field receives its own new zero vector per instance.
    location_in_parent: List[float] = field(default_factory=lambda: [0.0] * 3)
    orientation_in_parent: List[float] = field(default_factory=lambda: [0.0] * 3)
    location_in_child: List[float] = field(default_factory=lambda: [0.0] * 3)
    orientation_in_child: List[float] = field(default_factory=lambda: [0.0] * 3)

    def __post_init__(self):
        """Log a warning when parent and child refer to the same body."""
        is_self_connection = self.parent_body == self.child_body
        if is_self_connection:
            logger.warning(f"Joint {self.name} connects body to itself: {self.parent_body}")

    @property
    def dof(self) -> int:
        """Number of degrees of freedom, i.e. how many coordinates the joint lists."""
        coordinate_count = len(self.coordinates)
        return coordinate_count

@dataclass
class Muscle:
    """A muscle of an OpenSim model.

    Attributes:
        name: Identifier of the muscle.
        muscle_type: Type label of the muscle.
        max_isometric_force: Maximum isometric force (defaults to 0.0).
        optimal_fiber_length: Optimal fiber length (defaults to 0.0).
        tendon_slack_length: Tendon slack length (defaults to 0.0).
        pennation_angle: Pennation angle (defaults to 0.0).
        activation_time_constant: Defaults to 0.01.
        deactivation_time_constant: Defaults to 0.04.
        path_points: List of path-point dictionaries, empty by default.
    """
    name: str
    muscle_type: str
    max_isometric_force: float = 0.0
    optimal_fiber_length: float = 0.0
    tendon_slack_length: float = 0.0
    pennation_angle: float = 0.0
    activation_time_constant: float = 0.01
    deactivation_time_constant: float = 0.04
    path_points: List[Dict[str, Any]] = field(default_factory=list)

    def __post_init__(self):
        """Log warnings for a negative max force or a non-positive fiber length."""
        peak_force = self.max_isometric_force
        fiber_length = self.optimal_fiber_length

        if peak_force < 0:
            logger.warning(f"Muscle {self.name} has negative max force: {self.max_isometric_force}")
        # Zero counts as invalid here, so the default fiber length also triggers this warning.
        if fiber_length <= 0:
            logger.warning(f"Muscle {self.name} has invalid fiber length: {self.optimal_fiber_length}")

@dataclass
class MotionData:
    """Container for the contents of a .mot motion file.

    Attributes:
        version: Version string (defaults to "1.0").
        n_rows: Row count (defaults to 0).
        n_columns: Column count (defaults to 0).
        in_degrees: Whether values are expressed in degrees (defaults to True).
        data: The motion table as a DataFrame, or None when nothing is loaded.
        coordinate_names: Names of the coordinates, empty by default.
        time_column: Name of the column in ``data`` holding time stamps.
    """
    version: str = "1.0"
    n_rows: int = 0
    n_columns: int = 0
    in_degrees: bool = True
    data: Optional[pd.DataFrame] = None
    coordinate_names: List[str] = field(default_factory=list)
    time_column: str = "time"

    def get_duration(self) -> float:
        """Return last minus first value of the time column.

        Returns 0.0 when there is no data, the table is empty, or the time
        column is absent.
        """
        table = self.data
        if table is None or len(table) == 0:
            return 0.0
        if self.time_column not in table.columns:
            return 0.0
        stamps = table[self.time_column]
        first_stamp, last_stamp = stamps.iloc[0], stamps.iloc[-1]
        return float(last_stamp - first_stamp)

    def get_sampling_rate(self) -> float:
        """Return the average sampling rate: (rows - 1) / duration.

        Returns 0.0 when the duration is not positive or there are fewer
        than two rows.
        """
        elapsed = self.get_duration()
        # A positive duration implies that data is present, so len() is safe below.
        if not elapsed > 0:
            return 0.0
        row_count = len(self.data)
        if not row_count > 1:
            return 0.0
        return (row_count - 1) / elapsed

# Announce the data structures defined in this cell, one summary line each.
print(
    "✅ Core data structures defined successfully!",
    "📊 Body: Physical body segments with mass and inertia properties",
    "📐 Coordinate: Degrees of freedom with motion constraints",
    "🔗 Joint: Connections between bodies with spatial transforms",
    "💪 Muscle: Force-generating elements with physiological properties",
    "📈 MotionData: Time series data container with metadata",
    sep="\n",
)

# Smoke test: build one instance of three of the structures.
test_body = Body(
    name="pelvis",
    mass=11.777,
    center_of_mass=[0.0, 0.0, 0.0],
)
test_coord = Coordinate(
    name="hip_flexion_r",
    motion_type="rotational",
    unit="radians",
)
test_joint = Joint(
    name="hip_r",
    joint_type="ball",
    parent_body="pelvis",
    child_body="femur_r",
)

# Report the instances that were just created.
print(
    "\n🧪 Test instances created:",
    f"   Body: {test_body.name} (mass: {test_body.mass} kg)",
    f"   Coordinate: {test_coord.name} ({test_coord.motion_type}, {test_coord.unit})",
    f"   Joint: {test_joint.name} ({test_joint.parent_body} → {test_joint.child_body})",
    sep="\n",
)

## 3. Implement XML Parsing Functions 🔧

Now let's build utility functions to parse the OpenSim XML format, handle namespaces, and convert XML attributes to Python objects.