# YAML Format Converter for Optinist

This notebook converts Optinist workflow YAML files from the old format to the new format. The main changes include:
- Restructuring parameters into nested groups
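- Preserving parameter values (parameters that are absent from the old file fall back to the defaults in the new-format templates)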

In [None]:
import yaml
import copy
import os
from typing import Dict
from pathlib import Path

class WorkflowConverter:
    """Convert Optinist workflow nodes from the old parameter layout to the new one.

    The new layout of each algorithm is described by a template YAML file
    (the default parameters of that node type).  A node is converted by
    walking its template and, for every leaf, taking the value found at the
    same position in the node's old parameters, or the template default when
    the old parameters do not provide one.
    """

    # Template files, relative to the project root.  The file name without
    # the ".yaml" suffix is the key that node labels are matched against.
    _TEMPLATE_FILES = (
        'studio/app/optinist/wrappers/caiman/params/caiman_cnmf.yaml',
        'studio/app/optinist/wrappers/caiman/params/caiman_cnmfe.yaml',
        'studio/app/optinist/wrappers/caiman/params/caiman_mc.yaml',
        'studio/app/optinist/wrappers/suite2p/params/suite2p_file_convert.yaml',
        'studio/app/optinist/wrappers/suite2p/params/suite2p_registration.yaml',
        'studio/app/optinist/wrappers/suite2p/params/suite2p_roi.yaml',
        'studio/app/optinist/wrappers/lccd/params/lccd_cell_detection.yaml',
        'studio/app/optinist/wrappers/optinist/basic_neural_analysis/params/eta.yaml',
        'studio/app/optinist/wrappers/optinist/dimension_reduction/params/cca.yaml',
        'studio/app/optinist/wrappers/optinist/dimension_reduction/params/dpca.yaml',
        'studio/app/optinist/wrappers/optinist/dimension_reduction/params/pca.yaml',
        'studio/app/optinist/wrappers/optinist/dimension_reduction/params/tsne.yaml',
        'studio/app/optinist/wrappers/optinist/neural_decoding/params/glm.yaml',
        'studio/app/optinist/wrappers/optinist/neural_decoding/params/lda.yaml',
        'studio/app/optinist/wrappers/optinist/neural_decoding/params/svm.yaml',
        'studio/app/optinist/wrappers/optinist/neural_population_analysis/params/cross_correlation.yaml',
        'studio/app/optinist/wrappers/optinist/neural_population_analysis/params/granger.yaml',
        'studio/app/optinist/wrappers/optinist/neural_population_analysis/params/correlation.yaml',
    )

    def __init__(self, project_root_dir=None):
        """Load every template that can be read; warn about the others.

        Args:
            project_root_dir: Directory that the template paths are relative
                to.  When omitted, the parent of the current working
                directory is used.
        """
        # Load template files. These are the default parameters for each node type in the new format.
        self.templates = {}

        if project_root_dir is None:
            project_root_dir = os.path.dirname(os.getcwd())

        for template_file in self._TEMPLATE_FILES:
            full_path = os.path.join(project_root_dir, template_file)
            try:
                # Explicit encoding: the platform default is not UTF-8 everywhere.
                with open(full_path, 'r', encoding='utf-8') as f:
                    template = yaml.safe_load(f)
            except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
                # Best effort: a missing or broken template only disables
                # conversion for that node type.
                print(f"Warning: Could not load template {template_file}: {str(e)}")
                continue

            if not isinstance(template, dict):
                # An empty file loads as None; it cannot describe a layout.
                print(f"Warning: Could not load template {template_file}: not a YAML mapping")
                continue

            name = os.path.splitext(os.path.basename(template_file))[0]
            self.templates[name] = template

    def extract_value(self, param):
        """Strip the type/value/children wrappers from a parameter tree.

        * ``{"type": ..., "value": v, ...}`` becomes ``v``.
        * ``{"type": "parent", "children": {...}}`` becomes a plain dict of
          the extracted children, so nested values can be looked up by key.
        * Any other dict is extracted key by key; non-dicts are returned as is.
        """
        if not isinstance(param, dict):
            return param
        if 'type' in param and 'value' in param:
            return param['value']

        children = param.get('children')
        if param.get('type') == 'parent' and isinstance(children, dict):
            # Without this unwrapping the children stay hidden behind the
            # "children" key and their values are never found later on.
            return {k: self.extract_value(v) for k, v in children.items()}

        return {k: self.extract_value(v) for k, v in param.items()}

    def convert_to_param_structure(self, value, path):
        """Wrap a raw value (or nested dict of values) into parent/child entries.

        Dicts become ``{"type": "parent", "children": ...}`` with each child's
        path extended by ``/<key>``; anything else becomes a ``"child"`` entry
        carrying ``value`` and ``path``.
        """
        if isinstance(value, dict):
            return {
                "type": "parent",
                "children": {
                    k: self.convert_to_param_structure(v, f"{path}/{k}")
                    for k, v in value.items()
                }
            }
        return {
            "type": "child",
            "value": value,
            "path": path
        }

    def copy_values_with_structure(self, template_dict, old_dict, parent_path=""):
        """Build wrapped parameters shaped like the template, filled with old values.

        Args:
            template_dict: Template mapping; nested dicts are groups, all
                other values are leaf defaults.
            old_dict: Old values at the same nesting level.  Anything that is
                not a dict is treated as "no old values at this level".
            parent_path: Slash-separated path of the enclosing group.

        Returns:
            A dict with one ``"parent"`` or ``"child"`` entry per template key.
        """
        if not isinstance(old_dict, dict):
            # The old file may hold a scalar or None where the template has a
            # group; fall back to the template defaults for that group.
            old_dict = {}

        result = {}
        for key, template_value in template_dict.items():
            path = f"{key}" if not parent_path else f"{parent_path}/{key}"

            if isinstance(template_value, dict):
                # Handle nested dictionary
                result[key] = {
                    "type": "parent",
                    "children": self.copy_values_with_structure(
                        template_value,
                        old_dict.get(key),
                        path
                    )
                }
            else:
                # Handle leaf values.  Deep copy so that mutable values
                # (lists, dicts) are not shared with the template or with the
                # node the old values came from.
                value = copy.deepcopy(
                    self.extract_value(old_dict.get(key, template_value))
                )
                result[key] = {
                    "type": "child",
                    "value": value,
                    "path": path
                }
        return result

    def convert_node(self, node: Dict) -> Dict:
        """Convert a single node using its template as reference.

        The node is returned unchanged (the same object) when it has no
        parameters, no string label, or no template matching the lower-cased
        label.  Otherwise a deep copy with rebuilt ``data.param`` is returned
        and the input node is left untouched.
        """
        data = node.get('data')
        if not isinstance(data, dict) or not data.get('param'):
            return node

        label = data.get('label')
        if not isinstance(label, str):
            # Without a label there is no way to pick a template.
            return node

        node_type = label.lower()
        if node_type not in self.templates:
            return node
        template = self.templates[node_type]
        if not isinstance(template, dict):
            return node

        # Get raw values from the old parameters
        old_params = self.extract_value(data['param'])

        # Create new parameters based on template structure
        new_params = self.copy_values_with_structure(template, old_params)

        # Create new node with updated parameters
        node_copy = copy.deepcopy(node)
        node_copy['data']['param'] = new_params
        return node_copy

    def convert_workflow(self, workflow: Dict) -> Dict:
        """Return a converted deep copy of the workflow.

        Every entry of ``nodeDict`` whose ``type`` is ``"AlgorithmNode"`` is
        passed through :meth:`convert_node` and is guaranteed to end up with
        a ``data`` dict containing ``fileType``, ``hdf5Path`` and ``matPath``
        (set to ``None`` when absent).  Other nodes are copied as they are.

        Raises:
            KeyError: If the workflow has no ``nodeDict`` entry.
        """
        result = copy.deepcopy(workflow)
        node_dict = result['nodeDict']

        # Only values of existing keys are replaced, so iterating the dict
        # while assigning is safe.
        for node_id, node in node_dict.items():
            if node.get('type') != 'AlgorithmNode':
                continue

            converted_node = self.convert_node(node)

            # Ensure essential metadata is preserved
            if not isinstance(converted_node.get('data'), dict):
                converted_node['data'] = {}
            node_data = converted_node['data']
            for field in ('fileType', 'hdf5Path', 'matPath'):
                node_data.setdefault(field, None)

            node_dict[node_id] = converted_node

        return result


def _require(condition, message):
    """Raise AssertionError(message) unless condition is true.

    Unlike an ``assert`` statement this check is not removed when Python runs
    with ``-O``, while callers still see the same exception type.
    """
    if not condition:
        raise AssertionError(message)


def convert_workflow_file(input_file: str, output_file: str):
    """Convert a workflow YAML file from the old to the new format.

    The input is loaded, converted with ``WorkflowConverter``, validated, and
    only then written, so a failed validation leaves ``output_file`` untouched.
    The output contains the ``nodeDict`` and ``edgeDict`` entries only, in
    that order.

    Args:
        input_file: Path of the old-format workflow YAML file.
        output_file: Path the converted workflow is written to.

    Raises:
        TypeError: If the input file does not contain a YAML mapping.
        KeyError: If the workflow lacks ``nodeDict`` or ``edgeDict``.
        AssertionError: If a converted algorithm node misses a required field.
    """
    converter = WorkflowConverter()

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(input_file, 'r', encoding='utf-8') as f:
        workflow = yaml.safe_load(f)

    if not isinstance(workflow, dict):
        # An empty file loads as None; fail with a readable message instead
        # of an opaque "NoneType is not subscriptable".
        raise TypeError(f"{input_file} does not contain a YAML mapping")

    converted = converter.convert_workflow(workflow)

    # Validate essential structures
    for node_id, node in converted['nodeDict'].items():
        if node.get('type') != 'AlgorithmNode':
            continue

        # Ensure all required fields are present
        _require('data' in node, f"Node {node_id} missing data field")
        node_data = node['data']
        _require('param' in node_data, f"Node {node_id} missing param field")
        _require('path' in node_data, f"Node {node_id} missing path field")
        _require('type' in node_data, f"Node {node_id} missing type field")

        # Validate parameter structure (top-level entries only)
        for param_key, param_value in node_data['param'].items():
            if not isinstance(param_value, dict):
                continue
            _require(
                'type' in param_value,
                f"Parameter {param_key} in node {node_id} missing type field",
            )
            if param_value['type'] == 'child':
                _require(
                    'value' in param_value,
                    f"Child parameter {param_key} in node {node_id} missing value field",
                )
                _require(
                    'path' in param_value,
                    f"Child parameter {param_key} in node {node_id} missing path field",
                )
            elif param_value['type'] == 'parent':
                _require(
                    'children' in param_value,
                    f"Parent parameter {param_key} in node {node_id} missing children field",
                )

    ordered_workflow = {
        'nodeDict': converted['nodeDict'],
        'edgeDict': converted['edgeDict']
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        # sort_keys=False keeps nodeDict before edgeDict and the template's
        # parameter order.
        yaml.dump(ordered_workflow, f, sort_keys=False)

# Usage example

In [None]:
# Replace both placeholders below with real paths before running, e.g.
#   input_file = "/my/path/workflow_oldstyle.yaml"
#   output_file = "/my/path/workflow_newstyle.yaml"
# The output file may have any name you want.
input_file, output_file = ".yaml", ".yaml"
convert_workflow_file(input_file=input_file, output_file=output_file)