# Parameters

In [None]:
# Directory that holds the model files.
# NOTE(review): absolute, machine-specific path; nothing in the visible part of
# this notebook reads this constant - confirm it is still needed.
MODEL_BASE_DIR = "/home/mateo/cancer-ai/manager/models"

# Utils

In [None]:
def get_model_list(folder_path):
    """Return the paths of the ``.onnx`` files found directly in ``folder_path``.

    Only the folder's immediate entries are inspected (no recursion). Each
    returned path is ``folder_path`` joined with the entry name, in the order
    ``os.listdir`` yields the entries. The suffix check is case-sensitive.
    """
    import os

    return [
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.endswith(".onnx")
    ]

In [4]:
import torch
import torch.nn as nn
from torchvision.models import efficientnet_b7, EfficientNet_B7_Weights
import onnxruntime as ort
import torch.nn.functional as F


class OnnxCombineModel(nn.Module):
    """Two-model ONNX ensemble: equal-weight average of the softmaxed outputs.

    Both models are executed with onnxruntime. Each model receives the image
    as its first input and, only if it declares a second input, the
    demographics vector as that second input.
    """

    def __init__(self, model_path1, model_path2, verbose=True):
        """Open one inference session per model and record their input names.

        Args:
            model_path1: Path of the first ONNX model.
            model_path2: Path of the second ONNX model.
            verbose: When True (default, the historical behaviour) ``forward``
                prints the input names, raw outputs and probabilities.
        """
        super().__init__()

        self.session1 = ort.InferenceSession(model_path1)
        self.session2 = ort.InferenceSession(model_path2)
        self.input_names1 = [inp.name for inp in self.session1.get_inputs()]
        self.input_names2 = [inp.name for inp in self.session2.get_inputs()]
        self.verbose = verbose

    @staticmethod
    def _build_feed(input_names, image, demographics):
        """Map the session's declared inputs to the arrays it should receive."""
        feed = {input_names[0]: image}
        # Image-only models declare a single input; feeding them a second
        # entry (or indexing input_names[1]) would fail.
        if len(input_names) > 1:
            feed[input_names[1]] = demographics
        return feed

    @staticmethod
    def _softmax_flat(raw_output):
        """Softmax over the last (class) axis, then flatten to 1-D.

        Normalising before flattening keeps every sample of a batch summing
        to 1 instead of normalising across the whole batch.
        """
        return F.softmax(torch.as_tensor(raw_output), dim=-1).flatten()

    def forward(self, image, demographics):
        """Run both models and return ``0.5 * softmax(out1) + 0.5 * softmax(out2)``.

        Args:
            image: Array fed to the first input of each model.
            demographics: Array fed to the second input of each model that has one.

        Returns:
            1-D ``torch.Tensor`` holding the blended, flattened probabilities.
        """
        if self.verbose:
            print("|" * 60)
            print(self.input_names1)
            print(self.input_names2)

        inputs1 = self._build_feed(self.input_names1, image, demographics)
        inputs2 = self._build_feed(self.input_names2, image, demographics)

        outputs1 = self.session1.run(None, inputs1)
        outputs2 = self.session2.run(None, inputs2)

        if self.verbose:
            print("|" * 60)
            print(outputs1[0].flatten())
        probs1 = self._softmax_flat(outputs1[0])

        if self.verbose:
            print(outputs2[0].flatten())
        probs2 = self._softmax_flat(outputs2[0])

        probs = probs1 * 0.5 + 0.5 * probs2
        if self.verbose:
            print("|" * 60)
            print(probs1)
            print(probs2)
            print(probs)
        return probs

Test PyTorch

In [5]:
import numpy as np
from PIL import Image

# Build the two-model ensemble from the exported ONNX files.
model = OnnxCombineModel(
    "../../models/2025-11-27/speechmaster/18_model118.onnx",
    "../../models/2025-11-27/speechmaster/62_model94.onnx",
)
device = "cpu"

# Load one sample as RGB, resize to 512x512 and scale pixels into [0, 1].
image = (
    Image.open("../../dataset/dataset00016/0a605167-4e6e-4104-bc06-1aee2e71b33b.jpg")
    .convert("RGB")
    .resize((512, 512))
)
image = np.array(image, dtype=np.float32) * (1.0 / 255.0)

# HWC -> CHW, then add a leading batch axis: (1, 3, 512, 512).
image = np.expand_dims(np.transpose(image, (2, 0, 1)), axis=0)

# Second model input, shape (1, 3).
# NOTE(review): presumably [age, gender, location] as in ONNXInference.predict - confirm.
data = np.array([[30, 1, 6]], dtype=np.float32)

result = model(image, data)

||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
['image', 'meta']
['image', 'meta']
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
[2.9367208e-04 9.5695257e-05 1.5521944e-03 7.4321032e-04 5.8758259e-04
 2.1454692e-04 8.7291002e-05 1.0237098e-04 9.9989229e-01 3.3518672e-04
 9.3370676e-04]
[1.7052889e-04 8.1300735e-05 2.3260713e-04 1.0231137e-04 2.4351478e-04
 3.6185980e-04 2.7593970e-04 2.7954578e-05 9.9990308e-01 2.9215217e-04
 1.0997057e-04]
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
tensor([0.0786, 0.0786, 0.0787, 0.0787, 0.0786, 0.0786, 0.0786, 0.0786, 0.2136,
        0.0786, 0.0787])
tensor([0.0786, 0.0786, 0.0786, 0.0786, 0.0786, 0.0786, 0.0786, 0.0786, 0.2137,
        0.0786, 0.0786])
tensor([0.0786, 0.0786, 0.0787, 0.0786, 0.0786, 0.0786, 0.0786, 0.0786, 0.2137,
        0.0786, 0.0786])


Test ONNX Version

In [None]:
import torchvision.transforms as transforms
import onnxruntime as ort


# Human-readable label for each class index; ONNXInference.predict looks up
# CLASS_NAMES[i] for the indices of the three highest scores.
# NOTE(review): this list has 10 entries, while the recorded output of the
# PyTorch test cell shows 11 scores per model - confirm the label set matches
# the model that is loaded here.
CLASS_NAMES = [
    "Actinic keratosis (AK)",
    "Basal cell carcinoma (BCC)", 
    "Seborrheic keratosis (SK)",
    "Squamous cell carcinoma (SCC)",
    "Vascular lesion (VASC)",
    "Dermatofibroma (DF)",
    "Benign nevus (NV)",
    "Other non-neoplastic (NON)",
    "Melanoma (MEL)",   
    "Other neoplastic (ON)"
]

class ONNXInference:
    """Run a single ONNX classifier on one image plus demographic data."""

    def __init__(self, model_path):
        """Initialize ONNX model session."""

        self.session = ort.InferenceSession(model_path)
        self.input_names = [inp.name for inp in self.session.get_inputs()]

        # torchvision pipeline kept as an attribute; note that preprocess_image
        # does NOT apply it, so no mean/std normalisation happens at inference.
        self.transform = transforms.Compose([
            transforms.Resize((512, 512)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def preprocess_image(self, image_path):
        """Load an image and return it as a float32 array of shape (1, 3, 512, 512).

        The image is converted to RGB, resized to 512x512 and its pixel values
        are scaled from [0, 255] to [0, 1].
        """
        # Context manager closes the underlying file once the pixels are read.
        with Image.open(image_path) as source:
            img = source.convert('RGB')
        img = img.resize((512, 512))
        img_array = np.array(img, dtype=np.float32)
        # Scale from [0, 255] to [0, 1]
        img_array = img_array * (1 / 255.0)
        # HWC -> CHW, then add the batch axis (BCHW)
        img_array = np.transpose(img_array, (2, 0, 1))
        img_array = np.expand_dims(img_array, axis=0)
        return img_array

    def predict(self, image_path, age, gender, location, class_names=None):
        """Run inference on a single image with demographic data.

        Args:
            image_path: Path of the image to classify.
            age: Age, converted with ``float``.
            gender: ``'m'`` (any case) is encoded as 1.0, anything else as 0.0.
            location: Location code, converted with ``float``.
            class_names: Optional label list indexed by class id; defaults to
                the module-level ``CLASS_NAMES``.

        Returns:
            List of up to three ``(label, score)`` tuples, highest score first.
            A class index with no entry in the label list is reported as
            ``"class_<index>"`` instead of raising ``IndexError``.
        """
        labels = CLASS_NAMES if class_names is None else class_names

        image_tensor = self.preprocess_image(image_path)

        # Gender: 'm' -> 1.0, anything else -> 0.0
        gender_encoded = 1.0 if gender.lower() == 'm' else 0.0

        # Demographic data as [[age, gender_encoded, location]]
        demo_tensor = np.array([[float(age), gender_encoded, float(location)]], dtype=np.float32)

        # Image-only models declare a single input; only feed demographics to
        # models that actually have a second input.
        inputs = {self.input_names[0]: image_tensor}
        if len(self.input_names) > 1:
            inputs[self.input_names[1]] = demo_tensor

        outputs = self.session.run(None, inputs)
        print(outputs)
        # Scores are used as-is (no softmax is applied here).
        # NOTE(review): assumes the model already emits probabilities - confirm.
        probs = outputs[0].flatten()

        # Indices of the three highest scores, best first
        top3_idx = np.argsort(probs)[-3:][::-1]
        top3 = [
            (labels[i] if i < len(labels) else f"class_{i}", float(probs[i]))
            for i in top3_idx
        ]

        return top3
    

print("----------------")
# Single-model smoke test: classify one example image for a 30-year-old
# female patient at location code 3 and show the top-3 predictions.
# (Other models tried here: "onnx/combined-2.onnx", "model/84.onnx".)
onnx_model = ONNXInference("model/medicaldev_down_148_196.onnx")
predictions = onnx_model.predict(
    "example_dataset/dataset00092/a954cebc-6d49-4750-b485-851a307ab3fb.jpg",
    age=30,
    gender="f",
    location=3,
)
print(predictions)

## Softmax first model

### Combining 36 vs 43

In [None]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
from onnx import shape_inference
from typing import List

def fix_reduction_nodes(graph: "gs.Graph", graph_name: str = "unknown"):
    """
    Move the ``axes`` of two-input ReduceL2 / ReduceMean nodes into an attribute.

    For every ReduceL2/ReduceMean node with exactly two inputs, the axes are
    resolved from either
      * the second input itself, when it is a constant tensor carrying
        ``values`` (e.g. an initializer), or
      * the ``value`` attribute of the ``Constant`` node producing it.
    The node is then rewritten to take only the data input, with
    ``axes`` (always a flat list of ints) and ``keepdims`` (set to 1 if absent)
    as attributes. Producers left unused are not removed here; the caller's
    ``graph.cleanup()`` is expected to drop them.

    Args:
        graph: Graph whose nodes are rewritten in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of nodes that were rewritten.
    """

    def _axes_as_list(values):
        """Flatten axes (scalar, list or ndarray) to a list of ints; None stays None."""
        if values is None:
            return None
        return [int(axis) for axis in np.asarray(values).reshape(-1)]

    # Index Constant nodes by output name once instead of rescanning the whole
    # graph for every reduction node (first producer wins, as before).
    constant_by_output = {}
    for c_node in graph.nodes:
        if c_node.op == 'Constant' and c_node.outputs and len(c_node.outputs) == 1:
            constant_by_output.setdefault(c_node.outputs[0].name, c_node)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in ('ReduceL2', 'ReduceMean'):
            continue
        debug_nodes.append({
            'name': node.name,
            'op': node.op,
            'inputs_count': len(node.inputs),
            'inputs_types': [type(inp).__name__ for inp in node.inputs],
            'second_input_name': node.inputs[1].name if len(node.inputs) > 1 else None
        })
        if len(node.inputs) != 2:
            continue

        data_input = node.inputs[0]
        axes_var = node.inputs[1]
        constant_node = None
        axes_source = None

        # Case 1: the axes input is itself a constant tensor (has `.values`).
        axes_values = _axes_as_list(getattr(axes_var, 'values', None))
        if axes_values is not None:
            axes_source = axes_var.name
        else:
            # Case 2: the axes input is produced by a Constant node.
            constant_node = constant_by_output.get(axes_var.name)
            if constant_node is not None and 'value' in constant_node.attrs:
                axes_values = _axes_as_list(constant_node.attrs['value'].values)
                axes_source = constant_node.name

        if axes_values is None:
            print(f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}")
            continue

        # Rewrite the node: drop the axes input, carry axes as an attribute.
        node.inputs = [data_input]
        node.attrs['axes'] = axes_values
        # Keep an explicit keepdims so the rewritten node is self-describing.
        if 'keepdims' not in node.attrs:
            node.attrs['keepdims'] = 1
        fixed_count += 1
        print(f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from Constant '{axes_source}'")

    if debug_nodes:
        print(f"[{graph_name}] Total {', '.join(set(dn['op'] for dn in debug_nodes))} nodes: {len(debug_nodes)}, Fixed: {fixed_count}")
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}")
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count

def create_combined_onnx(model_path1, model_path2, output_path='combined.onnx', weight1=0.4, weight2=0.6):
    """
    Fuse two ONNX classifiers into one graph that outputs a weighted blend.

    Data flow of the combined model:
    - Model1: inputs[0] (renamed 'image') and inputs[1] (renamed 'demographics') -> 'logits1'
    - Model2: inputs[0] is rewired to the shared 'image' input -> 'logits2'
    - Output 'sum_avg' = weight1 * Softmax(logits1, axis=1) + weight2 * logits2

    Note: Model2's output is blended as-is (no Softmax is applied to it), so it
    is only on the same scale as Model1's probabilities if Model2 already emits
    probabilities.

    Args:
        model_path1: Path of the first ONNX model (image + demographics inputs).
        model_path2: Path of the second ONNX model (image input).
        output_path: Where the combined model is saved.
        weight1: Scalar multiplied into softmax(logits1). Defaults to 0.4.
        weight2: Scalar multiplied into logits2. Defaults to 0.6.

    Returns:
        The combined ONNX model, after shape inference and checker validation.

    Assumes:
    - Both models expose a single output of shape [batch_size, num_classes].
    - onnx and onnx-graphsurgeon are installed.
    """
    # Load both models and import them into graph surgeon
    graph1 = gs.import_onnx(onnx.load(model_path1))
    graph2 = gs.import_onnx(onnx.load(model_path2))

    # Fix reduction nodes in BOTH graphs (Model1 first, then Model2)
    total_fixed = fix_reduction_nodes(graph1, "Model1") + fix_reduction_nodes(graph2, "Model2")
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Shared inputs come from graph1; rename them for clarity
    image_input = graph1.inputs[0]
    image_input.name = 'image'

    demographics_input = graph1.inputs[1]
    demographics_input.name = 'demographics'

    old_image_input = graph2.inputs[0]
    shared_image_input = image_input  # Shared reference, named 'image'

    # Namespace every graph2 tensor/node (except the input that is about to be
    # replaced) so names exported by both models cannot collide once the two
    # graphs are merged.
    for tensor in list(graph2.tensors().values()):
        if tensor is not old_image_input and tensor.name:
            tensor.name = 'g2_' + tensor.name
    for node in graph2.nodes:
        if node.name:
            node.name = 'g2_' + node.name

    # Rewire graph2 so it consumes the shared image input
    for node in graph2.nodes:
        for i in range(len(node.inputs)):
            if node.inputs[i] is old_image_input:
                node.inputs[i] = shared_image_input
    graph2.inputs[0] = shared_image_input

    # Outputs (single output assumed for each model)
    logits1 = graph1.outputs[0]
    logits1.name = 'logits1'

    logits2 = graph2.outputs[0]
    logits2.name = 'logits2'

    # Take num_classes from the last dim of logits1; fall back to 10 when unknown
    orig_shape = logits1.shape
    if orig_shape and len(orig_shape) >= 2:
        num_classes = orig_shape[-1]
        if num_classes == 0 or num_classes is None:
            num_classes = 10  # Fallback assumption based on reported output size
        output_shape = [None, num_classes]  # Dynamic batch
    else:
        output_shape = [None, 10]  # Fallback
        print(f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback [None, 10]")

    print(f"Inferred output shape: {output_shape}")

    # Intermediate and final tensors, all [batch, num_classes] float
    probs1 = gs.Variable('probs1', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    sum_avg = gs.Variable('sum_avg', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    avg_output1 = gs.Variable('avg_output1', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    avg_output2 = gs.Variable('avg_output2', shape=output_shape, dtype=onnx.TensorProto.FLOAT)

    # Softmax on the first model only (axis=1 for [batch, classes])
    softmax1 = gs.Node(
        op='Softmax',
        inputs=[logits1],
        outputs=[probs1],
        attrs={'axis': 1}
    )

    # Scalar blend weights (broadcast over the whole tensor). The tensor names
    # are historical and do not reflect the values they hold.
    constant_07 = gs.Constant(name='constant_07', values=np.array(weight1, dtype=np.float32))
    constant_03 = gs.Constant(name='constant_03', values=np.array(weight2, dtype=np.float32))
    mul1 = gs.Node(
        op='Mul',  # weight1 * softmax(logits1)
        inputs=[probs1, constant_07],
        outputs=[avg_output1]
    )
    mul2 = gs.Node(
        op='Mul',  # weight2 * logits2
        inputs=[logits2, constant_03],
        outputs=[avg_output2]
    )

    add = gs.Node(
        op='Add',
        inputs=[avg_output1, avg_output2],
        outputs=[sum_avg]
    )

    # Combined graph: nodes from both models + blend nodes; output: sum_avg
    combined_graph = gs.Graph(
        nodes=graph1.nodes + graph2.nodes + [softmax1, mul1, mul2, add],
        inputs=[shared_image_input, demographics_input],
        outputs=[sum_avg]
    )

    # Set opset on the graph for LayerNormalization support (opset 17+)
    combined_graph.opset = 17

    # Cleanup (drops unused nodes/tensors) and export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes to fill in any missing (helps checker)
    combined_model = shape_inference.infer_shapes(combined_model)

    # Validate before saving
    onnx.checker.check_model(combined_model)

    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model

# Usage: fuse models 36 and 43 and save the result as "model/test(7.3).onnx".
combined = create_combined_onnx('model/36.onnx', 'model/43_modelvip.onnx', "model/test(7.3).onnx")

### Combining 36 vs 84

In [None]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
from onnx import shape_inference
from typing import List


def fix_reduction_nodes(graph: "gs.Graph", graph_name: str = "unknown"):
    """
    Move the ``axes`` of two-input ReduceL2 / ReduceMean nodes into an attribute.

    For every ReduceL2/ReduceMean node with exactly two inputs, the axes are
    resolved from either
      * the second input itself, when it is a constant tensor carrying
        ``values`` (e.g. an initializer), or
      * the ``value`` attribute of the ``Constant`` node producing it.
    The node is then rewritten to take only the data input, with
    ``axes`` (always a flat list of ints) and ``keepdims`` (set to 1 if absent)
    as attributes. Producers left unused are not removed here; the caller's
    ``graph.cleanup()`` is expected to drop them.

    Args:
        graph: Graph whose nodes are rewritten in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of nodes that were rewritten.
    """

    def _axes_as_list(values):
        """Flatten axes (scalar, list or ndarray) to a list of ints; None stays None."""
        if values is None:
            return None
        return [int(axis) for axis in np.asarray(values).reshape(-1)]

    # Index Constant nodes by output name once instead of rescanning the whole
    # graph for every reduction node (first producer wins, as before).
    constant_by_output = {}
    for c_node in graph.nodes:
        if c_node.op == 'Constant' and c_node.outputs and len(c_node.outputs) == 1:
            constant_by_output.setdefault(c_node.outputs[0].name, c_node)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in ('ReduceL2', 'ReduceMean'):
            continue
        debug_nodes.append({
            'name': node.name,
            'op': node.op,
            'inputs_count': len(node.inputs),
            'inputs_types': [type(inp).__name__ for inp in node.inputs],
            'second_input_name': node.inputs[1].name if len(node.inputs) > 1 else None
        })
        if len(node.inputs) != 2:
            continue

        data_input = node.inputs[0]
        axes_var = node.inputs[1]
        constant_node = None
        axes_source = None

        # Case 1: the axes input is itself a constant tensor (has `.values`).
        axes_values = _axes_as_list(getattr(axes_var, 'values', None))
        if axes_values is not None:
            axes_source = axes_var.name
        else:
            # Case 2: the axes input is produced by a Constant node.
            constant_node = constant_by_output.get(axes_var.name)
            if constant_node is not None and 'value' in constant_node.attrs:
                axes_values = _axes_as_list(constant_node.attrs['value'].values)
                axes_source = constant_node.name

        if axes_values is None:
            print(f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}")
            continue

        # Rewrite the node: drop the axes input, carry axes as an attribute.
        node.inputs = [data_input]
        node.attrs['axes'] = axes_values
        # Keep an explicit keepdims so the rewritten node is self-describing.
        if 'keepdims' not in node.attrs:
            node.attrs['keepdims'] = 1
        fixed_count += 1
        print(f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from Constant '{axes_source}'")

    if debug_nodes:
        print(f"[{graph_name}] Total {', '.join(set(dn['op'] for dn in debug_nodes))} nodes: {len(debug_nodes)}, Fixed: {fixed_count}")
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}")
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count


def _rename_graph_tensors_and_nodes(graph: gs.Graph, prefix: str, skip_vars: List[gs.Variable] = None):
    """Namespace ``graph`` in place by prepending ``prefix`` to its names.

    Every tensor returned by ``graph.tensors()`` and every node in
    ``graph.nodes`` that has a non-empty name is renamed. Tensors listed in
    ``skip_vars`` are left alone; they are matched by object identity (``id``),
    not by name, so a shared input Variable keeps its name.
    """
    protected_ids = {id(tensor) for tensor in (skip_vars or [])}

    # Tensors: snapshot the values first, since names are changed while iterating.
    for tensor in list(graph.tensors().values()):
        if id(tensor) not in protected_ids and tensor.name:
            tensor.name = prefix + tensor.name

    # Nodes
    for graph_node in graph.nodes:
        if graph_node.name:
            graph_node.name = prefix + graph_node.name


def create_combined_onnx(model_path1, model_path2, output_path='combined.onnx', weight1=0.3, weight2=0.7):
    """
    Fuse two ONNX classifiers into one graph that outputs a weighted blend of
    their softmax probabilities.

    Data flow of the combined model:
    - Model1: inputs[0] (renamed 'image') and inputs[1] (renamed 'demographics') -> 'logits1'
    - Model2: inputs[0] is rewired to the shared 'image' input -> 'logits2'
    - Output 'sum_avg' = weight1 * Softmax(logits1, axis=1) + weight2 * Softmax(logits2, axis=1)

    The second graph's tensors/nodes are prefixed with 'g2_' before merging so
    that names used by both models cannot collide, and the same `image`
    Variable object is used by both graphs.

    Args:
        model_path1: Path of the first ONNX model (image + demographics inputs).
        model_path2: Path of the second ONNX model (image input).
        output_path: Where the combined model is saved.
        weight1: Scalar multiplied into softmax(logits1). Defaults to 0.3.
        weight2: Scalar multiplied into softmax(logits2). Defaults to 0.7.

    Returns:
        The combined ONNX model, after shape inference and checker validation.
    """
    # Load both models and import them into graph surgeon
    graph1 = gs.import_onnx(onnx.load(model_path1))
    graph2 = gs.import_onnx(onnx.load(model_path2))

    # Fix reduction nodes in BOTH graphs (Model1 first, then Model2)
    total_fixed = fix_reduction_nodes(graph1, "Model1") + fix_reduction_nodes(graph2, "Model2")
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Shared inputs come from graph1; rename them for clarity
    image_input = graph1.inputs[0]
    image_input.name = 'image'

    demographics_input = graph1.inputs[1]
    demographics_input.name = 'demographics'

    # Grab model2's image input object BEFORE renaming so it can be skipped
    old_image_input = graph2.inputs[0]

    # Namespace graph2 to avoid clashes (the image Variable object is left alone)
    _rename_graph_tensors_and_nodes(graph2, prefix='g2_', skip_vars=[old_image_input])

    # Rewire graph2 so it consumes the shared image input object
    for node in graph2.nodes:
        for i in range(len(node.inputs)):
            if node.inputs[i] is old_image_input:
                node.inputs[i] = image_input
    graph2.inputs[0] = image_input

    # Outputs (single output assumed for each model)
    logits1 = graph1.outputs[0]
    logits1.name = 'logits1'

    logits2 = graph2.outputs[0]
    logits2.name = 'logits2'

    # Take num_classes from the last dim of logits1; fall back to 10 when unknown
    orig_shape = logits1.shape
    if orig_shape and len(orig_shape) >= 2:
        num_classes = orig_shape[-1]
        if num_classes == 0 or num_classes is None:
            num_classes = 10  # Fallback assumption based on reported output size
        output_shape = [None, num_classes]  # Dynamic batch
    else:
        output_shape = [None, 10]  # Fallback
        print(f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback [None, 10]")

    print(f"Inferred output shape: {output_shape}")

    # Intermediate and final tensors, all [batch, num_classes] float
    probs1 = gs.Variable('probs1', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    probs2 = gs.Variable('probs2', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    sum_avg = gs.Variable('sum_avg', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    avg_output1 = gs.Variable('avg_output1', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    avg_output2 = gs.Variable('avg_output2', shape=output_shape, dtype=onnx.TensorProto.FLOAT)

    # Softmax on both models (axis=1 for [batch, classes])
    softmax1 = gs.Node(
        op='Softmax',
        inputs=[logits1],
        outputs=[probs1],
        attrs={'axis': 1}
    )
    softmax2 = gs.Node(
        op='Softmax',
        inputs=[logits2],
        outputs=[probs2],
        attrs={'axis': 1}
    )

    # Scalar blend weights (broadcast over the whole tensor): constant_03
    # scales model 1, constant_07 scales model 2.
    constant_07 = gs.Constant(name='constant_07', values=np.array(weight2, dtype=np.float32))
    constant_03 = gs.Constant(name='constant_03', values=np.array(weight1, dtype=np.float32))
    mul1 = gs.Node(
        op='Mul',  # weight1 * softmax(logits1)
        inputs=[probs1, constant_03],
        outputs=[avg_output1]
    )
    mul2 = gs.Node(
        op='Mul',  # weight2 * softmax(logits2)
        inputs=[probs2, constant_07],
        outputs=[avg_output2]
    )
    add = gs.Node(
        op='Add',
        inputs=[avg_output1, avg_output2],
        outputs=[sum_avg]
    )

    # Combined graph: graph1 nodes, then the namespaced graph2 nodes, then the
    # blend nodes; inputs: image + demographics; output: sum_avg
    combined_graph = gs.Graph(
        nodes=graph1.nodes + graph2.nodes + [softmax1, softmax2, mul1, mul2, add],
        inputs=[image_input, demographics_input],
        outputs=[sum_avg]
    )

    # Set opset on the graph for LayerNormalization support (opset 17+)
    combined_graph.opset = 17

    # Cleanup (drops unused nodes/tensors) and export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes to fill in any missing (helps checker)
    combined_model = shape_inference.infer_shapes(combined_model)

    # Validate before saving
    onnx.checker.check_model(combined_model)

    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model


# Usage: fuse models 36 and 84 and save the result as
# "model/softmax_36_84(3.7).onnx". Adjust the paths as needed.
combined = create_combined_onnx('model/36.onnx', 'model/84.onnx', "model/softmax_36_84(3.7).onnx")


### Combining 36 vs 108

In [None]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
import uuid
from onnx import shape_inference
from typing import List


def fix_reduction_nodes(graph: "gs.Graph", graph_name: str = "unknown"):
    """
    Move the ``axes`` of two-input ReduceL2 / ReduceMean nodes into an attribute.

    For every ReduceL2/ReduceMean node with exactly two inputs, the axes are
    resolved from either
      * the second input itself, when it is a constant tensor carrying
        ``values`` (e.g. an initializer), or
      * the ``value`` attribute of the ``Constant`` node producing it.
    The node is then rewritten to take only the data input, with
    ``axes`` (always a flat list of ints) and ``keepdims`` (set to 1 if absent)
    as attributes. Producers left unused are not removed here; the caller's
    ``graph.cleanup()`` is expected to drop them.

    Args:
        graph: Graph whose nodes are rewritten in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of nodes that were rewritten.
    """

    def _axes_as_list(values):
        """Flatten axes (scalar, list or ndarray) to a list of ints; None stays None."""
        if values is None:
            return None
        return [int(axis) for axis in np.asarray(values).reshape(-1)]

    # Index Constant nodes by output name once instead of rescanning the whole
    # graph for every reduction node (first producer wins, as before).
    constant_by_output = {}
    for c_node in graph.nodes:
        if c_node.op == 'Constant' and c_node.outputs and len(c_node.outputs) == 1:
            constant_by_output.setdefault(c_node.outputs[0].name, c_node)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in ('ReduceL2', 'ReduceMean'):
            continue
        debug_nodes.append({
            'name': node.name,
            'op': node.op,
            'inputs_count': len(node.inputs),
            'inputs_types': [type(inp).__name__ for inp in node.inputs],
            'second_input_name': node.inputs[1].name if len(node.inputs) > 1 else None
        })
        if len(node.inputs) != 2:
            continue

        data_input = node.inputs[0]
        axes_var = node.inputs[1]
        constant_node = None
        axes_source = None

        # Case 1: the axes input is itself a constant tensor (has `.values`).
        axes_values = _axes_as_list(getattr(axes_var, 'values', None))
        if axes_values is not None:
            axes_source = axes_var.name
        else:
            # Case 2: the axes input is produced by a Constant node.
            constant_node = constant_by_output.get(axes_var.name)
            if constant_node is not None and 'value' in constant_node.attrs:
                axes_values = _axes_as_list(constant_node.attrs['value'].values)
                axes_source = constant_node.name

        if axes_values is None:
            print(f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}")
            continue

        # Rewrite the node: drop the axes input, carry axes as an attribute.
        node.inputs = [data_input]
        node.attrs['axes'] = axes_values
        # Keep an explicit keepdims so the rewritten node is self-describing.
        if 'keepdims' not in node.attrs:
            node.attrs['keepdims'] = 1
        fixed_count += 1
        print(f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from Constant '{axes_source}'")

    if debug_nodes:
        print(f"[{graph_name}] Total {', '.join(set(dn['op'] for dn in debug_nodes))} nodes: {len(debug_nodes)}, Fixed: {fixed_count}")
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}")
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count


def _rename_graph_tensors_and_nodes(graph: gs.Graph, prefix: str, skip_vars: List[gs.Variable] = None):
    """Namespace ``graph`` in place by prepending ``prefix`` to its names.

    Every tensor returned by ``graph.tensors()`` and every node in
    ``graph.nodes`` that has a non-empty name is renamed. Tensors listed in
    ``skip_vars`` are left alone; they are matched by object identity (``id``),
    not by name, so shared input Variables keep their names.
    """
    protected_ids = {id(tensor) for tensor in (skip_vars or [])}

    # Tensors: snapshot the values first, since names are changed while iterating.
    for tensor in list(graph.tensors().values()):
        if id(tensor) not in protected_ids and tensor.name:
            tensor.name = prefix + tensor.name

    # Nodes
    for graph_node in graph.nodes:
        if graph_node.name:
            graph_node.name = prefix + graph_node.name


def create_combined_onnx(model_path1, model_path2, output_path='combined.onnx', weight1=0.7, weight2=0.3):
    """
    Fuse two ONNX classifiers into one graph that outputs a weighted blend of
    their softmax probabilities.

    Data flow of the combined model:
    - Model1: inputs[0] (renamed 'image') and inputs[1] (renamed 'demographics') -> 'logits1'
    - Model2: inputs[0] is rewired to the shared 'image' input and, if present,
      inputs[1] to the shared 'demographics' input -> 'logits2'
    - Output 'final_avg' = weight1 * Softmax(logits1, axis=1) + weight2 * Softmax(logits2, axis=1)

    Approach:
    - Import both graphs with onnx-graphsurgeon
    - Capture model2's original input Variable objects (so they can be found & replaced)
    - Prefix all graph2 tensors/nodes with 'g2_' to avoid collisions, except those inputs
    - Replace model2's input references with the shared input Variables from graph1
    - Build the combined graph: graph1 nodes, graph2 nodes, then the blend ops
    - Cleanup, infer shapes, run the checker and save

    Args:
        model_path1: Path of the first ONNX model (image + demographics inputs).
        model_path2: Path of the second ONNX model (image, optionally demographics).
        output_path: Where the combined model is saved.
        weight1: Scalar multiplied into softmax(logits1). Defaults to 0.7.
        weight2: Scalar multiplied into softmax(logits2). Defaults to 0.3.

    Returns:
        The combined ONNX model, after shape inference and checker validation.
    """
    # Load both models and import them into graph surgeon
    graph1 = gs.import_onnx(onnx.load(model_path1))
    graph2 = gs.import_onnx(onnx.load(model_path2))

    # Fix reduction nodes in BOTH graphs (Model1 first, then Model2)
    total_fixed = fix_reduction_nodes(graph1, "Model1") + fix_reduction_nodes(graph2, "Model2")
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Prepare shared inputs from graph1
    image_input = graph1.inputs[0]
    image_input.name = 'image'

    demographics_input = graph1.inputs[1]
    demographics_input.name = 'demographics'

    # Save model2's original input objects so they can be targeted for replacement
    model2_image_var = graph2.inputs[0]
    model2_demo_var = graph2.inputs[1] if len(graph2.inputs) > 1 else None

    # Namespace graph2 to avoid collisions but skip the original input objects
    prefix = 'g2_'
    replaced_inputs = [model2_image_var]
    if model2_demo_var is not None:
        replaced_inputs.append(model2_demo_var)
    _rename_graph_tensors_and_nodes(graph2, prefix=prefix, skip_vars=replaced_inputs)

    # Rewire graph2 nodes from model2's own inputs to the shared ones
    for node in graph2.nodes:
        for i, inp in enumerate(node.inputs):
            if inp is model2_image_var:
                node.inputs[i] = image_input
            elif model2_demo_var is not None and inp is model2_demo_var:
                node.inputs[i] = demographics_input

    # Update graph2's inputs list to use the shared input objects
    graph2.inputs[0] = image_input
    if model2_demo_var is not None:
        graph2.inputs[1] = demographics_input

    # Outputs (single output assumed for each model)
    logits1 = graph1.outputs[0]
    logits1.name = 'logits1'

    # This Variable was prefixed by the renaming above; give it its final name
    logits2 = graph2.outputs[0]
    logits2.name = 'logits2'

    # Take num_classes from the last dim of logits1; fall back to 10 when unknown
    orig_shape = logits1.shape
    if orig_shape and len(orig_shape) >= 2:
        num_classes = orig_shape[-1]
        if num_classes == 0 or num_classes is None:
            num_classes = 10  # Fallback assumption based on reported output size
        output_shape = [None, num_classes]  # Dynamic batch
    else:
        output_shape = [None, 10]  # Fallback
        print(f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback [None, 10]")

    print(f"Inferred output shape: {output_shape}")

    # Intermediate and final tensors, all [batch, num_classes] float
    probs1 = gs.Variable('probs1', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    probs2 = gs.Variable('probs2', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    avg1 = gs.Variable('avg1', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    avg2 = gs.Variable('avg2', shape=output_shape, dtype=onnx.TensorProto.FLOAT)
    final_avg = gs.Variable('final_avg', shape=output_shape, dtype=onnx.TensorProto.FLOAT)

    # Softmax on both logits (axis=1)
    softmax1 = gs.Node(op='Softmax', inputs=[logits1], outputs=[probs1], attrs={'axis': 1})
    softmax2 = gs.Node(op='Softmax', inputs=[logits2], outputs=[probs2], attrs={'axis': 1})

    # Scalar blend weights (broadcast over the whole tensor): constant_07
    # scales model 1, constant_03 scales model 2.
    constant_07 = gs.Constant(name='constant_07', values=np.array(weight1, dtype=np.float32))
    constant_03 = gs.Constant(name='constant_03', values=np.array(weight2, dtype=np.float32))

    mul1 = gs.Node(op='Mul', inputs=[probs1, constant_07], outputs=[avg1])
    mul2 = gs.Node(op='Mul', inputs=[probs2, constant_03], outputs=[avg2])

    # Sum of the two weighted probability tensors
    add = gs.Node(op='Add', inputs=[avg1, avg2], outputs=[final_avg])

    # graph1 nodes first, then graph2 nodes (namespaced), then the blend nodes
    combined_nodes = list(graph1.nodes) + list(graph2.nodes) + [softmax1, softmax2, mul1, mul2, add]

    combined_graph = gs.Graph(nodes=combined_nodes, inputs=[image_input, demographics_input], outputs=[final_avg])

    # Use the highest opset of the two source graphs (at least 11).
    # NOTE(review): fix_reduction_nodes rewrites ReduceMean/ReduceL2 to carry
    # `axes` as an attribute, a form ONNX defines only below opset 18; if either
    # source graph is opset >= 18 the checker may reject the result - confirm.
    combined_graph.opset = max(getattr(graph1, 'opset', 11), getattr(graph2, 'opset', 11), 11)

    # Cleanup (drops unused nodes/tensors) and export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes
    combined_model = shape_inference.infer_shapes(combined_model)

    # Validate before saving
    onnx.checker.check_model(combined_model)

    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model


# Usage example: fuse models 36 and 108 and save the result as
# "model/softmax_36_108(7.3).onnx". Adjust the paths as needed.
combined = create_combined_onnx('model/36.onnx', 'model/108_grose.onnx', "model/softmax_36_108(7.3).onnx")


### Combining 18 vs 62

In [15]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
from onnx import shape_inference
from typing import List


def fix_reduction_nodes(graph: gs.Graph, graph_name: str = "unknown"):
    """
    Rewrite ReduceL2/ReduceMean nodes that carry `axes` as a second input so that
    `axes` becomes a node attribute instead.

    The axes values are resolved from either:
    - a constant tensor wired directly into the node (any input object exposing
      `.values`, which is how graphsurgeon represents initializers), or
    - a `Constant` op node whose single output is the axes tensor (attributes
      `value`, `value_ints` or `value_int`).

    On success the axes input is dropped, `axes` is stored as a flat list of ints and
    `keepdims` is set to 1 when the node does not define it. Producers that end up
    without consumers are not removed here; a later `graph.cleanup()` is expected
    to prune them. Debug information for every ReduceL2/ReduceMean node is printed.

    Args:
        graph: Graph whose nodes are patched in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of reduction nodes that were rewritten.
    """
    # Index Constant-op producers by output tensor name once (first producer wins)
    # rather than rescanning every node for each reduction.
    constant_producers = {}
    for candidate in graph.nodes:
        if (
            candidate.op == "Constant"
            and candidate.outputs
            and len(candidate.outputs) == 1
        ):
            constant_producers.setdefault(candidate.outputs[0].name, candidate)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in ("ReduceL2", "ReduceMean"):
            continue
        debug_nodes.append(
            {
                "name": node.name,
                "op": node.op,
                "inputs_count": len(node.inputs),
                "inputs_types": [type(inp).__name__ for inp in node.inputs],
                "second_input_name": (
                    node.inputs[1].name if len(node.inputs) > 1 else None
                ),
            }
        )
        if len(node.inputs) != 2:
            continue

        data_input = node.inputs[0]
        axes_var = node.inputs[1]
        constant_node = None
        axes_source = None
        # Constant tensors (initializers) carry their data on `.values`; plain
        # variables do not, so they fall through to the producer lookup.
        axes_values = getattr(axes_var, "values", None)
        if axes_values is not None:
            axes_source = f"initializer '{axes_var.name}'"
        else:
            constant_node = constant_producers.get(axes_var.name)
            if constant_node is not None:
                axes_source = f"Constant '{constant_node.name}'"
                const_attrs = constant_node.attrs
                if "value" in const_attrs:
                    axes_values = const_attrs["value"].values
                elif "value_ints" in const_attrs:
                    axes_values = const_attrs["value_ints"]
                elif "value_int" in const_attrs:
                    axes_values = const_attrs["value_int"]

        if axes_values is None:
            print(
                f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}"
            )
            continue

        # Flatten so that scalar (0-d) axes still end up as a list of plain ints.
        axes_values = [int(axis) for axis in np.asarray(axes_values).reshape(-1)]
        # Update node: remove second input, add axes attr
        node.inputs = [data_input]
        node.attrs["axes"] = axes_values
        # Ensure keepdims is set (default 1 for most reductions)
        if "keepdims" not in node.attrs:
            node.attrs["keepdims"] = 1
        fixed_count += 1
        print(
            f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from {axes_source}"
        )

    if debug_nodes:
        # sorted() keeps the op list stable between runs (set order is not).
        print(
            f"[{graph_name}] Total {', '.join(sorted({dn['op'] for dn in debug_nodes}))} nodes: {len(debug_nodes)}, Fixed: {fixed_count}"
        )
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(
                f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}"
            )
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count


def create_combined_onnx(
    model_path1,
    model_path2,
    whitelist1: List[int],
    whitelist2: List[int],
    scale_const: float = 3.0,
    output_path="combined.onnx",
):
    """
    Fuse two ONNX classifiers into one graph that picks logits via class whitelists.

    Both models are always evaluated on the shared inputs; per sample the graph then
    selects (with `Where` nodes):
    - logits1 if argmax(logits1) is in whitelist1,
    - else logits2 if argmax(logits2) is in whitelist2,
    - else the element-wise average of logits1 and logits2.
    The selected logits are multiplied by scale_const and passed through Softmax (axis=1).

    Assumes:
    - Both models take the image as input 0 and the demographics as input 1.
    - Both expose their logits as output 0 with shape [batch_size, num_classes].
    - Whitelists are lists of class indices (integers).
    - `fix_reduction_nodes` is defined in the same scope.
    - You need to pip install onnx onnx-graphsurgeon if not already installed.

    Args:
        model_path1: Path of the first ONNX model.
        model_path2: Path of the second ONNX model.
        whitelist1: Class indices for which model1's prediction is trusted.
        whitelist2: Class indices for which model2's prediction is trusted.
        scale_const: Factor applied to the selected logits before Softmax.
        output_path: Where the combined model is saved.

    Returns:
        The combined `onnx.ModelProto` (also written to `output_path`).
    """
    # Load the models and import them into graph surgeon
    graph1 = gs.import_onnx(onnx.load(model_path1))
    graph2 = gs.import_onnx(onnx.load(model_path2))

    # Fix reduction nodes in BOTH graphs for thoroughness
    total_fixed = fix_reduction_nodes(graph1, "Model1")
    total_fixed += fix_reduction_nodes(graph2, "Model2")
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Model1's inputs become the inputs of the combined graph
    image_input = graph1.inputs[0]
    image_input.name = "image"
    demographics_input = graph1.inputs[1]
    demographics_input.name = "demographics"

    old_image_input = graph2.inputs[0]
    old_demo_input = graph2.inputs[1]

    # Namespace model2: the two models can use identical tensor/node names, and
    # duplicate names in the merged graph would make tensors ambiguous on export.
    # The old inputs are skipped because they are replaced right below.
    replaced_ids = {id(old_image_input), id(old_demo_input)}
    for tensor in list(graph2.tensors().values()):
        if id(tensor) not in replaced_ids and tensor.name:
            tensor.name = "g2_" + tensor.name
    for node in graph2.nodes:
        if node.name:
            node.name = "g2_" + node.name

    # Replace all references in graph2 nodes to use the shared inputs
    for node in graph2.nodes:
        for i in range(len(node.inputs)):
            if node.inputs[i] is old_image_input:
                node.inputs[i] = image_input
            elif node.inputs[i] is old_demo_input:
                node.inputs[i] = demographics_input
    graph2.inputs[0] = image_input
    graph2.inputs[1] = demographics_input

    # Get outputs (assume single output each)
    logits1 = graph1.outputs[0]
    logits1.name = "logits1"
    logits2 = graph2.outputs[0]
    logits2.name = "logits2"

    # Extract num_classes from logits1 shape (assume [batch, num_classes]; batch dynamic)
    orig_shape = logits1.shape
    if orig_shape and len(orig_shape) >= 2:
        num_classes = orig_shape[-1]
        if num_classes == 0 or num_classes is None:
            num_classes = 11  # Fallback assumption
    else:
        num_classes = 11  # Fallback
        print(
            f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback [None, 11]"
        )
    output_shape = [None, num_classes]  # Dynamic batch
    print(f"Inferred output shape: {output_shape}")

    # Constants shared by both whitelist checks
    zero_const = gs.Constant("zero", values=np.array(0.0, dtype=np.float32))
    two_const = gs.Constant("two", values=np.array(2.0, dtype=np.float32))
    scale_const_node = gs.Constant("scale", values=np.array(scale_const, dtype=np.float32))
    axes_unsq = gs.Constant("axes_unsq", values=np.array([1], dtype=np.int64))
    axes_reduce = gs.Constant("axes_reduce", values=np.array([1], dtype=np.int64))

    def _membership_nodes(logits, whitelist, tag):
        """Return (nodes, is_in); is_in is a [batch, 1] bool mask, True where argmax(logits) is whitelisted."""
        allowed = gs.Constant(f"whitelist{tag}", values=np.array(whitelist, dtype=np.int64))
        class_idx = gs.Variable(f"class{tag}", shape=[None], dtype=np.int64)
        class_unsq = gs.Variable(f"class{tag}_unsq", shape=[None, 1], dtype=np.int64)
        # np.bool_ instead of np.bool: the alias was removed from NumPy.
        eq = gs.Variable(f"eq{tag}", shape=[None, len(whitelist)], dtype=np.bool_)
        eq_float = gs.Variable(f"cast{tag}", shape=[None, len(whitelist)], dtype=np.float32)
        hits = gs.Variable(f"reduce{tag}", shape=[None, 1], dtype=np.float32)
        is_in = gs.Variable(f"is_in{tag}", shape=[None, 1], dtype=np.bool_)
        nodes = [
            # keepdims=0 is required: ArgMax keeps the reduced axis by default, which
            # would yield [batch, 1] here and a rank-3 mask after the Unsqueeze.
            gs.Node(op="ArgMax", inputs=[logits], outputs=[class_idx], attrs={"axis": 1, "keepdims": 0}),
            gs.Node(op="Unsqueeze", inputs=[class_idx, axes_unsq], outputs=[class_unsq]),
            # [batch, 1] == [K] broadcasts to [batch, K]
            gs.Node(op="Equal", inputs=[class_unsq, allowed], outputs=[eq]),
            gs.Node(op="Cast", inputs=[eq], outputs=[eq_float], attrs={"to": onnx.TensorProto.FLOAT}),
            gs.Node(op="ReduceSum", inputs=[eq_float, axes_reduce], outputs=[hits], attrs={"keepdims": 1}),
            gs.Node(op="Greater", inputs=[hits, zero_const], outputs=[is_in]),
        ]
        return nodes, is_in

    check1_nodes, is_in1 = _membership_nodes(logits1, whitelist1, "1")
    check2_nodes, is_in2 = _membership_nodes(logits2, whitelist2, "2")

    # Combine logic
    add_output = gs.Variable("add_output", shape=output_shape, dtype=np.float32)
    avg_logits = gs.Variable("avg_logits", shape=output_shape, dtype=np.float32)
    inner_selected = gs.Variable("inner_selected", shape=output_shape, dtype=np.float32)
    selected_logits = gs.Variable("selected_logits", shape=output_shape, dtype=np.float32)
    scaled_logits = gs.Variable("scaled_logits", shape=output_shape, dtype=np.float32)
    final_output = gs.Variable("final_output", shape=output_shape, dtype=np.float32)

    combine_nodes = [
        gs.Node(op="Add", inputs=[logits1, logits2], outputs=[add_output]),
        gs.Node(op="Div", inputs=[add_output, two_const], outputs=[avg_logits]),
        # The [batch, 1] masks broadcast against the [batch, num_classes] logits.
        gs.Node(op="Where", inputs=[is_in2, logits2, avg_logits], outputs=[inner_selected]),
        gs.Node(op="Where", inputs=[is_in1, logits1, inner_selected], outputs=[selected_logits]),
        gs.Node(op="Mul", inputs=[selected_logits, scale_const_node], outputs=[scaled_logits]),
        gs.Node(op="Softmax", inputs=[scaled_logits], outputs=[final_output], attrs={"axis": 1}),
    ]

    # Combined graph: nodes from both + new nodes; inputs: image + demographics; output: final_output
    combined_graph = gs.Graph(
        nodes=list(graph1.nodes) + list(graph2.nodes) + check1_nodes + check2_nodes + combine_nodes,
        inputs=[image_input, demographics_input],
        outputs=[final_output],
    )

    # Set opset on the graph for LayerNormalization support (opset 17+)
    combined_graph.opset = 17

    # Cleanup and export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes to fill in any missing (helps checker)
    combined_model = shape_inference.infer_shapes(combined_model)

    # Optional: Check model
    onnx.checker.check_model(combined_model)

    # Save
    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model


# Example run: fuse models 18 and 62 (replace the whitelists with your actual ones).
whitelist1 = [0, 3, 10]  # Example: classes for which model1's logits are used
whitelist2 = [1, 2, 4, 5, 6, 7, 8, 9]  # Example: classes for which model2's logits are used
combined = create_combined_onnx(
    model_path1="../../models/2025-11-27/speechmaster/18_model118.onnx",
    model_path2="../../models/2025-11-27/speechmaster/62_model94.onnx",
    whitelist1=whitelist1,
    whitelist2=whitelist2,
    scale_const=3.0,
    output_path="../../models/combine/2025-11-27/18vs62.onnx",
)

[Model1] Total ReduceMean nodes: 34, Fixed: 0
  - /model/base_model/blocks/blocks.0/blocks.0.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.0/blocks.0.1/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.1/blocks.1.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  ... and 31 more
[Model2] Total ReduceMean nodes: 32, Fixed: 0
  - /model/base_model/blocks/blocks.0/blocks.0.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.0/blocks.0.1/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.1/blocks.1.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  ... and 29 more
No reduction fixes applied - check debug output above
Inferred output shape: [None, 11]
[W] colored module is not installe

### Combining 18 vs 122

In [None]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
from onnx import shape_inference
from typing import List


def fix_reduction_nodes(graph: gs.Graph, graph_name: str = "unknown"):
    """
    Rewrite ReduceL2/ReduceMean nodes that carry `axes` as a second input so that
    `axes` becomes a node attribute instead.

    The axes values are resolved from either:
    - a constant tensor wired directly into the node (any input object exposing
      `.values`, which is how graphsurgeon represents initializers), or
    - a `Constant` op node whose single output is the axes tensor (attributes
      `value`, `value_ints` or `value_int`).

    On success the axes input is dropped, `axes` is stored as a flat list of ints and
    `keepdims` is set to 1 when the node does not define it. Producers that end up
    without consumers are not removed here; a later `graph.cleanup()` is expected
    to prune them. Debug information for every ReduceL2/ReduceMean node is printed.

    Args:
        graph: Graph whose nodes are patched in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of reduction nodes that were rewritten.
    """
    # Index Constant-op producers by output tensor name once (first producer wins)
    # rather than rescanning every node for each reduction.
    constant_producers = {}
    for candidate in graph.nodes:
        if (
            candidate.op == "Constant"
            and candidate.outputs
            and len(candidate.outputs) == 1
        ):
            constant_producers.setdefault(candidate.outputs[0].name, candidate)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in ("ReduceL2", "ReduceMean"):
            continue
        debug_nodes.append(
            {
                "name": node.name,
                "op": node.op,
                "inputs_count": len(node.inputs),
                "inputs_types": [type(inp).__name__ for inp in node.inputs],
                "second_input_name": (
                    node.inputs[1].name if len(node.inputs) > 1 else None
                ),
            }
        )
        if len(node.inputs) != 2:
            continue

        data_input = node.inputs[0]
        axes_var = node.inputs[1]
        constant_node = None
        axes_source = None
        # Constant tensors (initializers) carry their data on `.values`; plain
        # variables do not, so they fall through to the producer lookup.
        axes_values = getattr(axes_var, "values", None)
        if axes_values is not None:
            axes_source = f"initializer '{axes_var.name}'"
        else:
            constant_node = constant_producers.get(axes_var.name)
            if constant_node is not None:
                axes_source = f"Constant '{constant_node.name}'"
                const_attrs = constant_node.attrs
                if "value" in const_attrs:
                    axes_values = const_attrs["value"].values
                elif "value_ints" in const_attrs:
                    axes_values = const_attrs["value_ints"]
                elif "value_int" in const_attrs:
                    axes_values = const_attrs["value_int"]

        if axes_values is None:
            print(
                f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}"
            )
            continue

        # Flatten so that scalar (0-d) axes still end up as a list of plain ints.
        axes_values = [int(axis) for axis in np.asarray(axes_values).reshape(-1)]
        # Update node: remove second input, add axes attr
        node.inputs = [data_input]
        node.attrs["axes"] = axes_values
        # Ensure keepdims is set (default 1 for most reductions)
        if "keepdims" not in node.attrs:
            node.attrs["keepdims"] = 1
        fixed_count += 1
        print(
            f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from {axes_source}"
        )

    if debug_nodes:
        # sorted() keeps the op list stable between runs (set order is not).
        print(
            f"[{graph_name}] Total {', '.join(sorted({dn['op'] for dn in debug_nodes}))} nodes: {len(debug_nodes)}, Fixed: {fixed_count}"
        )
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(
                f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}"
            )
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count


def _rename_graph_tensors_and_nodes(
    graph: gs.Graph, prefix: str, skip_vars: List[gs.Variable] = None
):
    """Namespace `graph` by prepending `prefix` to its tensor and node names.

    Used to keep names unique when several graphs are merged. Tensors passed in
    `skip_vars` keep their name; they are matched by object identity (`id`), not
    by name, so a shared input Variable object is never renamed. Tensors and
    nodes with an empty name are left as they are.
    """
    protected_ids = set(map(id, skip_vars or []))

    # Tensors first; iterate over a snapshot of the tensor objects.
    for tensor in tuple(graph.tensors().values()):
        if tensor.name and id(tensor) not in protected_ids:
            tensor.name = prefix + tensor.name

    # Then every node that actually has a name.
    for named_node in (n for n in graph.nodes if n.name):
        named_node.name = prefix + named_node.name


def create_combined_onnx(
    model_path1,
    model_path2,
    whitelist1: List[int],
    whitelist2: List[int],
    scale_const: float = 3.0,
    output_path="combined.onnx",
):
    """
    Fuse two ONNX classifiers into one graph that picks logits via class whitelists.

    Both models are always evaluated on the shared inputs; per sample the graph then
    selects (with `Where` nodes):
    - logits1 if argmax(logits1) is in whitelist1,
    - else logits2 if argmax(logits2) is in whitelist2,
    - else the element-wise average of logits1 and logits2.
    The selected logits are multiplied by scale_const and passed through Softmax (axis=1).

    Assumes:
    - Both models take the image as input 0 and the demographics as input 1.
    - Both expose their logits as output 0 with shape [batch_size, num_classes].
    - Whitelists are lists of class indices (integers).
    - `fix_reduction_nodes` and `_rename_graph_tensors_and_nodes` are defined in
      the same scope.
    - You need to pip install onnx onnx-graphsurgeon if not already installed.

    Args:
        model_path1: Path of the first ONNX model.
        model_path2: Path of the second ONNX model.
        whitelist1: Class indices for which model1's prediction is trusted.
        whitelist2: Class indices for which model2's prediction is trusted.
        scale_const: Factor applied to the selected logits before Softmax.
        output_path: Where the combined model is saved.

    Returns:
        The combined `onnx.ModelProto` (also written to `output_path`).
    """
    # Load the models and import them into graph surgeon
    graph1 = gs.import_onnx(onnx.load(model_path1))
    graph2 = gs.import_onnx(onnx.load(model_path2))

    # Fix reduction nodes in BOTH graphs for thoroughness
    total_fixed = fix_reduction_nodes(graph1, "Model1")
    total_fixed += fix_reduction_nodes(graph2, "Model2")
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Model1's inputs become the inputs of the combined graph
    image_input = graph1.inputs[0]
    image_input.name = "image"
    demographics_input = graph1.inputs[1]
    demographics_input.name = "demographics"

    old_image_input = graph2.inputs[0]
    old_demo_input = graph2.inputs[1]

    # Namespace model2 so its tensor/node names cannot collide with model1's.
    _rename_graph_tensors_and_nodes(
        graph2, prefix="g2_", skip_vars=[old_image_input, old_demo_input]
    )

    # Replace all references in graph2 nodes to use the shared inputs
    for node in graph2.nodes:
        for i in range(len(node.inputs)):
            if node.inputs[i] is old_image_input:
                node.inputs[i] = image_input
            elif node.inputs[i] is old_demo_input:
                node.inputs[i] = demographics_input
    graph2.inputs[0] = image_input
    graph2.inputs[1] = demographics_input

    # Get outputs (assume single output each)
    logits1 = graph1.outputs[0]
    logits1.name = "logits1"
    logits2 = graph2.outputs[0]
    logits2.name = "logits2"

    # Extract num_classes from logits1 shape (assume [batch, num_classes]; batch dynamic).
    # It feeds an int64 shape constant below, so a missing, zero or symbolic (string)
    # dimension falls back to 11.
    orig_shape = logits1.shape
    num_classes = orig_shape[-1] if orig_shape and len(orig_shape) >= 2 else None
    if not isinstance(num_classes, (int, np.integer)) or num_classes <= 0:
        num_classes = 11  # Fallback assumption
        print(
            f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback [None, 11]"
        )
    num_classes = int(num_classes)
    output_shape = [None, num_classes]  # Dynamic batch
    print(f"Inferred output shape: {output_shape}")

    # Constants shared by both whitelist checks
    zero_const = gs.Constant("zero", values=np.array(0.0, dtype=np.float32))
    two_const = gs.Constant("two", values=np.array(2.0, dtype=np.float32))
    scale_const_node = gs.Constant(
        "scale", values=np.array(scale_const, dtype=np.float32)
    )
    axes_unsq = gs.Constant("axes_unsq", values=np.array([1], dtype=np.int64))
    axes_reduce = gs.Constant("axes_reduce", values=np.array([1], dtype=np.int64))
    # Expand follows broadcasting rules: 1 (not -1) keeps the input's batch dimension.
    logits_shape = gs.Constant(
        "logits_shape", values=np.array([1, num_classes], dtype=np.int64)
    )

    def _membership_nodes(logits, whitelist, tag):
        """Return (nodes, mask); mask is a [batch, num_classes] bool tensor, True on rows whose argmax(logits) is whitelisted."""
        allowed = gs.Constant(f"whitelist{tag}", values=np.array(whitelist, dtype=np.int64))
        class_idx = gs.Variable(f"class{tag}", shape=[None], dtype=np.int64)
        class_unsq = gs.Variable(f"class{tag}_unsq", shape=[None, 1], dtype=np.int64)
        # np.bool_ instead of np.bool: the alias was removed from NumPy.
        eq = gs.Variable(f"eq{tag}", shape=[None, len(whitelist)], dtype=np.bool_)
        eq_float = gs.Variable(f"cast{tag}", shape=[None, len(whitelist)], dtype=np.float32)
        hits = gs.Variable(f"reduce{tag}", shape=[None, 1], dtype=np.float32)
        is_in = gs.Variable(f"is_in{tag}", shape=[None, 1], dtype=np.bool_)
        is_in_exp = gs.Variable(f"is_in{tag}_exp", shape=output_shape, dtype=np.bool_)
        nodes = [
            # keepdims=0 is required: ArgMax keeps the reduced axis by default, which
            # would yield [batch, 1] here and a rank-3 mask after the Unsqueeze.
            gs.Node(op="ArgMax", inputs=[logits], outputs=[class_idx], attrs={"axis": 1, "keepdims": 0}),
            gs.Node(op="Unsqueeze", inputs=[class_idx, axes_unsq], outputs=[class_unsq]),
            # [batch, 1] == [K] broadcasts to [batch, K]
            gs.Node(op="Equal", inputs=[class_unsq, allowed], outputs=[eq]),
            gs.Node(op="Cast", inputs=[eq], outputs=[eq_float], attrs={"to": onnx.TensorProto.FLOAT}),
            gs.Node(op="ReduceSum", inputs=[eq_float, axes_reduce], outputs=[hits], attrs={"keepdims": 1}),
            gs.Node(op="Greater", inputs=[hits, zero_const], outputs=[is_in]),
            gs.Node(op="Expand", inputs=[is_in, logits_shape], outputs=[is_in_exp]),
        ]
        return nodes, is_in_exp

    check1_nodes, is_in1_exp = _membership_nodes(logits1, whitelist1, "1")
    check2_nodes, is_in2_exp = _membership_nodes(logits2, whitelist2, "2")

    # Combine logic
    add_output = gs.Variable("add_output", shape=output_shape, dtype=np.float32)
    avg_logits = gs.Variable("avg_logits", shape=output_shape, dtype=np.float32)
    inner_selected = gs.Variable("inner_selected", shape=output_shape, dtype=np.float32)
    selected_logits = gs.Variable(
        "selected_logits", shape=output_shape, dtype=np.float32
    )
    scaled_logits = gs.Variable("scaled_logits", shape=output_shape, dtype=np.float32)
    final_output = gs.Variable("final_output", shape=output_shape, dtype=np.float32)

    combine_nodes = [
        gs.Node(op="Add", inputs=[logits1, logits2], outputs=[add_output]),
        gs.Node(op="Div", inputs=[add_output, two_const], outputs=[avg_logits]),
        gs.Node(op="Where", inputs=[is_in2_exp, logits2, avg_logits], outputs=[inner_selected]),
        gs.Node(op="Where", inputs=[is_in1_exp, logits1, inner_selected], outputs=[selected_logits]),
        gs.Node(op="Mul", inputs=[selected_logits, scale_const_node], outputs=[scaled_logits]),
        gs.Node(op="Softmax", inputs=[scaled_logits], outputs=[final_output], attrs={"axis": 1}),
    ]

    # Combined graph: nodes from both + new nodes; inputs: image + demographics; output: final_output
    combined_graph = gs.Graph(
        nodes=list(graph1.nodes) + list(graph2.nodes) + check1_nodes + check2_nodes + combine_nodes,
        inputs=[image_input, demographics_input],
        outputs=[final_output],
    )

    # Set opset on the graph for LayerNormalization support (opset 17+)
    combined_graph.opset = 17

    # Cleanup and export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes to fill in any missing (helps checker)
    combined_model = shape_inference.infer_shapes(combined_model)

    # Optional: Check model
    onnx.checker.check_model(combined_model)

    # Save
    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model


# Example run (replace the whitelists with your actual ones).
# NOTE(review): this cell sits under the "Combining 18 vs 122" heading, yet the
# second model path and the output file are the ones of the 18 vs 62 cell, so
# running it rewrites 18vs62.onnx — confirm the intended paths.
whitelist1 = [0, 3, 10]  # Example: classes for which model1's logits are used
whitelist2 = [1, 2, 4, 5, 6, 7, 8, 9]  # Example: classes for which model2's logits are used
combined = create_combined_onnx(
    model_path1="../../models/2025-11-27/speechmaster/18_model118.onnx",
    model_path2="../../models/2025-11-27/speechmaster/62_model94.onnx",
    whitelist1=whitelist1,
    whitelist2=whitelist2,
    scale_const=3.0,
    output_path="../../models/combine/2025-11-27/18vs62.onnx",
)

[Model1] Total ReduceMean nodes: 34, Fixed: 0
  - /model/base_model/blocks/blocks.0/blocks.0.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.0/blocks.0.1/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.1/blocks.1.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  ... and 31 more
[Model2] Total ReduceMean nodes: 32, Fixed: 0
  - /model/base_model/blocks/blocks.0/blocks.0.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.0/blocks.0.1/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  - /model/base_model/blocks/blocks.1/blocks.1.0/se/ReduceMean: ReduceMean, 1 inputs, types: ['Variable'], second_name: None
  ... and 29 more
No reduction fixes applied - check debug output above
Inferred output shape: [None, 11]
Combined ONNX model saved to ../..

### Combining 148 vs 196

In [None]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
from onnx import shape_inference
from typing import List


def fix_reduction_nodes(graph: gs.Graph, graph_name: str = "unknown"):
    """
    Rewrite ReduceL2/ReduceMean nodes that carry `axes` as a second input so that
    `axes` becomes a node attribute instead.

    The axes values are resolved from either:
    - a constant tensor wired directly into the node (any input object exposing
      `.values`, which is how graphsurgeon represents initializers), or
    - a `Constant` op node whose single output is the axes tensor (attributes
      `value`, `value_ints` or `value_int`).

    On success the axes input is dropped, `axes` is stored as a flat list of ints and
    `keepdims` is set to 1 when the node does not define it. Producers that end up
    without consumers are not removed here; a later `graph.cleanup()` is expected
    to prune them. Debug information for every ReduceL2/ReduceMean node is printed.

    Args:
        graph: Graph whose nodes are patched in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of reduction nodes that were rewritten.
    """
    # Index Constant-op producers by output tensor name once (first producer wins)
    # rather than rescanning every node for each reduction.
    constant_producers = {}
    for candidate in graph.nodes:
        if (
            candidate.op == "Constant"
            and candidate.outputs
            and len(candidate.outputs) == 1
        ):
            constant_producers.setdefault(candidate.outputs[0].name, candidate)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in ("ReduceL2", "ReduceMean"):
            continue
        debug_nodes.append(
            {
                "name": node.name,
                "op": node.op,
                "inputs_count": len(node.inputs),
                "inputs_types": [type(inp).__name__ for inp in node.inputs],
                "second_input_name": (
                    node.inputs[1].name if len(node.inputs) > 1 else None
                ),
            }
        )
        if len(node.inputs) != 2:
            continue

        data_input = node.inputs[0]
        axes_var = node.inputs[1]
        constant_node = None
        axes_source = None
        # Constant tensors (initializers) carry their data on `.values`; plain
        # variables do not, so they fall through to the producer lookup.
        axes_values = getattr(axes_var, "values", None)
        if axes_values is not None:
            axes_source = f"initializer '{axes_var.name}'"
        else:
            constant_node = constant_producers.get(axes_var.name)
            if constant_node is not None:
                axes_source = f"Constant '{constant_node.name}'"
                const_attrs = constant_node.attrs
                if "value" in const_attrs:
                    axes_values = const_attrs["value"].values
                elif "value_ints" in const_attrs:
                    axes_values = const_attrs["value_ints"]
                elif "value_int" in const_attrs:
                    axes_values = const_attrs["value_int"]

        if axes_values is None:
            print(
                f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}"
            )
            continue

        # Flatten so that scalar (0-d) axes still end up as a list of plain ints.
        axes_values = [int(axis) for axis in np.asarray(axes_values).reshape(-1)]
        # Update node: remove second input, add axes attr
        node.inputs = [data_input]
        node.attrs["axes"] = axes_values
        # Ensure keepdims is set (default 1 for most reductions)
        if "keepdims" not in node.attrs:
            node.attrs["keepdims"] = 1
        fixed_count += 1
        print(
            f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from {axes_source}"
        )

    if debug_nodes:
        # sorted() keeps the op list stable between runs (set order is not).
        print(
            f"[{graph_name}] Total {', '.join(sorted({dn['op'] for dn in debug_nodes}))} nodes: {len(debug_nodes)}, Fixed: {fixed_count}"
        )
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(
                f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}"
            )
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count


def _rename_graph_tensors_and_nodes(graph: gs.Graph, prefix: str, skip_vars: List[gs.Variable] = None):
    """Prefix all tensor and node names in `graph` with `prefix`, except variables in skip_vars.

    This avoids name collisions when combining multiple graphs. We compare skip_vars by object id to
    ensure we don't rename the shared input Variable object.
    """
    if skip_vars is None:
        skip_vars = []
    skip_ids = {id(v) for v in skip_vars}

    # Rename variables (tensors)
    tensors = list(graph.tensors().values())
    for var in tensors:
        if id(var) in skip_ids:
            continue
        if var.name:
            var.name = prefix + var.name

    # Rename nodes
    for node in graph.nodes:
        if node.name:
            node.name = prefix + node.name


def create_combined_onnx(model_path1, model_path2, output_path='combined.onnx', weight1=0.7, weight2=0.3):
    """
    Fuse two ONNX classifiers into a single weighted soft-voting model.

    - Model1: input 0 is treated as 'image', input 1 as 'demographics' -> logits1
    - Model2: input 0 is treated as the image; input 1, when present, as demographics -> logits2
    - Combined: takes 'image' and 'demographics' ->
      weight1 * softmax(logits1) + weight2 * softmax(logits2)

    The second graph's tensors/nodes are prefixed with 'g2_' to avoid name collisions, and both graphs
    are rewired onto the same input Variable objects so the exported model has a single 'image' and a
    single 'demographics' input.

    Args:
        model_path1: Path of the first ONNX model (must expose at least two inputs).
        model_path2: Path of the second ONNX model (one or two inputs).
        output_path: Destination file for the combined model.
        weight1: Multiplier applied to softmax(logits1). Default 0.7 (the value previously hard-coded).
        weight2: Multiplier applied to softmax(logits2). Default 0.3 (the value previously hard-coded).

    Returns:
        The combined `onnx.ModelProto`, after shape inference and checker validation.

    Raises:
        IndexError: if model1 has fewer than two inputs or model2 has no inputs.
    """
    # Load the models
    onnx_model1 = onnx.load(model_path1)
    onnx_model2 = onnx.load(model_path2)

    # Import into graph surgeon
    graph1 = gs.import_onnx(onnx_model1)
    graph2 = gs.import_onnx(onnx_model2)

    # Fix reduction nodes in BOTH graphs for thoroughness
    total_fixed = fix_reduction_nodes(graph1, "Model1") + fix_reduction_nodes(graph2, "Model2")
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Fail with a readable message instead of a bare "list index out of range"
    if len(graph1.inputs) < 2:
        raise IndexError(
            f"model1 must expose (image, demographics) inputs; found {len(graph1.inputs)} input(s)"
        )
    if len(graph2.inputs) < 1:
        raise IndexError("model2 exposes no inputs")

    # Model1's input Variables become the shared inputs of the combined graph
    image_input = graph1.inputs[0]
    image_input.name = 'image'
    demographics_input = graph1.inputs[1]
    demographics_input.name = 'demographics'

    # Grab model2's input objects BEFORE renaming so those specific Variables can be skipped.
    # Model2 may be image-only, in which case there is no demographics input to rewire.
    old_image_input = graph2.inputs[0]
    old_demo_input = graph2.inputs[1] if len(graph2.inputs) > 1 else None
    kept_as_is = [var for var in (old_image_input, old_demo_input) if var is not None]

    # Rename graph2 tensors/nodes to avoid clashes (but don't rename its input Variables)
    _rename_graph_tensors_and_nodes(graph2, prefix='g2_', skip_vars=kept_as_is)

    # Point every graph2 consumer of the old inputs at the shared input objects
    for node in graph2.nodes:
        for i in range(len(node.inputs)):
            if node.inputs[i] is old_image_input:
                node.inputs[i] = image_input
            elif old_demo_input is not None and node.inputs[i] is old_demo_input:
                node.inputs[i] = demographics_input

    # Keep graph2's own inputs list consistent with the rewiring
    graph2.inputs[0] = image_input
    if old_demo_input is not None:
        graph2.inputs[1] = demographics_input

    # Get outputs (assume single output each)
    logits1 = graph1.outputs[0]
    logits1.name = 'logits1'

    logits2 = graph2.outputs[0]
    logits2.name = 'logits2'

    # Extract num_classes from logits1 shape (assume [batch, num_classes]; batch dynamic)
    fallback_classes = 11  # Fallback assumption based on reported output size
    orig_shape = logits1.shape
    if orig_shape and len(orig_shape) >= 2:
        num_classes = orig_shape[-1]
        if num_classes == 0 or num_classes is None:
            num_classes = fallback_classes
        output_shape = [None, num_classes]  # Dynamic batch
    else:
        output_shape = [None, fallback_classes]
        # Report the fallback actually used (the old message said 10 while using 11)
        print(f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback {output_shape}")

    print(f"Inferred output shape: {output_shape}")

    # Define intermediate/output variables WITH dtype and shape (no flattening)
    def _float_var(name):
        return gs.Variable(name, shape=output_shape, dtype=onnx.TensorProto.FLOAT)

    probs1 = _float_var('probs1')
    probs2 = _float_var('probs2')
    avg_output1 = _float_var('avg_output1')
    avg_output2 = _float_var('avg_output2')
    sum_avg = _float_var('sum_avg')

    # Softmax on both models' logits (axis=1 for [batch, classes])
    softmax1 = gs.Node(op='Softmax', inputs=[logits1], outputs=[probs1], attrs={'axis': 1})
    softmax2 = gs.Node(op='Softmax', inputs=[logits2], outputs=[probs2], attrs={'axis': 1})

    # Weighted blend: weight1 * probs1 + weight2 * probs2 (scalar constants broadcast over the batch)
    weight1_const = gs.Constant(name='ensemble_weight1', values=np.array(weight1, dtype=np.float32))
    weight2_const = gs.Constant(name='ensemble_weight2', values=np.array(weight2, dtype=np.float32))
    mul1 = gs.Node(op='Mul', inputs=[probs1, weight1_const], outputs=[avg_output1])
    mul2 = gs.Node(op='Mul', inputs=[probs2, weight2_const], outputs=[avg_output2])
    add = gs.Node(op='Add', inputs=[avg_output1, avg_output2], outputs=[sum_avg])

    # Combined graph: nodes from both + new nodes; inputs: image + demographics; output: sum_avg
    # graph1 nodes come first, then the namespaced graph2 nodes, then the ensemble head, so producers
    # appear before their consumers.
    combined_graph = gs.Graph(
        nodes=graph1.nodes + graph2.nodes + [softmax1, softmax2, mul1, mul2, add],
        inputs=[image_input, demographics_input],
        outputs=[sum_avg]
    )

    # Set opset on the graph for LayerNormalization support (opset 17+)
    combined_graph.opset = 17

    # Cleanup drops nodes/tensors that no longer contribute to the output, then export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes to fill in any missing (helps checker)
    combined_model = shape_inference.infer_shapes(combined_model)

    # Validate before writing anything to disk
    onnx.checker.check_model(combined_model)

    # Save
    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model

# Example run: fuse models 148 and 196 into a single file.
# The paths are relative to the notebook's working directory - change them to suit.
combined = create_combined_onnx(
    model_path1='model/medicaldev_148.onnx',
    model_path2='model/medicaldev_196.onnx',
    output_path="model/medicaldev_148_196.onnx",
)


### Downgrade the IR version

In [None]:
import onnx

# Where the re-stamped copy will be written
downgraded_path = "model/medicaldev_down_148_196.onnx"

# Read the combined model from disk
model = onnx.load("model/medicaldev_148_196.onnx")

# Show what the file declares before it is modified (debugging aid)
print("Original IR version:", model.ir_version)
print(
    "Original opset versions:",
    [(entry.domain, entry.version) for entry in model.opset_import],
)

# Overwrite only the declared IR version; the graph and opset imports are untouched.
# NOTE(review): the earlier comment here said "11" while the code sets 10 - confirm which
# value the target runtime actually requires.
model.ir_version = 10

# Persist the modified model under the new name
onnx.save(model, downgraded_path)
print(f"Downgraded model saved to: {downgraded_path}")

# 61 vs 62

In [None]:
import onnx
import onnx_graphsurgeon as gs
import numpy as np
from onnx import shape_inference
from typing import List
import os


def fix_reduction_nodes(graph: gs.Graph, graph_name: str = "unknown"):
    """
    Move the `axes` of two-input ReduceL2 / ReduceMean nodes from an input into the `axes` attribute.

    For every ReduceL2/ReduceMean node with exactly two inputs, the axes are looked up from:
      1. the second input itself, when it is a constant tensor carrying `.values`
         (e.g. axes stored as a graph initializer), or
      2. the single-output `Constant` node whose output has the same name as the second input
         (attributes `value`, `value_ints` or `value_int`).
    When the axes are found, the node keeps only its data input, gets `axes` as a list of ints and
    gets `keepdims=1` if it had no `keepdims`. The now-unused constant is not removed here; a later
    `graph.cleanup()` is expected to drop it. Nodes whose axes cannot be resolved are left untouched
    and a warning is printed.

    Args:
        graph: Graph to patch in place.
        graph_name: Label used as a prefix in the printed diagnostics.

    Returns:
        Number of reduction nodes that were rewritten.
    """
    reduce_ops = ("ReduceL2", "ReduceMean")

    def _as_axes_list(raw):
        # Flatten so that a scalar axis still yields a list, and cast to plain Python ints
        return [int(axis) for axis in np.asarray(raw).reshape(-1)]

    def _axes_from_constant_node(const_node):
        attrs = const_node.attrs
        if "value" in attrs:
            payload = attrs["value"]
            # `value` holds a tensor object exposing `.values`; fall back to the raw payload otherwise
            return _as_axes_list(getattr(payload, "values", payload))
        for key in ("value_ints", "value_int"):
            if key in attrs:
                return _as_axes_list(attrs[key])
        return None

    # Index single-output Constant nodes by output name once, instead of rescanning the whole graph
    # for every reduction node. setdefault keeps the first producer in graph order.
    constant_by_output = {}
    for candidate in graph.nodes:
        if candidate.op == "Constant" and len(candidate.outputs) == 1:
            constant_by_output.setdefault(candidate.outputs[0].name, candidate)

    fixed_count = 0
    debug_nodes = []
    for node in graph.nodes:
        if node.op not in reduce_ops:
            continue
        debug_nodes.append(
            {
                "name": node.name,
                "op": node.op,
                "inputs_count": len(node.inputs),
                "inputs_types": [type(inp).__name__ for inp in node.inputs],
                "second_input_name": (
                    node.inputs[1].name if len(node.inputs) > 1 else None
                ),
            }
        )
        if len(node.inputs) != 2:
            continue

        data_input, axes_var = node.inputs[0], node.inputs[1]
        constant_node = None
        axes_values = None
        axes_source = None

        # Duck-typed check: constant tensors expose `.values`, plain variables do not
        inline_values = getattr(axes_var, "values", None)
        if inline_values is not None:
            axes_values = _as_axes_list(inline_values)
            axes_source = f"initializer '{axes_var.name}'"
        else:
            constant_node = constant_by_output.get(axes_var.name)
            if constant_node is not None:
                axes_values = _axes_from_constant_node(constant_node)
                axes_source = f"Constant '{constant_node.name}'"

        if axes_values is None:
            print(
                f"[{graph_name}] Warning: Could not find/extract axes for {node.op} '{node.name}'; second input '{axes_var.name}', Constant found: {constant_node is not None}"
            )
            continue

        # Update node: remove second input, add axes attr
        node.inputs = [data_input]
        node.attrs["axes"] = axes_values
        # Ensure keepdims is set (default 1 for most reductions)
        if "keepdims" not in node.attrs:
            node.attrs["keepdims"] = 1
        fixed_count += 1
        print(
            f"[{graph_name}] Fixed {node.op} node '{node.name}': axes {axes_values} extracted from {axes_source}"
        )

    if debug_nodes:
        # sorted() keeps the summary line stable between runs
        op_names = ", ".join(sorted({dn["op"] for dn in debug_nodes}))
        print(
            f"[{graph_name}] Total {op_names} nodes: {len(debug_nodes)}, Fixed: {fixed_count}"
        )
        for dn in debug_nodes[:3]:  # Print first 3 for brevity
            print(
                f"  - {dn['name']}: {dn['op']}, {dn['inputs_count']} inputs, types: {dn['inputs_types']}, second_name: {dn['second_input_name']}"
            )
        if len(debug_nodes) > 3:
            print(f"  ... and {len(debug_nodes)-3} more")
    return fixed_count


def _rename_graph_tensors_and_nodes(
    graph: gs.Graph, prefix: str, skip_vars: List[gs.Variable] = None
):
    """Prefix all tensor and node names in `graph` with `prefix`, except variables in skip_vars.

    This avoids name collisions when combining multiple graphs. We compare skip_vars by object id to
    ensure we don't rename the shared input Variable object.
    """
    if skip_vars is None:
        skip_vars = []
    skip_ids = {id(v) for v in skip_vars}

    # Rename variables (tensors)
    tensors = list(graph.tensors().values())
    for var in tensors:
        if id(var) in skip_ids:
            continue
        if var.name:
            var.name = prefix + var.name

    # Rename nodes
    for node in graph.nodes:
        if node.name:
            node.name = prefix + node.name


def create_combined_onnx(
    model_path1, model_path2, output_path="combined.onnx", weight1=0.5, weight2=0.5
):
    """
    Fuse two ONNX classifiers into a single weighted soft-voting model.

    - Model1: input 0 is treated as 'image', input 1 as 'demographics' -> logits1
    - Model2: input 0 is treated as the image; input 1, when present, as demographics -> logits2
    - Combined: takes 'image' and 'demographics' ->
      weight1 * softmax(logits1) + weight2 * softmax(logits2)

    The second graph's tensors/nodes are prefixed with 'g2_' to avoid name collisions, and both graphs
    are rewired onto the same input Variable objects so the exported model has a single 'image' and a
    single 'demographics' input.

    Args:
        model_path1: Path of the first ONNX model (must expose at least two inputs).
        model_path2: Path of the second ONNX model (one or two inputs).
        output_path: Destination file for the combined model.
        weight1: Multiplier applied to softmax(logits1). Default 0.5 (the value previously hard-coded).
        weight2: Multiplier applied to softmax(logits2). Default 0.5 (the value previously hard-coded).

    Returns:
        The combined `onnx.ModelProto`, after shape inference and checker validation.

    Raises:
        IndexError: if model1 has fewer than two inputs or model2 has no inputs.
    """
    # Load the models
    onnx_model1 = onnx.load(model_path1)
    onnx_model2 = onnx.load(model_path2)

    # Import into graph surgeon
    graph1 = gs.import_onnx(onnx_model1)
    graph2 = gs.import_onnx(onnx_model2)

    # Fix reduction nodes in BOTH graphs for thoroughness
    total_fixed = fix_reduction_nodes(graph1, "Model1") + fix_reduction_nodes(
        graph2, "Model2"
    )
    if total_fixed == 0:
        print("No reduction fixes applied - check debug output above")

    # Fail with a readable message instead of a bare "list index out of range"
    if len(graph1.inputs) < 2:
        raise IndexError(
            f"model1 must expose (image, demographics) inputs; found {len(graph1.inputs)} input(s)"
        )
    if len(graph2.inputs) < 1:
        raise IndexError("model2 exposes no inputs")

    # Model1's input Variables become the shared inputs of the combined graph
    image_input = graph1.inputs[0]
    image_input.name = "image"
    demographics_input = graph1.inputs[1]
    demographics_input.name = "demographics"

    # Grab model2's input objects BEFORE renaming so those specific Variables can be skipped.
    # Model2 may be image-only, in which case there is no demographics input to rewire.
    old_image_input = graph2.inputs[0]
    old_demo_input = graph2.inputs[1] if len(graph2.inputs) > 1 else None
    kept_as_is = [var for var in (old_image_input, old_demo_input) if var is not None]

    # Rename graph2 tensors/nodes to avoid clashes (but don't rename its input Variables)
    _rename_graph_tensors_and_nodes(graph2, prefix="g2_", skip_vars=kept_as_is)

    # Point every graph2 consumer of the old inputs at the shared input objects
    for node in graph2.nodes:
        for i in range(len(node.inputs)):
            if node.inputs[i] is old_image_input:
                node.inputs[i] = image_input
            elif old_demo_input is not None and node.inputs[i] is old_demo_input:
                node.inputs[i] = demographics_input

    # Keep graph2's own inputs list consistent with the rewiring
    graph2.inputs[0] = image_input
    if old_demo_input is not None:
        graph2.inputs[1] = demographics_input

    # Get outputs (assume single output each)
    logits1 = graph1.outputs[0]
    logits1.name = "logits1"

    logits2 = graph2.outputs[0]
    logits2.name = "logits2"

    # Extract num_classes from logits1 shape (assume [batch, num_classes]; batch dynamic)
    fallback_classes = 11  # Fallback assumption based on reported output size
    orig_shape = logits1.shape
    if orig_shape and len(orig_shape) >= 2:
        num_classes = orig_shape[-1]
        if num_classes == 0 or num_classes is None:
            num_classes = fallback_classes
        output_shape = [None, num_classes]  # Dynamic batch
    else:
        output_shape = [None, fallback_classes]
        # Report the fallback actually used (the old message said 10 while using 11)
        print(
            f"Warning: Could not infer num_classes from shape {orig_shape}; using fallback {output_shape}"
        )

    print(f"Inferred output shape: {output_shape}")

    # Define intermediate/output variables WITH dtype and shape (no flattening)
    def _float_var(name):
        return gs.Variable(name, shape=output_shape, dtype=onnx.TensorProto.FLOAT)

    probs1 = _float_var("probs1")
    probs2 = _float_var("probs2")
    avg_output1 = _float_var("avg_output1")
    avg_output2 = _float_var("avg_output2")
    sum_avg = _float_var("sum_avg")

    # Softmax on both models' logits (axis=1 for [batch, classes])
    softmax1 = gs.Node(
        op="Softmax", inputs=[logits1], outputs=[probs1], attrs={"axis": 1}
    )
    softmax2 = gs.Node(
        op="Softmax", inputs=[logits2], outputs=[probs2], attrs={"axis": 1}
    )

    # Weighted blend: weight1 * probs1 + weight2 * probs2 (scalar constants broadcast over the batch)
    weight1_const = gs.Constant(
        name="ensemble_weight1", values=np.array(weight1, dtype=np.float32)
    )
    weight2_const = gs.Constant(
        name="ensemble_weight2", values=np.array(weight2, dtype=np.float32)
    )
    mul1 = gs.Node(op="Mul", inputs=[probs1, weight1_const], outputs=[avg_output1])
    mul2 = gs.Node(op="Mul", inputs=[probs2, weight2_const], outputs=[avg_output2])
    add = gs.Node(op="Add", inputs=[avg_output1, avg_output2], outputs=[sum_avg])

    # Prune each source graph on its own before merging
    graph1.cleanup()
    graph2.cleanup()

    # Combined graph: nodes from both + new nodes; inputs: image + demographics; output: sum_avg
    # graph1 nodes come first, then the namespaced graph2 nodes, then the ensemble head, so producers
    # appear before their consumers.
    combined_graph = gs.Graph(
        nodes=graph1.nodes + graph2.nodes + [softmax1, softmax2, mul1, mul2, add],
        inputs=[image_input, demographics_input],
        outputs=[sum_avg],
    )

    # Set opset on the graph for LayerNormalization support (opset 17+)
    combined_graph.opset = 17

    # Cleanup drops nodes/tensors that no longer contribute to the output, then export
    combined_model = gs.export_onnx(combined_graph.cleanup())

    # Infer shapes to fill in any missing (helps checker)
    combined_model = shape_inference.infer_shapes(combined_model)

    # Validate before writing anything to disk
    onnx.checker.check_model(combined_model)

    # Save
    onnx.save(combined_model, output_path)
    print(f"Combined ONNX model saved to {output_path}")
    print(f"Output shape: {output_shape}")

    return combined_model

# Source directories (one per model family) and the directory receiving the combined models
model_dir_1 = os.path.join(MODEL_BASE_DIR, "2025-11-27/grose")
model_dir_2 = os.path.join(MODEL_BASE_DIR, "2025-11-27/speechmaster")
model_dir_3 = os.path.join(MODEL_BASE_DIR, "combine/2025-11-27")

# Make sure the output directory exists before any model is saved into it
os.makedirs(model_dir_3, exist_ok=True)

# Collect the .onnx files; sorted() makes the processing order reproducible
# (os.listdir, used by get_model_list, returns entries in arbitrary order)
grose_list = sorted(get_model_list(model_dir_1))
speech_list = sorted(get_model_list(model_dir_2))

# Combine every grose model with every speechmaster model
for path1 in grose_list:
    for path2 in speech_list:
        model_1_filename = os.path.basename(path1)
        model_2_filename = os.path.basename(path2)
        # Tag = file name up to the first underscore; the extension is stripped first so a
        # name without any underscore does not leak ".onnx" into the output file name.
        model_1_tag = os.path.splitext(model_1_filename)[0].split("_")[0]
        model_2_tag = os.path.splitext(model_2_filename)[0].split("_")[0]
        output_filename = f"{model_1_tag}vs{model_2_tag}.onnx"
        model_path_3 = os.path.join(model_dir_3, output_filename)
        combined = create_combined_onnx(path1, path2, model_path_3)