# Zero-Shot Experiments

In [1]:
from g4f.client import Client
import re
import time
from pathlib import Path
import trimesh
import contextlib
import tempfile
import pandas as pd
import cadquery as cq
from typing import Callable, Optional
from IPython.display import display
import numpy as np
import open3d as o3d
import ot
from scipy.spatial import KDTree
import os

from io import StringIO

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Pipeline

### LLM Access

In [19]:
def request(model: str, prompt: str, **kwargs) -> str:
    client = Client()

    messages = [{"role": "user", "content": prompt, "additional_data": []}]

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=False,
        # provider=g4f.Provider.Yqcloud,
        verbose=False,
        silent=True,
        web_search=False,
        seed=420,
        **kwargs,
    )

    return response.choices[0].message.content

### Code extraction

In [3]:
def extract_python_code(text: str):
    pattern = r"```python\n(.*?)\n```"
    matches = re.findall(pattern, text, re.DOTALL)
    if len(matches) == 0:
        return text

    return matches[0].strip()

### CadQuery Extraction

In [4]:
def execute_cadquery_code(code: str) -> cq.Workplane:
    safe_globals = {"cq": cq}

    output = StringIO()

    with contextlib.redirect_stdout(output):
        exec(code, safe_globals)

        if "r" in safe_globals and isinstance(safe_globals["r"], cq.Workplane):
            return safe_globals["r"]

    raise RuntimeError("No valid CadQuery object named 'r' found")

### CadQuery to Mesh

In [5]:
def cq_to_trimesh(workplane: cq.Workplane) -> trimesh.Trimesh:
    """
    Converts a CadQuery Workplane object into a trimesh.Trimesh object.
    """
    with tempfile.NamedTemporaryFile(suffix=".stl", delete=False) as temp_file:
        workplane.export(temp_file.name)
        mesh = trimesh.load_mesh(temp_file.name)
    return mesh

### Compute Metrics

In [6]:
def eval_wrapper(error_value: float = 0.0, precision: int = 5, debug: bool = False):
    """Decorator to handle exceptions, round results, and provide a default error value."""

    def decorator(func: Callable) -> Callable:
        def wrapper(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
                return round(float(result), precision)
            except Exception as e:
                if debug:
                    raise
                print(f"Error in {func.__name__}: {e}")
                return error_value

        return wrapper

    return decorator


# --- Helper Functions ---


def sample_surface_points(
    obj: trimesh.Trimesh, num_samples: int, seed: int = 420
) -> np.ndarray:
    """Samples points from a mesh surface."""
    return trimesh.sample.sample_surface(obj, num_samples, seed=seed)[0]


def get_vertices(obj: trimesh.Trimesh, max_points: Optional[int] = None) -> np.ndarray:
    """Gets (or subsamples) vertices from a mesh."""
    vertices = obj.vertices
    if max_points and len(vertices) > max_points:
        indices = np.random.choice(len(vertices), max_points, replace=False)
        return vertices[indices]
    return vertices


def _get_point_distances(
    points1: np.ndarray, points2: np.ndarray
) -> tuple[float, float]:
    """Helper to compute Chamfer and Hausdorff distances between point clouds."""
    tree1, tree2 = KDTree(points1), KDTree(points2)
    dist1, _ = tree1.query(points2, k=1)
    dist2, _ = tree2.query(points1, k=1)
    chamfer_dist = np.mean(np.square(dist1)) + np.mean(np.square(dist2))
    hausdorff_dist = max(np.max(dist1), np.max(dist2))
    return chamfer_dist, hausdorff_dist


def _scalar_similarity(val1: float, val2: float) -> float:
    """Helper to compute similarity between two scalar values."""
    maximum = max(val1, val2)
    return 1.0 - abs(val1 - val2) / maximum if maximum > 0 else 1.0


# --- Metric Functions ---


@eval_wrapper()
def iou(obj1: trimesh.Trimesh, obj2: trimesh.Trimesh) -> float:
    intersection = obj1.intersection(obj2, check_volume=False).volume
    union = obj1.union(obj2, check_volume=False).volume
    return intersection / union if union > 0 else 0.0


@eval_wrapper()
def voxel_iou(
    obj1: trimesh.Trimesh, obj2: trimesh.Trimesh, resolution: int = 64
) -> float:
    v1 = obj1.voxelized(pitch=obj1.scale / resolution).matrix
    v2 = obj2.voxelized(pitch=obj2.scale / resolution).matrix

    shape1, shape2 = np.array(v1.shape), np.array(v2.shape)
    max_shape = np.maximum(shape1, shape2)

    v1_padded = np.zeros(max_shape, dtype=bool)
    v1_padded[tuple(map(slice, shape1))] = v1

    v2_padded = np.zeros(max_shape, dtype=bool)
    v2_padded[tuple(map(slice, shape2))] = v2

    intersection = np.logical_and(v1_padded, v2_padded).sum()
    union = np.logical_or(v1_padded, v2_padded).sum()
    return intersection / union if union > 0 else 0.0


@eval_wrapper()
def inverse_chamfer_distance(
    obj1: trimesh.Trimesh, obj2: trimesh.Trimesh, num_samples: int = 5000
) -> float:
    points1 = sample_surface_points(obj1, num_samples)
    points2 = sample_surface_points(obj2, num_samples)
    chamfer, _ = _get_point_distances(points1, points2)
    return 1.0 - chamfer


@eval_wrapper()
def inverse_chamfer_distance_vertices(
    obj1: trimesh.Trimesh, obj2: trimesh.Trimesh, max_points: int = 5000
) -> float:
    points1 = get_vertices(obj1, max_points)
    points2 = get_vertices(obj2, max_points)
    chamfer, _ = _get_point_distances(points1, points2)
    return 1.0 - chamfer


@eval_wrapper()
def inverse_hausdorff_distance(
    obj1: trimesh.Trimesh, obj2: trimesh.Trimesh, num_samples: int = 5000
) -> float:
    points1 = sample_surface_points(obj1, num_samples)
    points2 = sample_surface_points(obj2, num_samples)
    _, hausdorff = _get_point_distances(points1, points2)
    return 1.0 - hausdorff


@eval_wrapper()
def inverse_hausdorff_distance_vertices(
    obj1: trimesh.Trimesh, obj2: trimesh.Trimesh, max_points: int = 5000
) -> float:
    points1 = get_vertices(obj1, max_points)
    points2 = get_vertices(obj2, max_points)
    _, hausdorff = _get_point_distances(points1, points2)
    return 1.0 - hausdorff


@eval_wrapper()
def inverse_wasserstein_distance(
    obj1: trimesh.Trimesh, obj2: trimesh.Trimesh, num_samples: int = 1000
) -> float:
    points1 = sample_surface_points(obj1, num_samples)
    points2 = sample_surface_points(obj2, num_samples)
    a = b = np.ones((num_samples,)) / num_samples
    cost_matrix = ot.dist(points1, points2, metric="sqeuclidean")
    emd2 = ot.emd2(a, b, cost_matrix)
    return 1.0 - emd2  # type: ignore


@eval_wrapper()
def volume_similarity(obj1: trimesh.Trimesh, obj2: trimesh.Trimesh) -> float:
    if not obj1.is_watertight or not obj2.is_watertight:
        return 0.0
    return _scalar_similarity(obj1.volume, obj2.volume)


@eval_wrapper()
def area_similarity(obj1: trimesh.Trimesh, obj2: trimesh.Trimesh) -> float:
    return _scalar_similarity(obj1.area, obj2.area)


@eval_wrapper()
def inverse_centroid_distance(obj1: trimesh.Trimesh, obj2: trimesh.Trimesh) -> float:
    distance = np.linalg.norm(obj1.centroid - obj2.centroid)
    return float(1.0 - distance)


@eval_wrapper()
def inertia_similarity(obj1: trimesh.Trimesh, obj2: trimesh.Trimesh) -> float:
    i1, i2 = obj1.moment_inertia, obj2.moment_inertia
    norm = np.linalg.norm(i1) + np.linalg.norm(i2)
    if norm == 0:
        return 1.0
    return float(1.0 - np.linalg.norm(i1 - i2) / norm)


METRICS_DICT: dict[str, Callable] = {
    "iou": iou,
    "viou": voxel_iou,
    "cd": inverse_chamfer_distance,
    # "cdv": inverse_chamfer_distance_vertices,
    "hd": inverse_hausdorff_distance,
    # "hdv": inverse_hausdorff_distance_vertices,
    "wd": inverse_wasserstein_distance,
    # "vs": volume_similarity,
    "as": area_similarity,
    # "ctd": inverse_centroid_distance,
    "is": inertia_similarity,
}


ORIENT_METRICS_DICT: dict[str, Callable] = {
    # "osi": orientation_similarity_pca_invariant,
    # "osf": orientation_similarity_faces,
    # "osv": orientation_similarity_vertices,
}


def center_mesh(mesh: trimesh.Trimesh) -> trimesh.Trimesh:
    # Get the centroid of the mesh
    centroid = mesh.centroid

    # Create a translation matrix
    T = np.eye(4)
    T[:3, 3] = -centroid  # translate by negative centroid

    # Apply transformation
    centered = mesh.copy()
    centered.apply_transform(T)
    return centered


def transform(obj: trimesh.Trimesh) -> trimesh.Trimesh:
    """Normalizes a mesh to be centered and fit within a unit cube."""
    center = obj.bounds.mean(axis=0)
    obj.apply_translation(-center)
    scale = obj.extents.max()
    if scale > 1e-7:
        # if scale > 1:
        obj.apply_scale(1.0 / scale)
    return center_mesh(obj)


def tri_to_o(trimesh_mesh: trimesh.Trimesh) -> o3d.geometry.TriangleMesh:
    vertices = np.asarray(trimesh_mesh.vertices)
    triangles = np.asarray(trimesh_mesh.faces)

    o3d_mesh = o3d.geometry.TriangleMesh()
    o3d_mesh.vertices = o3d.utility.Vector3dVector(vertices)
    o3d_mesh.triangles = o3d.utility.Vector3iVector(triangles)

    return o3d_mesh


def o_to_tri(o3d_mesh):
    vertices = np.asarray(o3d_mesh.vertices)
    faces = np.asarray(o3d_mesh.triangles)

    return trimesh.Trimesh(vertices=vertices, faces=faces)


def preprocess_point_cloud(pcd, voxel_size):
    pcd_down = pcd.voxel_down_sample(voxel_size)

    radius_normal = voxel_size * 2
    pcd_down.estimate_normals(
        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30)
    )

    radius_feature = voxel_size * 5
    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
        pcd_down,
        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100),
    )
    return pcd_down, pcd_fpfh


def execute_global_registration(
    source_down, target_down, source_fpfh, target_fpfh, voxel_size
):
    distance_threshold = voxel_size * 1.5
    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
        source_down,
        target_down,
        source_fpfh,
        target_fpfh,
        True,
        distance_threshold,
        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
        3,
        [
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(
                distance_threshold
            ),
        ],
        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999),
    )
    return result


def align_rot(
    source_mesh: trimesh.Trimesh,
    target_mesh: trimesh.Trimesh,
    n_points: int = 10000,
    voxel_size: float = 0.05,
) -> trimesh.Trimesh:
    # Convert to Open3D triangle meshes and sample point clouds
    target_pcd = tri_to_o(target_mesh).sample_points_uniformly(n_points)
    source_pcd = tri_to_o(source_mesh).sample_points_uniformly(n_points)
    # Preprocess point clouds
    source_down, source_fpfh = preprocess_point_cloud(source_pcd, voxel_size)
    target_down, target_fpfh = preprocess_point_cloud(target_pcd, voxel_size)

    # Register point clouds
    result_ransac = execute_global_registration(
        source_down, target_down, source_fpfh, target_fpfh, voxel_size
    )

    # Transform original Open3D mesh and convert back to trimesh
    source_o3d = tri_to_o(source_mesh)
    source_o3d.transform(result_ransac.transformation)

    return o_to_tri(source_o3d)


def evaluate(
    target_obj: trimesh.Trimesh, predicted_obj: trimesh.Trimesh, align: bool = True
) -> dict[str, float]:
    """Computes all metrics for two (normalized) meshes."""
    target_obj = transform(target_obj.copy())

    predicted_obj = transform(predicted_obj.copy())
    aligned_obj = predicted_obj.copy()

    if align:
        aligned_obj = transform(align_rot(aligned_obj, target_obj))

    return {
        **{
            name: metric_fn(target_obj, aligned_obj)
            for name, metric_fn in METRICS_DICT.items()
        },
        **{
            name: metric_fn(target_obj, predicted_obj)
            for name, metric_fn in ORIENT_METRICS_DICT.items()
        },
    }

### Plotting

In [7]:
def plot_mesh_comparison_scene(
    meshes: list[Optional[trimesh.Trimesh]],
    colors: Optional[list[Optional[np.ndarray]]] = None,
    align: Optional[bool] = False,
):
    scene = trimesh.Scene()
    valid_meshes = []
    valid_colors = []

    # Filter out None or empty/broken meshes
    for mesh, color in zip(meshes, colors or [None] * len(meshes)):
        if mesh is None or not isinstance(mesh, trimesh.Trimesh) or mesh.is_empty:
            print("Warning: Skipping empty or invalid mesh.")
            continue
        valid_meshes.append(mesh)
        valid_colors.append(color)

    if not valid_meshes:
        raise ValueError("No valid meshes to display.")

    valid_meshes = [transform(m) for m in valid_meshes]
    # Compute offset based on valid meshes only
    offset = max(m.extents[0] for m in valid_meshes) * 1.2

    # Center the baseline mesh (first one)
    baseline = valid_meshes[0]

    for idx, (mesh, color) in enumerate(zip(valid_meshes, valid_colors)):
        mesh.visual.face_colors = None if color is None else color
        if idx > 0 and align:
            # mesh = align_mesh(mesh, baseline)
            mesh = transform(align_rot(mesh, baseline))
        scene.add_geometry(
            mesh,
            transform=trimesh.transformations.translation_matrix([offset * idx, 0, 0]),
        )

    return scene.show()

### Whole pipeline

In [8]:
def zero_shot(
    model: str,
    prompt: str,
    baseline_path: Path,
    save_path: Optional[Path] = None,
    plot_meshes: bool = True,
    align_on_plot: bool = True,
    align_on_evaluate: bool = True,
) -> tuple[str, dict]:
    response = request(model, prompt)
    code = extract_python_code(response)

    try:
        workplane = execute_cadquery_code(code)
        predicted_obj = cq_to_trimesh(workplane)
        if save_path:
            predicted_obj.export(save_path)
        target_obj = trimesh.load_mesh(baseline_path)
        metrics = evaluate(target_obj, predicted_obj, align_on_evaluate)

        if plot_meshes:
            display(
                plot_mesh_comparison_scene(
                    meshes=[target_obj, predicted_obj],
                    colors=[
                        np.array([0, 255, 0]),
                        np.array([255, 0, 0]),
                    ],
                    align=align_on_plot,
                )
            )
    except Exception as e:
        print(f"Error executing code: {e}")
        return code, {}

    return code, metrics

In [24]:
def run_experiment_suite(
    model: str,
    prompt: str,
    baseline_path: Path,
    results_csv_path: Path,
    num_repeats: int = 5,
    num_retries: int = 10,
    generated_meshes_path: Optional[Path] = None,
    **zero_shot_kwargs,
) -> pd.DataFrame | None:
    METRIC_NAMES = ["iou", "viou", "cd", "wd", "as", "is"]
    all_results = []

    if generated_meshes_path:
        os.makedirs(generated_meshes_path, exist_ok=True)

    _zero_shot_plot_meshes = zero_shot_kwargs.get("plot_meshes", False)
    if "plot_meshes" in zero_shot_kwargs:
        zero_shot_kwargs.pop("plot_meshes")

    for i in range(num_repeats):
        print(f"\n--- Running Experiment {i + 1}/{num_repeats} ---")

        save_path = None
        if generated_meshes_path:
            save_path = generated_meshes_path / f"{i + 1}.stl"

        # Run one experiment
        for j in range(num_retries):
            try:
                code, metrics = zero_shot(
                    model=model,
                    prompt=prompt,
                    baseline_path=baseline_path,
                    save_path=save_path,
                    **zero_shot_kwargs,
                    # plot_meshes=False if i < (num_repeats-1) else zero_shot_plot_meshes
                    plot_meshes=False,
                )

                if len(code) < 40:
                    continue
                break
            except Exception as e:
                print(f"External Error: {e}")
                if str(e) in ["No .har file found"]:
                    continue
                code = ""
                metrics = None

        if metrics:
            combined_score = sum(metrics.get(k, 0) for k in METRIC_NAMES)
            row_data = {
                "code": code,
                **metrics,
                "combined_score": combined_score,
            }
        else:  # If the run failed
            row_data = {
                "code": code,
                **{metric: 0 for metric in METRIC_NAMES},
                "combined_score": 0,
            }

        all_results.append(row_data)
        print(f"Combined score: {row_data['combined_score']}")
        time.sleep(2)

    if not all_results:
        print("No results were generated to save.")
        return

    df = pd.DataFrame(all_results)

    column_order = ["code"] + METRIC_NAMES + ["combined_score"]
    for col in column_order:
        if col not in df.columns:
            df[col] = None
    df = df[column_order]

    results_csv_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(results_csv_path, index=False)
    print(f"\nExperiment suite finished. Results saved to '{results_csv_path}'")

    return df

## Dataset

In [10]:
def get_prompt(description: str) -> str:
    return f"""
You are an expert in parametric 3D modeling using CadQuery and Python. Your task is to write a Python code using the CadQuery library that generates a 3D model matching a reference information about the shape as closely as possible.

Requirements:
1. The code must use CadQuery primitives and operations.
2. The code should assign final CadQuery solid object (`cq.Workplane` with 3D geometry) to a variable 'r' (like `r=...`), so that 'r' can be imported from other python file
3. The script must be executable in a standard Python environment with CadQuery installed (no other packages).
4. Remove all comments and descriptions from the solution code

You have a text shape description with important information.

Shape description:
{description}

"""

In [11]:
def tube(
    model: str,
    plot_meshes: bool = True,
    align_on_plot: bool = True,
    align_on_evaluate: bool = True,
    num_repeats: int = 10,
):
    desc = """
    A tall vertical cylinder (116mm diameter, 200mm height) is partially subtracted by a smaller offset cylinder (66mm diameter, 200mm height) In the center of cylinder
    """

    _df = run_experiment_suite(
        model,
        get_prompt(desc),
        Path("generated/tube.stl"),
        Path(f"zero_shot/tube_{model}_zero_shot.csv"),
        generated_meshes_path=Path(f"zero_shot/tube_{model}/"),
        num_repeats=num_repeats,
        plot_meshes=plot_meshes,
        align_on_plot=align_on_plot,
        align_on_evaluate=align_on_evaluate,
    )

    # print(f"Metrics:\n{metrics}\n")
    # print(f"Generated Code:\n\n{code}")

In [12]:
def gear(
    model: str,
    plot_meshes: bool = True,
    align_on_plot: bool = True,
    align_on_evaluate: bool = True,
    num_repeats: int = 10,
):
    desc = """
    Gear wheel: inner radius of 10 mm, outer radius of 40 mm, thickness of 20 mm, 6 rectangular cogs as thick as a gear, protruding from the gear to a width of 20 mm and a height of 10 mm.
    The cogs must be inserted into the gear by 2 mm.
    """

    _df = run_experiment_suite(
        model,
        get_prompt(desc),
        Path("generated/gear.stl"),
        Path(f"zero_shot/gear_{model}_zero_shot.csv"),
        generated_meshes_path=Path(f"zero_shot/gear_{model}/"),
        num_repeats=num_repeats,
        plot_meshes=plot_meshes,
        align_on_plot=align_on_plot,
        align_on_evaluate=align_on_evaluate,
    )

    # print(f"Metrics:\n{metrics}\n")
    # print(f"Generated Code:\n\n{code}")

In [13]:
def open_box(
    model: str,
    plot_meshes: bool = True,
    align_on_plot: bool = True,
    align_on_evaluate: bool = True,
    num_repeats: int = 10,
):
    desc = """
    Box with length of 150mm, width of 100mm and bottom thickness 10 mm. Walls height: 40 mm.
    Walls along length sides have thicknesses of 15 mm.
    Walls along width have thicknesses of 30mm.
    """

    _df = run_experiment_suite(
        model,
        get_prompt(desc),
        Path("generated/open_box.stl"),
        Path(f"zero_shot/open_box_{model}_zero_shot.csv"),
        generated_meshes_path=Path(f"zero_shot/open_box_{model}/"),
        num_repeats=num_repeats,
        plot_meshes=plot_meshes,
        align_on_plot=align_on_plot,
        align_on_evaluate=align_on_evaluate,
    )

    # print(f"Metrics:\n{metrics}\n")
    # print(f"Generated Code:\n\n{code}")

In [14]:
def ladder(
    model: str,
    plot_meshes: bool = True,
    align_on_plot: bool = True,
    align_on_evaluate: bool = True,
    num_repeats: int = 10,
):
    desc = """
    The resulting object is a solid, monolithic block measuring 60 mm wide,
    80 mm high, and 50 mm deep, with a two-step profile on its front face composed of 40 mm risers and 30 mm treads.
    The block is bisected by a full-width planar cut on its right side that connects the top-back edge to the bottom-front edge, creating a new face angled at approximately 58 degrees relative to the base. All other primary planes and unspecified corners remain mutually orthogonal at 90 degrees.
    """

    _df = run_experiment_suite(
        model,
        get_prompt(desc),
        Path("generated/ladder.stl"),
        Path(f"zero_shot/ladder_{model}_zero_shot.csv"),
        generated_meshes_path=Path(f"zero_shot/ladder_{model}/"),
        num_repeats=num_repeats,
        plot_meshes=plot_meshes,
        align_on_plot=align_on_plot,
        align_on_evaluate=align_on_evaluate,
    )

    # print(f"Metrics:\n{metrics}\n")
    # print(f"Generated Code:\n\n{code}")

In [15]:
def spheres(
    model: str,
    plot_meshes: bool = True,
    align_on_plot: bool = True,
    align_on_evaluate: bool = True,
    num_repeats: int = 10,
):
    desc = """
    Big sphere with radius 100mm, It is modified by removing the part, corresponding to the small sphere with radius 80mm and shifted from the center of big sphere 20mm up and 70mm left.
    Another small sphere of radius 50mm touches the bottom of big sphere from the outside
    """

    _df = run_experiment_suite(
        model,
        get_prompt(desc),
        Path("generated/spheres.stl"),
        Path(f"zero_shot/spheres_{model}_zero_shot.csv"),
        generated_meshes_path=Path(f"zero_shot/spheres_{model}/"),
        num_repeats=num_repeats,
        plot_meshes=plot_meshes,
        align_on_plot=align_on_plot,
        align_on_evaluate=align_on_evaluate,
    )

    # print(f"Metrics:\n{metrics}\n")
    # print(f"Generated Code:\n\n{code}")

## Experiments

### Tube

#### Model: qwen-2.5-72b

In [16]:
tube("qwen-2.5-72b")


--- Running Experiment 1/10 ---
Combined score: 4.75883

--- Running Experiment 2/10 ---
Combined score: 5.22138

--- Running Experiment 3/10 ---
Combined score: 4.83028

--- Running Experiment 4/10 ---
Combined score: 5.25759

--- Running Experiment 5/10 ---
Combined score: 5.10607

--- Running Experiment 6/10 ---
Combined score: 4.81772

--- Running Experiment 7/10 ---
Combined score: 4.7771

--- Running Experiment 8/10 ---
Combined score: 5.20644

--- Running Experiment 9/10 ---
Combined score: 4.71948

--- Running Experiment 10/10 ---
Combined score: 4.80612

Experiment suite finished. Results saved to 'zero_shot/tube_qwen-2.5-72b_zero_shot.csv'


#### Model: qwen-2.5-coder-32b

In [None]:
tube("qwen-2.5-coder-32b")


--- Running Experiment 1/10 ---
Combined score: 5.54685

--- Running Experiment 2/10 ---
Error executing code: list index out of range
Combined score: 0

--- Running Experiment 3/10 ---
Error executing code: list index out of range
Combined score: 0

--- Running Experiment 4/10 ---
Combined score: 5.16559

--- Running Experiment 5/10 ---
Combined score: 5.9317

--- Running Experiment 6/10 ---
Combined score: 5.23066

--- Running Experiment 7/10 ---
Combined score: 5.61344

--- Running Experiment 8/10 ---
Error executing code: list index out of range
Combined score: 0

--- Running Experiment 9/10 ---
Error executing code: list index out of range
Combined score: 0

--- Running Experiment 10/10 ---
Error executing code: list index out of range
Combined score: 0

Experiment suite finished. Results saved to 'zero_shot/tube_qwen-2.5-coder-32b_zero_shot.csv'


#### Model: gpt-o4-mini

In [19]:
tube("o4-mini")


--- Running Experiment 1/10 ---
Combined score: 5.90918

--- Running Experiment 2/10 ---
Combined score: 5.766220000000001

--- Running Experiment 3/10 ---
Combined score: 5.65876

--- Running Experiment 4/10 ---
Combined score: 5.9093800000000005

--- Running Experiment 5/10 ---
Combined score: 5.24488

--- Running Experiment 6/10 ---
Combined score: 5.90128

--- Running Experiment 7/10 ---
Combined score: 5.27279

--- Running Experiment 8/10 ---
Combined score: 5.72854

--- Running Experiment 9/10 ---
Combined score: 5.94564

--- Running Experiment 10/10 ---
Combined score: 5.16979

Experiment suite finished. Results saved to 'zero_shot/tube_o4-mini_zero_shot.csv'


### Gear

#### Model: qwen-2.5-72b

In [18]:
gear("qwen-2.5-72b")


--- Running Experiment 1/10 ---
Error executing code: 'Vector' object has no attribute 'rotated'
Combined score: 0

--- Running Experiment 2/10 ---
Combined score: 4.4436

--- Running Experiment 3/10 ---
Combined score: 4.49764

--- Running Experiment 4/10 ---
Combined score: 4.38339

--- Running Experiment 5/10 ---
Combined score: 4.44698

--- Running Experiment 6/10 ---
Combined score: 4.4572199999999995

--- Running Experiment 7/10 ---
Combined score: 4.43957

--- Running Experiment 8/10 ---
Combined score: 4.3815

--- Running Experiment 9/10 ---
Combined score: 4.38437

--- Running Experiment 10/10 ---
Combined score: 4.44561

Experiment suite finished. Results saved to 'zero_shot/gear_qwen-2.5-72b_zero_shot.csv'


#### Model: qwen-2.5-coder-32b

In [19]:
gear("qwen-2.5-coder-32b")


--- Running Experiment 1/10 ---
Error executing code: Can not return the Nth element of an empty list
Combined score: 0

--- Running Experiment 2/10 ---
Error executing code: 'Workplane' object has no attribute 'gear'
Combined score: 0

--- Running Experiment 3/10 ---
Error executing code: Can not return the Nth element of an empty list
Combined score: 0

--- Running Experiment 4/10 ---
Error executing code: 'Workplane' object has no attribute 'gear'
Combined score: 0

--- Running Experiment 5/10 ---
Error executing code: 'Workplane' object has no attribute 'gear'
Combined score: 0

--- Running Experiment 6/10 ---
Error executing code: Can not return the Nth element of an empty list
Combined score: 0

--- Running Experiment 7/10 ---
Error executing code: Can not return the Nth element of an empty list
Combined score: 0

--- Running Experiment 8/10 ---
Error executing code: Can not return the Nth element of an empty list
Combined score: 0

--- Running Experiment 9/10 ---
Error executin

#### Model: gpt-o4-mini

In [20]:
# gear("gpt-4.1-mini")
gear("o4-mini")


--- Running Experiment 1/10 ---
Combined score: 4.47665

--- Running Experiment 2/10 ---
Combined score: 4.44525

--- Running Experiment 3/10 ---
Error executing code: 'OCP.TopLoc.TopLoc_Location' object has no attribute 'getAttribute'
Combined score: 0

--- Running Experiment 4/10 ---
Combined score: 4.212

--- Running Experiment 5/10 ---
Error executing code: Fillets requires that edges be selected
Combined score: 0

--- Running Experiment 6/10 ---
Combined score: 3.67421

--- Running Experiment 7/10 ---
Combined score: 4.32

--- Running Experiment 8/10 ---
Combined score: 4.54431

--- Running Experiment 9/10 ---
Combined score: 4.50567

--- Running Experiment 10/10 ---
Combined score: 4.42464

Experiment suite finished. Results saved to 'zero_shot/gear_gpt-4.1-mini_zero_shot.csv'


### Open box

#### Model: qwen-2.5-72b

In [20]:
open_box("qwen-2.5-72b")


--- Running Experiment 1/10 ---
Combined score: 3.98437

--- Running Experiment 2/10 ---
Combined score: 4.1654800000000005

--- Running Experiment 3/10 ---
Combined score: 4.06401

--- Running Experiment 4/10 ---
Combined score: 4.17215

--- Running Experiment 5/10 ---
Combined score: 4.08194

--- Running Experiment 6/10 ---
Combined score: 4.1139600000000005

--- Running Experiment 7/10 ---
Combined score: 4.3168

--- Running Experiment 8/10 ---
Combined score: 4.19172

--- Running Experiment 9/10 ---
Combined score: 4.12845

--- Running Experiment 10/10 ---
Combined score: 4.16338

Experiment suite finished. Results saved to 'zero_shot/open_box_qwen-2.5-72b_zero_shot.csv'


#### Model: qwen-2.5-coder-32b

In [21]:
open_box("qwen-2.5-coder-32b")


--- Running Experiment 1/10 ---
Combined score: 4.23148

--- Running Experiment 2/10 ---
Combined score: 4.0703

--- Running Experiment 3/10 ---
Combined score: 4.22053

--- Running Experiment 4/10 ---
Combined score: 4.31126

--- Running Experiment 5/10 ---
Combined score: 3.185

--- Running Experiment 6/10 ---
Combined score: 2.9887799999999998

--- Running Experiment 7/10 ---
Combined score: 3.42699

--- Running Experiment 8/10 ---
Combined score: 4.24426

--- Running Experiment 9/10 ---
Combined score: 4.11606

--- Running Experiment 10/10 ---
Combined score: 3.22801

Experiment suite finished. Results saved to 'zero_shot/open_box_qwen-2.5-coder-32b_zero_shot.csv'


#### Model: gpt-o4-mini

In [None]:
# open_box("gpt-4.1-mini")
open_box("o4-mini")


--- Running Experiment 1/10 ---
Combined score: 4.63408

--- Running Experiment 2/10 ---
Combined score: 4.73234

--- Running Experiment 3/10 ---
Combined score: 4.28442

--- Running Experiment 4/10 ---
Combined score: 3.45331

--- Running Experiment 5/10 ---
Combined score: 2.45934

--- Running Experiment 6/10 ---
Combined score: 3.81969

--- Running Experiment 7/10 ---
Combined score: 2.95451

--- Running Experiment 8/10 ---
Combined score: 3.13128

--- Running Experiment 9/10 ---
Combined score: 3.90321

--- Running Experiment 10/10 ---
Combined score: 4.39453

Experiment suite finished. Results saved to 'zero_shot/open_box_gpt-4.1-mini_zero_shot.csv'


### Ladder

#### Model: qwen-2.5-72b

In [22]:
ladder("qwen-2.5-72b")


--- Running Experiment 1/10 ---
Error executing code: BRep_API: command not done
Combined score: 0

--- Running Experiment 2/10 ---
Combined score: 3.86702

--- Running Experiment 3/10 ---
Combined score: 3.67951

--- Running Experiment 4/10 ---
Combined score: 3.75921

--- Running Experiment 5/10 ---
Combined score: 3.75036

--- Running Experiment 6/10 ---
Combined score: 3.73618

--- Running Experiment 7/10 ---
Error executing code: BRep_API: command not done
Combined score: 0

--- Running Experiment 8/10 ---
Combined score: 3.76526

--- Running Experiment 9/10 ---
Combined score: 3.6976

--- Running Experiment 10/10 ---
Combined score: 3.68638

Experiment suite finished. Results saved to 'zero_shot/ladder_qwen-2.5-72b_zero_shot.csv'


#### Model: qwen-2.5-coder-32b

In [23]:
ladder("qwen-2.5-coder-32b")


--- Running Experiment 1/10 ---
Combined score: 3.74189

--- Running Experiment 2/10 ---
Combined score: 3.78631

--- Running Experiment 3/10 ---
Combined score: 3.84661

--- Running Experiment 4/10 ---
Combined score: 3.8189699999999998

--- Running Experiment 5/10 ---
Combined score: 3.74382

--- Running Experiment 6/10 ---
Combined score: 4.06152

--- Running Experiment 7/10 ---
Combined score: 3.7071199999999997

--- Running Experiment 8/10 ---
Combined score: 3.7552499999999998

--- Running Experiment 9/10 ---
Combined score: 3.92842

--- Running Experiment 10/10 ---
Combined score: 3.62857

Experiment suite finished. Results saved to 'zero_shot/ladder_qwen-2.5-coder-32b_zero_shot.csv'


#### Model: gpt-o4-mini

In [None]:
# ladder("gpt-4")
ladder("o4-mini")


--- Running Experiment 1/10 ---
Combined score: 3.85698

--- Running Experiment 2/10 ---
Error executing code: Cannot build face(s): wires not planar
Combined score: 0

--- Running Experiment 3/10 ---
Combined score: 4.11967

--- Running Experiment 4/10 ---
Combined score: 3.28158

--- Running Experiment 5/10 ---
Combined score: 3.80841

--- Running Experiment 6/10 ---
Error executing code: You have to keep at least one half
Combined score: 0

--- Running Experiment 7/10 ---
Combined score: 2.89912

--- Running Experiment 8/10 ---
Combined score: 3.76101

--- Running Experiment 9/10 ---
Combined score: 4.16301

--- Running Experiment 10/10 ---
Combined score: 4.01975

Experiment suite finished. Results saved to 'zero_shot/ladder_gpt-4_zero_shot.csv'


### Spheres

#### Model: qwen-2.5-72b

In [24]:
spheres("qwen-2.5-72b")


--- Running Experiment 1/10 ---
Combined score: 3.08485

--- Running Experiment 2/10 ---
Combined score: 3.18883

--- Running Experiment 3/10 ---
Combined score: 3.09856

--- Running Experiment 4/10 ---
Combined score: 3.14558

--- Running Experiment 5/10 ---
Combined score: 3.13778

--- Running Experiment 6/10 ---
Combined score: 3.10743

--- Running Experiment 7/10 ---
Combined score: 3.21067

--- Running Experiment 8/10 ---
Combined score: 3.08923

--- Running Experiment 9/10 ---
Combined score: 3.02555

--- Running Experiment 10/10 ---
Combined score: 3.18969

Experiment suite finished. Results saved to 'zero_shot/spheres_qwen-2.5-72b_zero_shot.csv'


#### Model: qwen-2.5-coder-32b

In [25]:
spheres("qwen-2.5-coder-32b")


--- Running Experiment 1/10 ---
Combined score: 2.8313800000000002

--- Running Experiment 2/10 ---
Combined score: 2.82884

--- Running Experiment 3/10 ---
Combined score: 2.8318

--- Running Experiment 4/10 ---
Combined score: 2.8304899999999997

--- Running Experiment 5/10 ---
Combined score: 2.82606

--- Running Experiment 6/10 ---
Combined score: 2.83203

--- Running Experiment 7/10 ---
Combined score: 2.83181

--- Running Experiment 8/10 ---
Combined score: 2.8318

--- Running Experiment 9/10 ---
Combined score: 2.82613

--- Running Experiment 10/10 ---
Combined score: 2.82835

Experiment suite finished. Results saved to 'zero_shot/spheres_qwen-2.5-coder-32b_zero_shot.csv'


#### Model: gpt-o4-mini

In [None]:
# spheres("gpt-4")
spheres("o4-mini")


--- Running Experiment 1/10 ---
Combined score: 3.10005

--- Running Experiment 2/10 ---
Error executing code: GeomAPI_ProjectPointOnSurf::LowerDistanceParameters
Combined score: 0

--- Running Experiment 3/10 ---
Combined score: 2.9404

--- Running Experiment 4/10 ---
Combined score: 3.86352

--- Running Experiment 5/10 ---
Combined score: 5.20708

--- Running Experiment 6/10 ---
Combined score: 2.99611

--- Running Experiment 7/10 ---
Error executing code: GeomAPI_ProjectPointOnSurf::LowerDistanceParameters
Combined score: 0

--- Running Experiment 8/10 ---
Combined score: 3.11424

--- Running Experiment 9/10 ---
Error executing code: GeomAPI_ProjectPointOnSurf::LowerDistanceParameters
Combined score: 0

--- Running Experiment 10/10 ---
Combined score: 3.12652

Experiment suite finished. Results saved to 'zero_shot/spheres_gpt-4_zero_shot.csv'
