In [1]:
import numpy as np
from d_imm.splitters import get_all_mistakes

# Fixture: five 2-D points. Rows 0-1 surround center 0, rows 2-3 surround
# center 1, and the last row is deliberately mislabeled.
X = np.array(
    [
        [1.0, 2.0],
        [1.5, 3.0],
        [3.0, 6.0],
        [2.5, 5.0],
        [2.0, 4.5],  # nearer to center 1, yet labeled 0 below
    ],
    dtype=np.float64,
)

# Cluster label for each row of X; the trailing 0 is the intentional mistake.
y = np.array([0, 0, 1, 1, 0], dtype=np.int32)

# One row per cluster center.
centers = np.array(
    [
        [1.25, 2.5],  # cluster 0
        [2.75, 5.5],  # cluster 1
    ],
    dtype=np.float64,
)

# All-ones int32 masks: every center and every feature is flagged as valid.
valid_centers = np.ones(centers.shape[0], dtype=np.int32)
valid_cols = np.ones(X.shape[1], dtype=np.int32)

# Evaluate the candidate splits (njobs=1) and print one returned entry per line.
results = get_all_mistakes(X, y, centers, valid_centers, valid_cols, njobs=1)
for result in results:
    print(result)

{'feature': 0, 'threshold': 1.25, 'mistakes': 1}
{'feature': 0, 'threshold': 1.25, 'mistakes': 2}
{'feature': 0, 'threshold': 1.5, 'mistakes': 1}
{'feature': 0, 'threshold': 2.0, 'mistakes': 0}
{'feature': 0, 'threshold': 2.5, 'mistakes': 1}
{'feature': 1, 'threshold': 2.5, 'mistakes': 1}
{'feature': 1, 'threshold': 2.5, 'mistakes': 2}
{'feature': 1, 'threshold': 3.0, 'mistakes': 1}
{'feature': 1, 'threshold': 4.5, 'mistakes': 0}
{'feature': 1, 'threshold': 5.0, 'mistakes': 1}


In [1]:
import numpy as np
from collections import namedtuple

# Project import kept with the other imports at the top of the cell, so a
# missing dependency fails before any fixture is built.
from d_imm.splitters import get_all_mistakes_histogram

# Lightweight record types used to describe the inputs.
# NOTE(review): Instance is not referenced in this cell — confirm it is still needed.
Instance = namedtuple("Instance", ["features", "label", "weight"])
Split = namedtuple("Split", ["feature_index", "threshold", "categories", "is_continuous"])

# Test data: five 2-D points; the last one is deliberately mislabeled.
X = np.array([
    [1.0, 2.0],
    [1.5, 3.0],
    [3.0, 6.0],
    [2.5, 5.0],
    [2.0, 4.5]  # This point is closer to cluster 1 center but assigned to cluster 0
], dtype=np.float64)

y = np.array([0, 0, 1, 1, 0], dtype=np.int32)  # Labels with a mistake in the last point

centers = np.array([
    [1.25, 2.5],  # Center of cluster 0
    [2.75, 5.5]   # Center of cluster 1
], dtype=np.float64)

valid_centers = np.array([1, 1], dtype=np.int32)  # Both centers are valid
valid_cols = np.array([1, 1], dtype=np.int32)  # Both features are valid

# Candidate splits grouped per feature: histogram[f] holds the Split entries
# for feature f, each carrying a continuous threshold.
# NOTE(review): the recorded output reports thresholds 1.25 and 2.5 for the
# 1.00 / 2.0 entries — presumably the function adjusts them; confirm against
# d_imm.splitters.
histogram = [
    [  # Feature 0 thresholds
        Split(feature_index=0, threshold=np.float64(1.00), categories=None, is_continuous=True),
        Split(feature_index=0, threshold=np.float64(2.45), categories=None, is_continuous=True),
    ],
    [  # Feature 1 thresholds
        Split(feature_index=1, threshold=np.float64(2.0), categories=None, is_continuous=True),
        Split(feature_index=1, threshold=np.float64(5.0), categories=None, is_continuous=True),
    ],
]

# Call the function
results = get_all_mistakes_histogram(
    X, y, centers, valid_centers, valid_cols, histogram, njobs=1, sorted=True
)

# Print results, one returned entry per line
for result in results:
    print(result)

{'feature': 0, 'threshold': 1.25, 'mistakes': 2}
{'feature': 0, 'threshold': 2.45, 'mistakes': 0}
{'feature': 1, 'threshold': 2.5, 'mistakes': 2}
{'feature': 1, 'threshold': 5.0, 'mistakes': 1}
