In [3]:
# Shared failure flag: every later step is guarded by it, so once it is set to
# True the remaining steps are skipped.
error_occurred = False

# Step 1: Import the package
try:
    from kmodes.kmodes import KModes
    from kmodes.kprototypes import KPrototypes
    import numpy as np
    print("Step 1: Importing kmodes and required dependencies passed.")
except ImportError as exc:
    # Append the exception text so the report names the module that could not
    # be imported (kmodes itself, numpy, or one of their dependencies).
    print(f"Step 1 failed: 'kmodes' or required dependencies are not installed. ({exc})")
    error_occurred = True
except Exception as exc:
    # Anything other than a missing module (e.g. a broken install raising at
    # import time) is reported separately.
    print(f"Step 1 failed with an unexpected error: {exc}")
    error_occurred = True

if not error_occurred:
    # Step 2: Generate synthetic categorical data for testing
    try:
        # Six rows of (colour, size, number). No dtype is given, so NumPy picks
        # one common dtype for the str/int mix and the numbers end up stored
        # as text.
        data = np.array([
            ['green', 'M', 10], ['blue', 'L', 20], ['red', 'XL', 30],
            ['green', 'S', 40], ['blue', 'M', 50], ['red', 'L', 60],
        ])
        print("Step 2: Generating synthetic categorical data passed.")
    except Exception as exc:
        print(f"Step 2 failed: {exc!s}")
        error_occurred = True
else:
    # Step 1 already failed; nothing below can run without the imports.
    print("Stopping execution due to an error in Step 1.")

if not error_occurred:
    # Step 3: Apply KModes clustering
    try:
        km = KModes(
            n_clusters=2,
            init='Huang',
            n_init=5,
            verbose=1,
        )
        clusters = km.fit_predict(data)

        # Upper-bound check only: it passes when one or two distinct labels
        # come back and fails only if more than two appear.
        km_distinct_labels = set(clusters)
        assert len(km_distinct_labels) <= 2, f"Expected 2 clusters, got {len(km_distinct_labels)}."
        print(f"Step 3: Applying KModes clustering passed. Cluster labels: {clusters}")
    except Exception as exc:
        print(f"Step 3 failed: {exc!s}")
        error_occurred = True

if not error_occurred:
    # Step 4: Apply KPrototypes clustering (Mixed categorical and numerical data)
    try:
        # dtype=object keeps each cell as the original Python object, so the
        # third column stays numeric instead of being converted to text.
        data_mixed = np.array(
            [
                ['green', 'M', 10], ['blue', 'L', 20], ['red', 'XL', 30],
                ['green', 'S', 40], ['blue', 'M', 50], ['red', 'L', 60],
            ],
            dtype=object,
        )

        kproto = KPrototypes(n_clusters=2, init='Cao', verbose=1)
        # Columns 0 and 1 are passed as the categorical column indices.
        clusters_mixed = kproto.fit_predict(data_mixed, categorical=[0, 1])

        # Upper-bound check only, mirroring the KModes step.
        kproto_distinct_labels = set(clusters_mixed)
        assert len(kproto_distinct_labels) <= 2, f"Expected 2 clusters, got {len(kproto_distinct_labels)}."
        print(f"Step 4: Applying KPrototypes clustering passed. Cluster labels: {clusters_mixed}")
    except Exception as exc:
        print(f"Step 4 failed: {exc!s}")
        error_occurred = True

if not error_occurred:
    # Step 5: Validate cluster centroids
    try:
        # KModes: expect one centroid row per requested cluster and one column
        # per feature of the array it was fitted on (`data`).
        centroids = km.cluster_centroids_
        expected_km_shape = (km.n_clusters, data.shape[1])
        assert centroids.shape == expected_km_shape, (
            f"Expected centroids shape ({km.n_clusters}, {data.shape[1]}), got {centroids.shape}."
        )

        # KPrototypes: validate against `data_mixed`, the array this model was
        # fitted on, rather than the KModes input.
        centroids_mixed = kproto.cluster_centroids_
        assert len(centroids_mixed) == kproto.n_clusters, (
            f"Expected {kproto.n_clusters} KPrototypes centroids, got {len(centroids_mixed)}."
        )

        # Every centroid is expected to hold one value per input feature; the
        # expected shape does not change between iterations, so build it once.
        # NOTE(review): this assumes cluster_centroids_ is a single 2-D array
        # (one row per cluster) — confirm against the installed kmodes version.
        expected_shape = (data_mixed.shape[1],)

        # Checking the centroid of each cluster
        for i, centroid in enumerate(centroids_mixed):
            print(f"Centroid {i} shape: {centroid.shape}")
            print(f"Centroid {i} values: {centroid}")

            assert centroid.shape == expected_shape, (
                f"Expected centroid {i} shape {expected_shape}, got {centroid.shape}."
            )

        print("Step 5: Validating cluster centroids passed.")
    except Exception as exc:
        print(f"Step 5 failed: {exc}")
        error_occurred = True

# Final Confirmation: summarise the run based on the shared failure flag.
if error_occurred:
    print("One or more steps failed. Please check the error messages above.")
else:
    print("All extensive tests for the 'kmodes' package completed successfully.")


Step 1: Importing kmodes and required dependencies passed.
Step 2: Generating synthetic categorical data passed.
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 0, cost: 9.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 0, cost: 9.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 0, cost: 9.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 4, iteration: 1/100, moves: 0, cost: 10.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 5, iteration: 1/100, moves: 0, cost: 8.0
Best run was number 5
Step 3: Applying KModes clustering passed. Cluster labels: [0 1 1 0 0 1]
Initialization method and algorithm are deterministic. Setting n_init to 1.
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run: 1, it