In [1]:
from kmodes.kmodes import KModes
import numpy as np

In [2]:
# Sample categorical data
data = np.array([
    ['red', 'SUV', 'domestic'],
    ['blue', 'sedan', 'domestic'],
    ['green', 'SUV', 'domestic'],
    ['red', 'sedan', 'domestic'],
    ['blue', 'SUV', 'import'],
    ['green', 'sedan', 'import'],
    ['red', 'SUV', 'import'],
    ['blue', 'sedan', 'import'],
])
data

array([['red', 'SUV', 'domestic'],
       ['blue', 'sedan', 'domestic'],
       ['green', 'SUV', 'domestic'],
       ['red', 'sedan', 'domestic'],
       ['blue', 'SUV', 'import'],
       ['green', 'sedan', 'import'],
       ['red', 'SUV', 'import'],
       ['blue', 'sedan', 'import']], dtype='<U8')

In [3]:
# Initialize the K-Modes algorithm
k_modes = KModes(n_clusters=2, init='Huang', n_init=5, verbose=1)
k_modes

In [4]:
# Fit the model to the data
clusters = k_modes.fit_predict(data)
clusters

Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 0, cost: 8.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 1, cost: 6.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 0, cost: 10.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 4, iteration: 1/100, moves: 0, cost: 10.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 5, iteration: 1/100, moves: 1, cost: 6.0
Best run was number 2


array([1, 0, 1, 1, 0, 0, 1, 0], dtype=uint16)

In [5]:
# Print the cluster centroids
print('Cluster centroids:')
print(k_modes.cluster_centroids_)

Cluster centroids:
[['blue' 'sedan' 'import']
 ['red' 'SUV' 'domestic']]


In [6]:
# Print the cluster assignments
print('Cluster assignments:')
print(clusters)

Cluster assignments:
[1 0 1 1 0 0 1 0]


In [1]:
import pandas as pd
from kmodes.kmodes import KModes

In [2]:
# Load data from CSV file
data = pd.read_csv('homeprices.csv')
data

Unnamed: 0,A,X
0,B,X
1,C,Y
2,C,X
3,B,Y
4,A,Y


In [3]:
# Convert the dataframe to a numpy array
data_array = data.to_numpy()
data_array

array([['B', 'X'],
       ['C', 'Y'],
       ['C', 'X'],
       ['B', 'Y'],
       ['A', 'Y']], dtype=object)

In [4]:
# Initialize the K-Modes algorithm
k_modes = KModes(n_clusters=2, init='Huang', n_init=5, verbose=1)
k_modes

In [5]:
# Fit the model to the data
clusters = k_modes.fit_predict(data_array)
clusters

Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 0, cost: 3.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 1, cost: 3.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 1, cost: 3.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 4, iteration: 1/100, moves: 0, cost: 3.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 5, iteration: 1/100, moves: 0, cost: 3.0
Best run was number 1


array([1, 0, 0, 0, 0], dtype=uint16)

In [6]:
# Print the cluster centroids and the cluster assignments
print('Cluster centroids:')
print(k_modes.cluster_centroids_)

Cluster centroids:
[['C' 'Y']
 ['B' 'X']]


In [7]:
# Print the cluster assignments
print('Cluster assignments:')
print(clusters)

Cluster assignments:
[1 0 0 0 0]


In [8]:
# Optionally, add the cluster assignments to the original dataframe
data['Cluster'] = clusters
data

Unnamed: 0,A,X,Cluster
0,B,X,1
1,C,Y,0
2,C,X,0
3,B,Y,0
4,A,Y,0


In [9]:
# Save the results to a new CSV file
data.to_csv('clustered_data.csv', index=False)