# Example 1.

In [1]:
import pandas as pd

# Load the data set; dtype='object' keeps every cell as read (no numeric parsing).
vehicle = pd.read_csv('../data/vehicle.csv', dtype='object')

# Header clean-up: underscores become hyphens, then each word is title-cased.
pretty_names = []
for raw_name in vehicle.columns:
    pretty_names.append(raw_name.replace('_', '-').title())
vehicle.columns = pretty_names

# Attributes kept for the example, in display order.
cols = [
    'Price',
    'Maintenance',
    'Doors',
    'Passengers',
    'Wheels',
    'Eco-Friendly',
]

vehicle = vehicle[cols]

---
# Example 2.

In [12]:
import pandas as pd

# dtype='object' keeps every cell exactly as read (no numeric parsing).
vehicle = pd.read_csv('../data/vehicle.csv', dtype='object')

# Normalise headers: underscores become hyphens, then each word is title-cased.
vehicle.columns = [col.replace('_', '-').title() for col in vehicle.columns]
cols = ['Price', 'Maintenance', 'Doors', 'Passengers', 'Wheels', 'Eco-Friendly']

vehicle = vehicle[cols]
# For object columns, the 'top' row of describe() is the most frequent value,
# i.e. the per-attribute mode.
mode = vehicle.describe().loc['top'].values

# Render the mode vector as a LaTeX display equation.
# The separator is written as ', \\ ' (comma, space, backslash, space): the
# bare '\ ' form is an invalid escape sequence that newer Python versions warn
# about. Joining also avoids trimming a trailing separator by slicing, which
# would eat part of the prefix if `mode` were empty.
tex = '\\[ \n\t\\mu = \\left['
tex += ', \\ '.join(f'{value}' for value in mode)
tex += '\\right] \n\\]'

---
# Example 3.

### Relative frequency table

In [3]:
import pandas as pd

# Read everything as 'object' so attribute values stay as they appear in the file.
vehicle = pd.read_csv('../data/vehicle.csv', dtype='object')

# Header clean-up: '_' -> '-', then title-case.
vehicle.columns = [name.replace('_', '-').title() for name in vehicle.columns]
cols = ['Price', 'Maintenance', 'Doors', 'Passengers', 'Wheels', 'Eco-Friendly']

vehicle = vehicle[cols]

# Row labels of the final table. reindex() keeps only these labels, in this
# order.
# NOTE(review): '6' is not in the list — confirm no attribute takes that value.
idxs = ['0', '1', '2', '3', '4', '5', '7', '8', 'L', 'M', 'H', 'V']

# One normalised value_counts() Series per attribute.
per_attribute = {}
for col in vehicle.columns:
    per_attribute[col] = vehicle[col].value_counts(normalize=True)

# Align all attributes on the shared label list; a value an attribute never
# takes has no entry after alignment and is reported as 0.
relative_freq = pd.DataFrame(per_attribute)
relative_freq = relative_freq.reindex(idxs)
relative_freq = relative_freq.fillna(0)

### Huang's virtual modes and dissimilarity table

In [4]:
import pandas as pd
import numpy as np

from collections import defaultdict
from copy import deepcopy

def dissim(X, x):
    """Count, per row of ``X``, how many entries differ from ``x``.

    ``X`` is compared elementwise with ``x`` and the mismatches are summed
    along axis 1, so ``X`` is expected to be two-dimensional with ``x``
    matching (or broadcasting against) its rows.

    Returns one mismatch count per row of ``X``.
    """
    mismatches = X != x
    return np.sum(mismatches, axis=1)

# Load with dtype='object' so no column is parsed as numeric.
vehicle = pd.read_csv('../data/vehicle.csv', dtype='object')

# Rewrite each header: underscores to hyphens, then title-case.
vehicle.columns = list(
    map(lambda header: header.replace('_', '-').title(), vehicle.columns)
)

# Keep only the six attributes used below, in this order.
cols = ['Price', 'Maintenance', 'Doors', 'Passengers', 'Wheels', 'Eco-Friendly']
vehicle = vehicle[cols]

In [5]:
data = vehicle.values
n_attrs = data.shape[1]
modes = np.empty((3, n_attrs), dtype='object')

# Dedicated, seeded generator: the virtual modes are sampled at random, so
# without a fixed seed every run of this cell would give different modes (and
# a different dissimilarity table downstream). A local generator is used so
# NumPy's global random state is left untouched.
rng = np.random.RandomState(0)

# For each attribute, draw 3 values with probability proportional to how often
# each value occurs. Sampling uniformly from the raw column does exactly that,
# so no explicit frequency dictionary is needed. The column is sorted first so
# the draw depends only on the column's contents, not on row order.
for iattr in range(n_attrs):
    choices = sorted(data[:, iattr])
    modes[:, iattr] = rng.choice(choices, 3)

# Render the modes as a LaTeX set of row vectors.
# Backslashes are written as '\\' throughout: the bare '\{', '\}' and '\ '
# forms are invalid escape sequences that newer Python versions warn about.
# Rows and entries are joined with their separators instead of appending a
# trailing separator and slicing it off afterwards.
rows = []
for mode in modes:
    entries = ', \\ '.join('\\text{' + f'{val}' + '}' for val in mode)
    rows.append('& \\left[' + entries + '\\right]')

tex = '\\begin{equation} \n \\begin{aligned} \n \t \\tilde{\\mu} = \\left\\{ '
tex += ', \\\\ '.join(rows)
tex += '\\right\\} \\\\ \n \\end{aligned} \n \\end{equation}'

In [6]:
# Reference point: the first row of `modes`.
mode = modes[0, :]

# Single definition of the column label, used for both insert and sort.
dissim_col = r'Dissimilarity to $\tilde{\mu}_1$'

# Work on an independent copy so `vehicle` keeps its columns and row order.
dissim_df = vehicle.copy()
dissim_df[dissim_col] = dissim(data, mode)

# Closest rows (fewest mismatching attributes) first.
dissim_df = dissim_df.sort_values(dissim_col, ascending=True)

### Huang's initial modes

In [7]:
from kmodes.kmodes import init_huang

import pandas as pd
import numpy as np

In [8]:
def dissim(X, x):
    """Return the number of positions in which each row of ``X`` differs from ``x``.

    The comparison ``X != x`` is elementwise and the resulting booleans are
    summed over axis 1, giving one count per row of ``X``.

    Note: this repeats the ``dissim`` definition from the earlier
    "virtual modes" cell; the later definition is the one in effect.
    """
    differs = (X != x)
    per_row = np.sum(differs, axis=1)
    return per_row

In [9]:
# Every column is read as 'object', so values are kept as they appear in the file.
vehicle = pd.read_csv('../data/vehicle.csv', dtype='object')

# Tidy the headers: replace '_' with '-' and title-case the result.
tidy_headers = [header.replace('_', '-') for header in vehicle.columns]
vehicle.columns = [header.title() for header in tidy_headers]

# Attributes used in this example, in display order.
cols = [
    'Price', 'Maintenance', 'Doors',
    'Passengers', 'Wheels', 'Eco-Friendly',
]

vehicle = vehicle[cols]

In [10]:
# Plain array view of the attribute table; this is what init_huang is given.
data = vehicle.values

# NOTE(review): presumably init_huang samples through NumPy's global random
# state, which would be why the seed is fixed immediately before the call —
# confirm against the installed kmodes version.
np.random.seed(0)
# Request 3 initial modes, passing the simple-matching `dissim` defined above
# as the dissimilarity function.
modes = init_huang(data, 3, dissim)

In [11]:
# Render the initial modes as a LaTeX set of row vectors.
# Backslashes are written as '\\' throughout: the bare '\{', '\}' and '\ '
# forms are invalid escape sequences that newer Python versions warn about.
# Rows and entries are joined with their separators instead of appending a
# trailing separator and slicing it off, which would cut into the surrounding
# markup if `modes` (or one of its rows) were empty.
rows = []
for mode in modes:
    entries = ', \\ '.join('\\text{' + f'{val}' + '}' for val in mode)
    rows.append('& \\left[' + entries + '\\right]')

tex = '\\begin{equation} \n \\begin{aligned} \n \t \\bar{\\mu} = \\left\\{ '
tex += ', \\\\ '.join(rows)
tex += '\\right\\} \\\\ \n \\end{aligned} \n \\end{equation}'