# Example 1.


In [2]:
import pandas as pd

# Read every column as a string ("object") and normalise the header names:
# underscores become hyphens and each word is title-cased.
vehicle = pd.read_csv("../data/vehicle.csv", dtype="object").rename(
    columns=lambda name: name.replace("_", "-").title()
)

# Attributes kept for the example, in display order.
cols = ["Price", "Maintenance", "Doors", "Passengers", "Wheels", "Eco-Friendly"]
vehicle = vehicle[cols]


---
# Example 2.

In [3]:
import pandas as pd

# Read every column as a string ("object") and normalise the header names:
# underscores become hyphens and each word is title-cased.
vehicle = pd.read_csv("../data/vehicle.csv", dtype="object").rename(
    columns=lambda name: name.replace("_", "-").title()
)

# Attributes kept for the example, in display order.
cols = ["Price", "Maintenance", "Doors", "Passengers", "Wheels", "Eco-Friendly"]
vehicle = vehicle[cols]

# The "top" row of describe() holds the most frequent value of each
# (object-typed) column, i.e. the attribute-wise mode.
summary = vehicle.describe()
mode = summary.loc["top"].values

# Render the mode as a LaTeX display equation:  \[ \mu = \left[v1, \ v2, ...\right] \]
# The separator is a comma followed by a LaTeX control space ("\ "); the
# backslash is written as "\\" because "\ " is not a valid Python escape
# sequence. Joining (instead of appending a separator and slicing the last
# one off) also leaves the header intact when `mode` is empty.
mode_entries = ", \\ ".join(f"{value}" for value in mode)
tex = "\\[ \n\t\\mu = \\left[" + mode_entries + "\\right] \n\\]"


---
# Example 3.

### Relative frequency table

In [15]:
import pandas as pd

# Read every column as a string ("object") and normalise the header names:
# underscores become hyphens and each word is title-cased.
vehicle = pd.read_csv("../data/vehicle.csv", dtype="object").rename(
    columns=lambda name: name.replace("_", "-").title()
)

# Attributes kept for the example, in display order.
cols = ["Price", "Maintenance", "Doors", "Passengers", "Wheels", "Eco-Friendly"]
vehicle = vehicle[cols]

# Row labels of the frequency table, in display order. Values that occur in
# the data but are not listed here are dropped by the reindex below.
idxs = ["0", "1", "2", "3", "4", "5", "7", "8", "L", "M", "H", "V"]

# One normalised value-count Series per attribute.
column_shares = {}
for col in vehicle.columns:
    column_shares[col] = vehicle[col].value_counts(normalize=True)

# Align all attributes on the shared row labels; a value an attribute never
# takes gets relative frequency 0.
relative_freq = pd.DataFrame(column_shares)
relative_freq = relative_freq.reindex(idxs).fillna(0)

relative_freq = relative_freq[cols]


In [67]:
# Relative frequencies of the first attribute (first column of the table).
values = relative_freq.iloc[:, 0]
categories = values.index
relative_freqs = values.values

# Keep only the categories this attribute actually takes.
prob_dist_dict = {}
for cat, rel_freq in zip(categories, relative_freqs):
    if rel_freq > 0:
        prob_dist_dict[cat] = rel_freq

# Single-row table whose row label is the LaTeX probability expression.
row_label_col = r"$A_1$"
probability_dist = pd.DataFrame(prob_dist_dict, index=[""])
probability_dist[row_label_col] = r"$\mathbb{P}(A_1 = a^{(1)})$"
probability_dist.set_index(row_label_col)[["L", "M", "H", "V"]]


### Huang's virtual modes and dissimilarity table


In [5]:
import pandas as pd
import numpy as np

from collections import defaultdict
from copy import deepcopy


def dissim(X, x):
    """Count, for each row of ``X``, the positions whose value differs from ``x``.

    ``X`` is compared elementwise with ``x`` and the mismatches are summed
    along axis 1, so ``X`` must have at least two dimensions and ``x`` must be
    comparable (broadcastable) against it. Returns one mismatch count per row.
    """
    mismatch_mask = X != x
    return np.sum(mismatch_mask, axis=1)


# Read every column as a string ("object") and normalise the header names:
# underscores become hyphens and each word is title-cased.
vehicle = pd.read_csv("../data/vehicle.csv", dtype="object").rename(
    columns=lambda name: name.replace("_", "-").title()
)

# Attributes kept for the example, in display order.
cols = ["Price", "Maintenance", "Doors", "Passengers", "Wheels", "Eco-Friendly"]
vehicle = vehicle[cols]


In [6]:
data = vehicle.values
n_attrs = data.shape[1]
modes = np.empty((3, n_attrs), dtype="object")

# Seed the global generator so the sampled virtual modes (and the
# dissimilarity table derived from them) are the same on every run.
np.random.seed(0)

# For each attribute, draw three values with probability proportional to how
# often each value occurs in the data. Sampling uniformly from the full
# column is exactly that; expanding a value -> count dictionary back into a
# list would only rebuild the column. The column is sorted so the draw does
# not depend on the row order of the data.
for iattr in range(n_attrs):
    choices = sorted(data[:, iattr])
    modes[:, iattr] = np.random.choice(choices, 3)

# Render the modes as a LaTeX aligned equation, one bracketed row per mode.
# Backslashes are all written as "\\": "\{", "\}" and "\ " are not valid
# Python escape sequences. Rows and entries are joined rather than appended
# with a trailing separator that is sliced off afterwards, so the header
# survives even when there is nothing to render.
rows = []
for mode in modes:
    # Entries are wrapped in \text{} and separated by a comma plus a LaTeX
    # control space ("\ ").
    entries = ", \\ ".join("\\text{" + f"{val}" + "}" for val in mode)
    rows.append("& \\left[" + entries + "\\right]")

tex = (
    "\\begin{equation} \n \\begin{aligned} \n \t \\tilde{\\mu} = \\left\\{ "
    + ", \\\\ ".join(rows)  # rows are separated by a comma and a LaTeX line break
    + "\\right\\} \\\\ \n \\end{aligned} \n \\end{equation}"
)


In [7]:
# First virtual mode.
mode = modes[0, :]

dissim_col = r"Dissimilarity to $\tilde{\mu}_1$"

# Work on a copy so the original table is left untouched, append each row's
# dissimilarity to the mode, and list the closest rows first.
dissim_df = vehicle.copy(deep=True)
dissim_df[dissim_col] = dissim(data, mode)
dissim_df = dissim_df.sort_values(dissim_col)


### Huang's initial modes


In [8]:
from kmodes.kmodes import init_huang

import pandas as pd
import numpy as np


In [9]:
def dissim(X, x):
    """Return the number of entries in each row of ``X`` that do not equal ``x``.

    The comparison ``X != x`` is elementwise and the result is summed over
    axis 1, so ``X`` needs at least two dimensions and ``x`` must be
    comparable (broadcastable) against it.
    """
    differs = X != x
    return np.sum(differs, axis=1)


In [10]:
# Read every column as a string ("object") and normalise the header names:
# underscores become hyphens and each word is title-cased.
vehicle = pd.read_csv("../data/vehicle.csv", dtype="object").rename(
    columns=lambda name: name.replace("_", "-").title()
)

# Attributes kept for the example, in display order.
cols = ["Price", "Maintenance", "Doors", "Passengers", "Wheels", "Eco-Friendly"]
vehicle = vehicle[cols]


In [11]:
# Number of initial modes requested from Huang's initialisation.
n_clusters = 3

data = vehicle.values

# Seed the global NumPy generator before the initialisation is run.
# NOTE(review): some kmodes releases expect an additional `random_state`
# argument to init_huang -- confirm against the installed version.
np.random.seed(0)
modes = init_huang(data, n_clusters, dissim)


In [12]:
# Render the initial modes as a LaTeX aligned equation, one bracketed row
# per mode. Backslashes are all written as "\\": "\{", "\}" and "\ " are not
# valid Python escape sequences. Rows and entries are joined rather than
# appended with a trailing separator that is sliced off afterwards, so the
# header survives even when there is nothing to render.
rows = []
for mode in modes:
    # Entries are wrapped in \text{} and separated by a comma plus a LaTeX
    # control space ("\ ").
    entries = ", \\ ".join("\\text{" + f"{val}" + "}" for val in mode)
    rows.append("& \\left[" + entries + "\\right]")

tex = (
    "\\begin{equation} \n \\begin{aligned} \n \t \\bar{\\mu} = \\left\\{ "
    + ", \\\\ ".join(rows)  # rows are separated by a comma and a LaTeX line break
    + "\\right\\} \\\\ \n \\end{aligned} \n \\end{equation}"
)


# Example 4.


In [102]:
from kmodes.kmodes import init_cao


def density(Y, x):
    """Return one minus the average per-entry dissimilarity between ``x`` and ``Y``.

    ``Y`` must be two-dimensional (its shape is unpacked into rows and
    columns). The row-wise dissimilarities of ``Y`` to ``x`` are summed and
    divided by the total number of entries in ``Y``; the result is 1 when
    every row of ``Y`` matches ``x`` in every position.
    """
    n_rows, n_cols = Y.shape
    total_mismatches = np.sum(dissim(Y, x))
    return 1 - total_mismatches / (n_rows * n_cols)


# Seed the global NumPy generator, ask Cao's initialisation for three modes
# and keep only the first two of them.
np.random.seed(0)
cao_modes = init_cao(data, 3, dissim)
modes = cao_modes[:2]


In [115]:
# The density of each object does not depend on the mode, so compute it once
# instead of once per mode (each pass compares every row with every other).
densities = np.array([density(data, x) for x in data])

# One row per mode: density of each object times its dissimilarity to that
# mode. The row count follows `modes` rather than being hard-coded.
density_dissims = np.empty((len(modes), len(vehicle)))
for i, mode in enumerate(modes):
    density_dissims[i, :] = densities * dissim(data, mode)

density_dissims = np.round(density_dissims, 2)


In [116]:
# Unpack the two rows of density_dissims; this raises unless there are
# exactly two rows.
mode1, mode2 = density_dissims


In [117]:
# For every object, keep the smaller of its two density-weighted
# dissimilarities (when both are equal, that shared value is kept).
min_vals = [min(val1, val2) for val1, val2 in zip(mode1, mode2)]
min_vals


[0.0, 1.2, 1.13, 0.0, 1.3, 1.1, 1.15, 1.15, 0.95, 0.87]