In [1]:
import pandas as pd
import numpy as np

from copy import deepcopy
from collections import defaultdict
from kmodes.kmodes import init_cao

In [2]:
def dissim(X, x):
    """Count, for every row of ``X``, the attributes that differ from ``x``.

    ``X != x`` is evaluated element-wise and the resulting mismatch flags
    are summed along axis 1, giving one count per row of ``X``.
    """
    mismatches = X != x
    return np.sum(mismatches, axis=1)


def summed_dissim(X, x):
    """Total number of attribute mismatches between ``x`` and all rows of ``X``.

    Adds up the per-row counts returned by ``dissim(X, x)``.
    """
    per_row_counts = dissim(X, x)
    return np.sum(per_row_counts)


def density(X, x):
    """Density of the point ``x`` with respect to the data set ``X``.

    Computed as one minus the total mismatch count from ``summed_dissim``
    divided by the number of cells in ``X`` (rows times columns).
    """
    n_rows, n_attrs = X.shape
    total_mismatches = summed_dissim(X, x)
    return 1 - total_mismatches / (n_attrs * n_rows)

In [3]:
# Read the vehicle data set and shift every row label up by one
# (1-based labels when the default 0-based index is in use).
df = pd.read_csv("../../data/vehicle.csv")
df.index = df.index + 1

# Raw values as an array.
# NOTE(review): X is captured before the columns are renamed and reordered in
# the next cell, so its columns stay in CSV order — confirm that is intended.
X = df.values

In [4]:
# Human-readable labels, listed in the order the columns currently have in df.
column_labels = [
    "No. doors",
    "Eco-friendly",
    "Maintenance costs",
    "No. passengers",
    "Buying price",
    "No. wheels",
]
df.columns = column_labels

# Column order used from here on when df is displayed or exported.
display_order = [
    "Buying price",
    "Maintenance costs",
    "No. doors",
    "No. passengers",
    "No. wheels",
    "Eco-friendly",
]
df = df[display_order]

## Calculate densities, take first mode

In [5]:
# Fix the seed of NumPy's global random number generator.
np.random.seed(0)

In [6]:
# Work on a copy of the table and attach each row's density as a new column.
density_df = df.copy()
density_df["Density"] = [density(X, row) for row in X]

# Rank the rows from most to least dense and export the ranking as a LaTeX table.
sorted_density_df = density_df.sort_values(by="Density", ascending=False)
sorted_density_df.to_latex(
    buf="../../tex/ranked_density_table.tex",
    column_format="cccccccc",
)

In [7]:
# Row labels were shifted up by one after loading, so subtract one to turn the
# top-ranked label back into a row position in X.
idx = sorted_density_df.index[0] - 1
mode = X[idx]

# The densest row is the first selected mode.
modes = [mode]

## Select second mode

In [8]:
# Reset the seed of NumPy's global random number generator.
np.random.seed(0)


def pairwise_dissim(A, B):
    """Count the positions at which ``A`` and ``B`` differ.

    The element-wise comparison ``A != B`` is summed along axis 0.
    """
    differing = A != B
    return np.sum(differing, axis=0)


# Extend a copy of the density table with each row's mismatch count against
# the current mode.
dissim_df = density_df.copy()
dissim_df["Dissimilarity"] = [pairwise_dissim(mode, row) for row in X]

# Score every row by the product of its density and its dissimilarity.
dissim_df["Density-dissimilarity"] = dissim_df["Density"] * dissim_df["Dissimilarity"]

# Keep the ten highest-scoring rows and export them as a LaTeX table.
sorted_dissim_df = dissim_df.sort_values(
    by="Density-dissimilarity", ascending=False
).head(10)
sorted_dissim_df.to_latex(
    buf="../../tex/ranked_dens_dissim_table.tex",
    column_format="cccccccccc",
)

In [9]:
# Row labels were shifted up by one after loading, so subtract one to turn the
# top-ranked label back into a row position in X.
top_label = sorted_dissim_df.index[0]
idx = top_label - 1
mode = X[idx]

# Record the highest-scoring row as the next mode.
modes += [mode]

## Final mode

In [10]:
# Density of every row of X, in row order.
densities = [density(X, x) for x in X]

# One row per mode selected so far, one column per data point. The shape is
# derived from the data instead of being hard-coded, so the cell keeps working
# when the number of rows or of selected modes changes.
density_dissims = np.zeros((len(modes), len(X)))

for mode_idx, selected_mode in enumerate(modes):
    # Element-wise product of each point's density and its dissimilarity to
    # this mode; the list is converted explicitly rather than relying on
    # NumPy's reflected multiplication.
    density_dissims[mode_idx, :] = np.asarray(densities) * dissim(X, selected_mode)

density_dissims

array([[0.        , 1.2       , 1.13333333, 1.53333333, 1.3       ,
        1.46666667, 1.53333333, 1.15      , 0.95      , 0.86666667],
       [1.93333333, 1.8       , 1.7       , 0.        , 1.3       ,
        1.1       , 1.15      , 1.91666667, 1.26666667, 1.3       ]])

In [11]:
# Ask kmodes' init_cao for 3 modes from X, using the dissim function defined
# in this notebook as the dissimilarity measure.
mu_bar = init_cao(X, 3, dissim)

In [12]:
# Render the modes in mu_bar as a LaTeX `aligned` equation.
#
# Each mode becomes " & \left[\text{v1}, \ \text{v2}, \ ...\right]" and the
# modes are separated by ", \\ ". Separators are inserted with str.join rather
# than appended and sliced off afterwards, so an empty row or an empty mu_bar
# no longer truncates unrelated characters. Backslashes are written either in
# raw strings or fully escaped, avoiding the invalid escape sequences
# (`\m`, `\{`, `\}`, `\ `) that newer Python versions warn about.
mode_rows = [
    r" & \left["
    + r", \ ".join(r"\text{" + str(compt) + "}" for compt in row)
    + r"\right]"
    for row in mu_bar
]

bar = (
    "\\begin{equation}\n"
    "\\nonumber\n"
    "\\begin{aligned}\n"
    "\\bar{\\mu} = \\{ "
    + ", \\\\ ".join(mode_rows)
    + "\\} \\\\ \n"
    + "\\end{aligned}\n"
    + "\\end{equation}"
)

In [13]:
# Save the generated LaTeX equation, replacing any existing file.
with open("../../tex/cao_initial_modes.tex", mode="w") as tex_file:
    tex_file.write(bar)