# In [None]:
from types import SimpleNamespace

import numpy as np
import pandas as pd
from flexclust import bootFlexclust
from flexclust import flexclust
from flexclust import slswFlexclust
from flexmix import flexmix
from lattice import barchart
from mosaicplot import mosaicplot
from partykit import ctree
from scipy.cluster.hierarchy import leaves_list, linkage
from scipy.spatial.distance import pdist
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Namespace that holds every derived object of this analysis (MD.x, MD.pca,
# MD.k4, ...). It must exist before the first `MD.<name> = ...` assignment
# below; without it that assignment raises NameError.
MD = SimpleNamespace()

# Load the McDonald's dataset (fetched over the network on every run)
mcdonalds = pd.read_csv("https://raw.githubusercontent.com/ajdamico/usdlnr/master/data/inst/mcdonalds.csv")

# Extract the variables: positional columns 1..10 (0-based, end-exclusive).
# NOTE(review): if this slice was ported from a 1-based language, the intended
# selection may be `iloc[:, 0:11]` -- confirm against the CSV's column layout.
MD.x = mcdonalds.iloc[:, 1:11]

# Standardize the variables to zero mean and unit variance per column.
# NOTE(review): StandardScaler needs numeric input -- confirm the selected
# columns are numeric (or encode them first).
scaler = StandardScaler()
MD.x = scaler.fit_transform(MD.x)

# Principal component analysis of MD.x, keeping every component
# (PCA() is constructed with its defaults; fit() returns the estimator itself).
MD.pca = PCA()
MD.pca.fit(MD.x)

# Summary of the fitted PCA, one array per line of output:
# explained-variance ratios, singular values, then the component vectors.
print(
    MD.pca.explained_variance_ratio_,
    MD.pca.singular_values_,
    MD.pca.components_,
    sep="\n",
)

# Four-cluster k-means on MD.x; the fixed random_state makes the run repeatable.
kmeans = KMeans(n_clusters=4, random_state=1234)
kmeans.fit(MD.x)

# Per-observation cluster labels of the fitted model.
MD.k4 = kmeans.labels_

# Show the observations in principal-component space, coloured by cluster,
# then overlay the projected axes.
# NOTE(review): `plot` and `projAxes` are neither defined nor imported in the
# visible part of this file -- confirm where they are supposed to come from.
plot(MD.pca.transform(MD.x), c=MD.k4, cmap='gray')
projAxes(MD.pca)

# Hierarchically cluster the *variables*: transposing MD.x makes every column
# one observation for the pairwise-distance computation.
# The previous code called `hclust` and `dist`, which are not defined in this
# file, and paired the resulting order (one entry per column) with an index of
# length MD.x.shape[0] (one entry per row), which np.vstack cannot stack.
# NOTE(review): Euclidean distance with complete linkage is used here on the
# assumption that the original call meant R's hclust(dist(...)) defaults --
# confirm.
_variable_linkage = linkage(pdist(np.transpose(MD.x), metric="euclidean"), method="complete")

# leaves_list() is 0-based; "order" is stored 1-based because the consumer
# below subtracts 1 again.
_variable_order = leaves_list(_variable_linkage) + 1

MD.vclust = pd.DataFrame(
    {
        "index": np.arange(1, _variable_order.size + 1),
        "order": _variable_order,
    }
)

# Bar chart of the k-means solution with the variables shown in reversed
# dendrogram order (converted back to 0-based positions).
# NOTE(review): `barchart` comes from the file's `lattice` import; its
# signature cannot be checked from here.
barchart(MD.k4, shade=True, which=MD.vclust["order"].iloc[::-1].values - 1)

# stepFlexclust over k = 2..8 with nrep=10, relabelled straight away, followed
# by a plot of the result against the number of segments.
# NOTE(review): `plot` is not defined or imported in the visible part of this
# file -- confirm its origin.
MD.km28 = flexclust.relabel(
    flexclust.stepFlexclust(
        MD.x,
        k=range(2, 9),
        nrep=10,
        verbose=False,
    )
)
plot(MD.km28, xlab="number of segments")

# bootFlexclust over the same k range (nrep=10, nboot=100), plotted with the
# adjusted Rand index on the y axis.
MD.b28 = bootFlexclust(
    MD.x,
    k=range(2, 9),
    nrep=10,
    nboot=100,
)
plot(
    MD.b28,
    xlab="number of segments",
    ylab="adjusted Rand index",
)

# Fit mixture models with stepFlexmix for k = 2..8 (nrep=10) using the
# FLXMCmvbinary model driver, then plot the information criteria.
# NOTE(review): `plot` is not defined or imported in the visible part of this
# file -- confirm its origin.
MD.m28 = flexmix.stepFlexmix(
    MD.x,
    k=range(2, 9),
    nrep=10,
    model=flexmix.FLXMCmvbinary(),
    verbose=False,
)
plot(MD.m28, ylab="value of information criteria (AIC, BIC, ICL)")

# Fit a single mixture model started from the k-means assignment MD.k4.
MD.m4a = flexmix(MD.x, cluster=MD.k4, model=flexmix.FLXMCmvbinary())

# Cross-tabulate k-means labels (rows) against mixture-model labels (columns).
# The previous code called `table`, which is not defined in this file;
# pd.crosstab builds the same contingency table, and print() makes it visible
# when this runs as a script.
# NOTE(review): `MD.m4a[0].cluster` is kept as written -- confirm that this is
# how the fitted object exposes its cluster assignment.
print(
    pd.crosstab(
        MD.k4,
        MD.m4a[0].cluster,
        rownames=["kmeans"],
        colnames=["mixture"],
    )
)

# Perform regression using stepFlexmix

# Reverse-coded numeric response: 6 minus the numeric value of "Like".
# NOTE(review): pd.to_numeric raises on non-numeric strings -- confirm that
# the "Like" column parses as numbers.
mcdonalds["Like.n"] = 6 - pd.to_numeric(mcdonalds["Like"])

# Formula string: response on the left, the columns at positions 1..10 joined
# with " + " on the right.
# NOTE(review): same positional slice as used for MD.x -- confirm whether
# `columns[0:11]` was intended.
f = "Like.n ~ " + " + ".join(mcdonalds.columns[1:11])

# Two-component regression mixture with nrep=10.
# The call was missing its closing parenthesis, which made the whole file
# fail to parse.
MD.reg2 = flexmix.stepFlexmix(f, data=mcdonalds, k=2, nrep=10, verbose=False)
