In [6]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Read data from CSV file
mcdonalds = pd.read_csv('mcdonald.csv')

# Convert "Yes" and "No" values to binary (0 and 1)
mcdonalds = mcdonalds.replace({"Yes": 1, "No": 0})

# Perform PCA
pca = PCA()
MD_x = mcdonalds.iloc[:, 1:12].values
MD_pca = pca.fit_transform(MD_x)

# Print column means
column_means = np.round(np.mean(MD_x, axis=0), 2)
print("Column Means:")
print(column_means)

# Print PCA summary
print("PCA Summary:")
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

# Plot PCA results
plt.scatter(MD_pca[:, 0], MD_pca[:, 1], c='grey')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

# Perform K-means clustering
kmeans = KMeans(n_clusters=4, random_state=1234)
kmeans.fit(MD_x)
MD_k4 = kmeans.labels_

# Perform Gaussian Mixture Model clustering
gmm = GaussianMixture(n_components=4, random_state=1234)
gmm.fit(MD_x)
MD_m4 = gmm.predict(MD_x)

# Print clustering results
clustering_results = pd.DataFrame({'K-Means': MD_k4, 'GMM': MD_m4})
print("Clustering Results:")
print(clustering_results)

# Perform decision tree analysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Preprocess 'Like' column
mcdonalds['Like_n'] = 6 - mcdonalds['Like'].astype(int)

# Prepare training data
X = mcdonalds.iloc[:, 1:12]
y = mcdonalds['Like_n']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Fit decision tree classifier
tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)

# Plot decision tree
from sklearn.tree import plot_tree

plt.figure(figsize=(10, 6))
plot_tree(tree, feature_names=X.columns, class_names=[str(i) for i in range(1, 7)], filled=True)
plt.show()


ValueError: ignored