In [None]:
# Answer1.

import pandas as pd

# Read the wine dataset from the CSV file in the working directory.
df = pd.read_csv('wine.csv')

# Separate the label column (y) from the remaining feature columns (X).
y = df['target']
X = df.drop(columns='target')

from sklearn.preprocessing import StandardScaler

# Rescale the features with StandardScaler: learn the per-column statistics
# from X first, then apply them to obtain the standardized matrix.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

from sklearn.decomposition import PCA

# Fit a PCA model on the standardized features. No n_components is passed,
# so the number of components kept is left at scikit-learn's default.
pca = PCA().fit(X_scaled)

# Project the standardized data onto the fitted principal components.
X_pca = pca.transform(X_scaled)

import numpy as np
import matplotlib.pyplot as plt

# Running total of the explained-variance ratio as components are added.
cumulative_variance_ratio = pca.explained_variance_ratio_.cumsum()

# Line plot: x is the 1-based component count, y is the cumulative ratio.
plt.plot(
    np.arange(1, cumulative_variance_ratio.size + 1),
    cumulative_variance_ratio,
)
plt.title('Cumulative Explained Variance vs. Number of Principal Components')
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.show()

# Scatter of the samples on the first two principal components, coloured by y.
# NOTE(review): assumes y holds values matplotlib can map to colours
# (e.g. numeric class codes) -- confirm against the CSV.
plt.scatter(x=X_pca[:, 0], y=X_pca[:, 1], c=y)
plt.title('Scatter Plot of Data in PCA Space')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

from sklearn.cluster import KMeans

# Number of clusters (K) requested from K-means.
K = 3

# Build the estimator. n_init is pinned explicitly: scikit-learn's default
# changed from 10 to 'auto' in 1.4 (with a FutureWarning in 1.2/1.3), so
# leaving it implicit makes the result depend on the installed version.
# random_state fixes the centroid initialisation so reruns are repeatable.
kmeans = KMeans(n_clusters=K, n_init=10, random_state=42)

# NOTE(review): X_pca holds every component the PCA kept (PCA() was created
# without n_components). If that is the full set, this clustering is
# equivalent to clustering X_scaled directly; slice X_pca[:, :k] if the
# intent is to cluster in a reduced space -- confirm the intent.

# Fit the model and collect the per-sample cluster labels in one call;
# fit_predict returns the same array that is stored in kmeans.labels_.
cluster_labels = kmeans.fit_predict(X_pca)

from sklearn import metrics

# Per-component share of the total variance, as reported by the fitted PCA.
explained_variance_ratio = pca.explained_variance_ratio_

# Report each component's share, numbering the components from 1.
for component_number, ratio in enumerate(explained_variance_ratio, start=1):
    print(f"Explained Variance Ratio of Principal Component {component_number}: {ratio:.4f}")

# Score the clustering on the PCA-space data using three label-free metrics.
# The result names mirror the sklearn.metrics function names; the functions
# themselves are reached through the `metrics` module, so nothing is shadowed.
silhouette_score = metrics.silhouette_score(X=X_pca, labels=cluster_labels)
calinski_harabasz_score = metrics.calinski_harabasz_score(X=X_pca, labels=cluster_labels)
davies_bouldin_score = metrics.davies_bouldin_score(X=X_pca, labels=cluster_labels)

# Print every score rounded to four decimal places.
print(f"Silhouette Score: {silhouette_score:.4f}")
print(f"Calinski-Harabasz Score: {calinski_harabasz_score:.4f}")
print(f"Davies-Bouldin Score: {davies_bouldin_score:.4f}")