In [4]:
import numpy as np
import pandas as pd

In [5]:
# Load the data set
data = pd.read_csv('iris.csv')

In [6]:
# Extract the attributes
attributes = data.iloc[:, :4].values

In [7]:
# Set the number of clusters
k = 3

# Set the maximum number of iterations
max_iterations = 10

# Randomly initialize cluster means as data points
np.random.seed(42)
initial_means = attributes[np.random.choice(len(attributes), size=k, replace=False)]

# Initialize cluster assignments and cluster means
cluster_assignments = np.zeros(len(attributes))
cluster_means = np.zeros((k, attributes.shape[1]))

In [8]:
# Iterate for the specified number of iterations
for iteration in range(max_iterations):
    # Assign points to the nearest cluster
    for i, point in enumerate(attributes):
        distances = np.linalg.norm(point - initial_means, axis=1)
        cluster_assignments[i] = np.argmin(distances)

    # Update cluster means
    for cluster in range(k):
        cluster_points = attributes[cluster_assignments == cluster]
        cluster_means[cluster] = np.mean(cluster_points, axis=0)

    # Update initial means
    initial_means = cluster_means

In [9]:
# Print the final cluster means
print("Final Cluster Means:")
for cluster, mean in enumerate(cluster_means):
    print(f"Cluster {cluster+1}: {mean}")

Final Cluster Means:
Cluster 1: [5.9016129  2.7483871  4.39354839 1.43387097]
Cluster 2: [5.006 3.418 1.464 0.244]
Cluster 3: [6.85       3.07368421 5.74210526 2.07105263]


In [12]:
# Compute Jaccard distance for each cluster
print("\nJaccard Distance:")
for cluster in range(3):
    ground_truth_labels = data[data.columns[-1]].values
    cluster_labels = ground_truth_labels[cluster_assignments == cluster]

    true_positives = np.sum(cluster_labels == ('Iris ' + str(cluster+1)))
    
    false_positives = np.sum(cluster_labels != ('Iris ' + str(cluster+1)))
    
    false_negatives = np.sum(ground_truth_labels == ('Iris ' + str(cluster+1))) - true_positives
    
    

    jaccard_distance = false_positives / (true_positives + false_positives + false_negatives)
    print(f"Cluster {cluster+1}: {jaccard_distance}")



Jaccard Distance:
Cluster 1: 1.0
Cluster 2: 1.0
Cluster 3: 1.0
