# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the cancer data set from the notebook's content directory.
df = pd.read_csv("/content/cancer.csv")

# Feature matrix for clustering: every column from position 2 onwards, as a
# NumPy array.
# NOTE(review): this assumes the first two columns are 'id' and 'diagnosis'
# and that all remaining columns are numeric and free of NaN -- confirm
# against the CSV, since any other column would end up in X as well.
X = df.iloc[:, 2:].to_numpy()

# Euclidean distance helper
def distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    ``b`` is subtracted from ``a``, every entry of the difference is squared,
    the squares are summed over all entries, and the square root of that
    total is returned.
    """
    delta = a - b
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)

# Define the agglomerative clustering algorithm
def agglomerative_clustering(X, n_clusters, linkage):
    """Greedily merge samples into ``n_clusters`` groups.

    Every sample starts as its own cluster.  The two closest clusters are
    merged repeatedly until only ``n_clusters`` remain.  Distances between
    samples are Euclidean; the distance between two clusters depends on
    ``linkage``:

    * ``'single'``   -- smallest distance between any two members,
    * ``'complete'`` -- largest distance between any two members,
    * ``'average'``  -- mean distance over all pairs of members.

    Parameters
    ----------
    X : array-like
        Sample data; the first axis indexes samples.  Any further axes are
        flattened into one feature vector per sample.
    n_clusters : int
        Number of clusters to stop at; must be at least 1.
    linkage : str
        One of ``'single'``, ``'complete'`` or ``'average'``.

    Returns
    -------
    list of list of int
        One list of sample indices (rows of ``X``) per cluster.  If ``X`` has
        no more than ``n_clusters`` samples, each sample is returned as its
        own cluster.

    Raises
    ------
    ValueError
        If ``linkage`` is not a supported name, if ``n_clusters`` is smaller
        than 1, or if merging cannot continue because no pair of remaining
        clusters has a finite distance (for example NaN/inf values in ``X``).
    """
    if linkage not in ('single', 'complete', 'average'):
        raise ValueError(
            f"linkage must be 'single', 'complete' or 'average', got {linkage!r}"
        )
    if n_clusters < 1:
        raise ValueError(f"n_clusters must be at least 1, got {n_clusters!r}")

    points = np.asarray(X, dtype=float)
    n_samples = points.shape[0]

    # Initialize clusters with each sample as a separate cluster
    clusters = [[i] for i in range(n_samples)]
    if n_samples <= n_clusters:
        return clusters
    points = points.reshape(n_samples, -1)

    # Pairwise sample distances are computed once, one row at a time so that
    # no (n_samples, n_samples, n_features) temporary is needed.
    dists = np.empty((n_samples, n_samples))
    with np.errstate(invalid='ignore', over='ignore'):
        for row, point in enumerate(points):
            dists[row] = np.sqrt(np.sum((points - point) ** 2, axis=1))
    # Non-finite distances (NaN/inf in X) must never be chosen as a minimum,
    # and a cluster is never merged with itself.
    dists[~np.isfinite(dists)] = np.inf
    np.fill_diagonal(dists, np.inf)

    # Member count per cluster, needed to weight the average-linkage update.
    sizes = np.ones(n_samples)

    # Row/column k of ``dists`` always describes ``clusters[k]``.
    while len(clusters) > n_clusters:
        # argmin scans in row-major order, so ties resolve to the pair with
        # the smallest first index, then the smallest second index; because
        # the matrix is symmetric this always yields first < second.
        first, second = divmod(int(np.argmin(dists)), len(clusters))
        if not np.isfinite(dists[first, second]):
            raise ValueError(
                "cannot merge further: no finite distance between the "
                "remaining clusters (does X contain NaN or inf?)"
            )

        # Distance from the merged cluster to every other cluster, derived
        # from the distances of its two parts.
        if linkage == 'single':
            merged = np.minimum(dists[first], dists[second])
        elif linkage == 'complete':
            merged = np.maximum(dists[first], dists[second])
        else:
            merged = (
                sizes[first] * dists[first] + sizes[second] * dists[second]
            ) / (sizes[first] + sizes[second])
        merged[first] = np.inf

        # The merged cluster takes over slot ``first``; slot ``second`` is
        # dropped, mirroring the update of ``clusters`` below.
        dists[first, :] = merged
        dists[:, first] = merged
        dists = np.delete(np.delete(dists, second, axis=0), second, axis=1)
        sizes[first] += sizes[second]
        sizes = np.delete(sizes, second)

        clusters[first].extend(clusters[second])
        del clusters[second]

    return clusters

# Cluster the data into two groups three times, requesting single, complete
# and average linkage in that order, and keep each result under its own name.
clusters_single, clusters_complete, clusters_average = [
    agglomerative_clustering(X, n_clusters=2, linkage=criterion)
    for criterion in ('single', 'complete', 'average')
]

# Draw the three clusterings side by side in one figure.  Each panel is a
# scatter plot of the first two columns of X, with one colour/legend entry
# per cluster.
plt.figure(figsize=(15, 5))

# (subplot position, title prefix, clusters) for each panel, left to right.
linkage_panels = (
    (131, 'Single', clusters_single),
    (132, 'Complete', clusters_complete),
    (133, 'Average', clusters_average),
)

for position, title_prefix, panel_clusters in linkage_panels:
    plt.subplot(position)
    for i, cluster in enumerate(panel_clusters):
        # ``cluster`` is a list of row indices into X.
        cluster_points = X[cluster]
        plt.scatter(cluster_points[:, 0], cluster_points[:, 1], label=f'Cluster {i+1}')
    # NOTE(review): the axis labels assume columns 0 and 1 of X are
    # radius_mean and texture_mean -- confirm against the CSV header.
    plt.xlabel('radius_mean')
    plt.ylabel('texture_mean')
    plt.title(f'{title_prefix} Linkage Clustering')
    plt.legend()

plt.tight_layout()
plt.show()

# Notebook output: KeyboardInterrupt: ignored