In [6]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform

def hierarchical_clustering(X):
    # Calculate the pairwise distances between observations
    distances = squareform(pdist(X, metric='euclidean'))
    np.fill_diagonal(distances, np.inf)  # Replace diagonal with infinities to avoid self-merging
    
    # Initialize labels for each observation
    labels = np.arange(X.shape[0])
    
    # Initialize the linkage matrix
    linkage_matrix = []
    
    while len(labels) > 1:
        # Find the two clusters that are closest together
        i, j = np.unravel_index(np.argmin(distances), distances.shape)
        min_dist = distances[i, j]
        
        # Record the linkage
        linkage_matrix.append([labels[i], labels[j], min_dist, 2])
        
        # Merge the clusters
        new_label = max(labels) + 1
        labels = np.append(labels, new_label)
        labels = np.delete(labels, [i, j])
        
        # Calculate the new distances
        new_distances = np.min(distances[[i, j], :], axis=0)
        new_distances = np.delete(new_distances, [i, j])
        new_distances = np.append(new_distances, np.inf)  # Distance to itself is inf
        
        # Remove the old rows and columns from the distances matrix
        distances = np.delete(distances, [i, j], axis=0)
        distances = np.delete(distances, [i, j], axis=1)
        
        # Add the new row and column for the merged cluster
        distances = np.vstack((distances, new_distances))
        new_distances = np.append(new_distances, np.inf)  # Add inf at the end for the new column
        distances = np.column_stack((distances, new_distances))
    
    return np.array(linkage_matrix)

# Generate some random data
random_data = np.random.rand(8, 1)  # 10 observations with 2 features each

# Perform hierarchical clustering
linkage_matrix = hierarchical_clustering(random_data)

# Plot the dendrogram
plt.figure(figsize=(8, 4))
dendrogram(linkage_matrix)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Index')
plt.ylabel('Distance')
plt.show()

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 6 and the array at index 1 has size 7