In [2]:
import numpy as np
import pandas as pd


In [3]:
# Seven labelled sample points, written one row per point:
# (label, Feature 1, Feature 2, Feature 3).
records = [
    ('P1', 2.1, 3.1, 1.6),
    ('P2', 3.2, 3.6, 2.1),
    ('P3', 3.6, 3.1, 2.6),
    ('P4', 7.9, 8.1, 7.6),
    ('P5', 8.6, 8.7, 8.2),
    ('P6', 9.1, 8.1, 8.6),
    ('P7', 1.2, 2.1, 1.7),
]
column_names = ('Điểm', 'Feature 1', 'Feature 2', 'Feature 3')

# Transpose the rows into the column-oriented mapping the DataFrame is built from.
data = {name: list(column) for name, column in zip(column_names, zip(*records))}

df = pd.DataFrame(data)
df


Unnamed: 0,Điểm,Feature 1,Feature 2,Feature 3
0,P1,2.1,3.1,1.6
1,P2,3.2,3.6,2.1
2,P3,3.6,3.1,2.6
3,P4,7.9,8.1,7.6
4,P5,8.6,8.7,8.2
5,P6,9.1,8.1,8.6
6,P7,1.2,2.1,1.7


In [4]:
# Split the frame into the numeric feature matrix and the list of point labels.
feature_columns = ['Feature 1', 'Feature 2', 'Feature 3']
X = df.loc[:, feature_columns].values
labels = list(df['Điểm'])

X, labels


(array([[2.1, 3.1, 1.6],
        [3.2, 3.6, 2.1],
        [3.6, 3.1, 2.6],
        [7.9, 8.1, 7.6],
        [8.6, 8.7, 8.2],
        [9.1, 8.1, 8.6],
        [1.2, 2.1, 1.7]]),
 ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7'])

In [5]:
def euclidean(p1, p2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    p1, p2 : array-like
        Coordinates of the two points. Lists, tuples and NumPy arrays are
        all accepted; both are converted with ``np.asarray`` before the
        subtraction, so plain Python sequences no longer raise ``TypeError``.

    Returns
    -------
    numpy.floating
        Square root of the sum of squared coordinate differences.
    """
    delta = np.asarray(p1) - np.asarray(p2)
    return np.sqrt(np.sum(delta ** 2))


In [6]:
def farthest_points(indices):
    """Find the two indices whose rows of ``X`` are farthest apart.

    Every ordered combination drawn from ``indices`` is scanned, keeping only
    those where the first index is smaller than the second, and the distance
    is measured with ``euclidean``. Only a strictly larger distance replaces
    the current best, so the first pair reaching the maximum is the one kept.

    Returns
    -------
    tuple
        ``(pair, distance)`` where ``pair`` is ``(i, j)`` with ``i < j``.
        If no such pair exists the result is ``(None, -1)``.
    """
    best_pair, best_dist = None, -1

    candidates = ((a, b) for a in indices for b in indices if a < b)
    for a, b in candidates:
        gap = euclidean(X[a], X[b])
        if gap > best_dist:
            best_pair, best_dist = (a, b), gap

    return best_pair, best_dist


In [7]:
def split_cluster(indices):
    """Split one cluster in two around its two farthest-apart members.

    The farthest pair reported by ``farthest_points`` provides the two seeds.
    Every other index joins the seed it is strictly closer to (measured with
    ``euclidean`` on rows of ``X``); a tie goes to the second seed.

    Parameters
    ----------
    indices : sequence of int
        Row indices of ``X`` that make up the cluster to split.

    Returns
    -------
    tuple
        ``(cluster1, cluster2, dist)``: the two index lists, each starting
        with its seed, and the distance between the two seeds.

    Raises
    ------
    ValueError
        If ``farthest_points`` finds no seed pair, i.e. the cluster holds
        fewer than two distinct indices and cannot be split.
    """
    pair, dist = farthest_points(indices)
    if pair is None:
        # Previously this surfaced as an opaque "cannot unpack NoneType" TypeError.
        raise ValueError(
            "split_cluster needs at least two distinct indices, got "
            f"{list(indices)!r}"
        )
    seed_a, seed_b = pair

    cluster1 = [seed_a]
    cluster2 = [seed_b]

    for k in indices:
        if k == seed_a or k == seed_b:
            continue  # seeds are already placed
        to_a = euclidean(X[k], X[seed_a])
        to_b = euclidean(X[k], X[seed_b])
        target = cluster1 if to_a < to_b else cluster2
        target.append(k)

    return cluster1, cluster2, dist


In [8]:
# Start from a single cluster that holds every row index of X.
clusters = [[*range(len(X))]]
# Counter for the split rounds.
step = 1

TOP-DOWN HIERARCHICAL CLUSTERING


In [9]:
# Keep splitting until every cluster is a singleton.
while not all(len(c) <= 1 for c in clusters):
    # Stable descending sort by size: the largest cluster comes first and is taken out.
    clusters = sorted(clusters, key=len, reverse=True)
    current, clusters = clusters[0], clusters[1:]

    c1, c2, dist = split_cluster(current)

    # Report this round: the cluster being split, the seed distance, and both halves.
    print(f"Vòng {step}: Tách cụm {[labels[i] for i in current]}")
    print(f"  → Hai seed xa nhất, khoảng cách = {dist:.4f}")
    print(f"  → Cụm 1: {[labels[i] for i in c1]}")
    print(f"  → Cụm 2: {[labels[i] for i in c2]}")
    print("-" * 60)

    # Both halves go to the back of the work list.
    clusters.extend((c1, c2))
    step += 1


Vòng 1: Tách cụm ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7']
  → Hai seed xa nhất, khoảng cách = 12.0839
  → Cụm 1: ['P6', 'P4', 'P5']
  → Cụm 2: ['P7', 'P1', 'P2', 'P3']
------------------------------------------------------------
Vòng 2: Tách cụm ['P7', 'P1', 'P2', 'P3']
  → Hai seed xa nhất, khoảng cách = 2.7514
  → Cụm 1: ['P3', 'P2']
  → Cụm 2: ['P7', 'P1']
------------------------------------------------------------
Vòng 3: Tách cụm ['P6', 'P4', 'P5']
  → Hai seed xa nhất, khoảng cách = 1.5620
  → Cụm 1: ['P4']
  → Cụm 2: ['P6', 'P5']
------------------------------------------------------------
Vòng 4: Tách cụm ['P3', 'P2']
  → Hai seed xa nhất, khoảng cách = 0.8124
  → Cụm 1: ['P2']
  → Cụm 2: ['P3']
------------------------------------------------------------
Vòng 5: Tách cụm ['P7', 'P1']
  → Hai seed xa nhất, khoảng cách = 1.3491
  → Cụm 1: ['P1']
  → Cụm 2: ['P7']
------------------------------------------------------------
Vòng 6: Tách cụm ['P6', 'P5']
  → Hai seed xa nhất, k

In [10]:
# Print the final clusters, one list of point labels per line.
print("KẾT QUẢ CUỐI:")
for members in clusters:
    names = [labels[idx] for idx in members]
    print(names)


KẾT QUẢ CUỐI:
['P4']
['P2']
['P3']
['P1']
['P7']
['P5']
['P6']


In [11]:
import pandas as pd
from sklearn.cluster import AgglomerativeClustering

# Seven labelled sample points, one row per point:
# (label, Feature 1, Feature 2, Feature 3).
records = [
    ('P1', 2.1, 3.1, 1.6),
    ('P2', 3.2, 3.6, 2.1),
    ('P3', 3.6, 3.1, 2.6),
    ('P4', 7.9, 8.1, 7.6),
    ('P5', 8.6, 8.7, 8.2),
    ('P6', 9.1, 8.1, 8.6),
    ('P7', 1.2, 2.1, 1.7),
]
column_names = ('Điểm', 'Feature 1', 'Feature 2', 'Feature 3')
data = {name: list(column) for name, column in zip(column_names, zip(*records))}

df = pd.DataFrame(data)
feature_columns = ['Feature 1', 'Feature 2', 'Feature 3']
X = df.loc[:, feature_columns].values

# Single-linkage agglomerative clustering on Euclidean distance, stopped at two clusters.
model = AgglomerativeClustering(n_clusters=2, linkage='single', metric='euclidean')

# Store the cluster id assigned to each point in a new column.
cluster_ids = model.fit_predict(X)
df['Cụm'] = cluster_ids

print(df.loc[:, ['Điểm', 'Cụm']])


  Điểm  Cụm
0   P1    0
1   P2    0
2   P3    0
3   P4    1
4   P5    1
5   P6    1
6   P7    0


In [12]:
import torch
import pandas as pd

# NOTE: this cell rebinds data, df, X, labels, euclidean and clusters, which the
# earlier NumPy cells also define. Re-run those cells before re-running the
# divisive (top-down) section after this one.
data = {
    'Điểm': ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7'],
    'Feature 1': [2.1, 3.2, 3.6, 7.9, 8.6, 9.1, 1.2],
    'Feature 2': [3.1, 3.6, 3.1, 8.1, 8.7, 8.1, 2.1],
    'Feature 3': [1.6, 2.1, 2.6, 7.6, 8.2, 8.6, 1.7]
}

df = pd.DataFrame(data)

X = torch.tensor(
    df[['Feature 1', 'Feature 2', 'Feature 3']].values,
    dtype=torch.float32
)

labels = df['Điểm'].tolist()

# Number of clusters at which merging stops.
N_CLUSTERS = 2


def euclidean(p1, p2):
    """Return the Euclidean distance between two tensors as a 0-d tensor."""
    return torch.sqrt(torch.sum((p1 - p2) ** 2))


def single_linkage(points, n_clusters=2):
    """Bottom-up single-linkage clustering of the rows of ``points``.

    Every row starts as its own cluster. The two clusters whose closest
    members are nearest to each other are merged repeatedly until only
    ``n_clusters`` clusters remain. Ties are resolved in scan order: the
    first cluster pair reaching the strictly smallest distance is merged.

    Parameters
    ----------
    points : torch.Tensor
        Tensor indexed by row; each row is one point.
    n_clusters : int, default 2
        Number of clusters to stop at. If it is not smaller than the number
        of rows, no merge happens and every row stays a singleton.

    Returns
    -------
    list of list of int
        Row indices grouped by cluster.

    Raises
    ------
    ValueError
        If ``n_clusters`` is smaller than 1.
    """
    if n_clusters < 1:
        raise ValueError(f"n_clusters must be at least 1, got {n_clusters}")

    n_points = len(points)
    # Compute each point-to-point distance once instead of on every merge round.
    dist = [
        [euclidean(points[a], points[b]).item() for b in range(n_points)]
        for a in range(n_points)
    ]

    groups = [[idx] for idx in range(n_points)]
    while len(groups) > n_clusters:
        best_gap = float('inf')
        best_pair = None

        for i in range(len(groups)):
            for j in range(i + 1, len(groups)):
                # Single linkage: cluster distance is the closest pair of members.
                gap = min(dist[a][b] for a in groups[i] for b in groups[j])
                if gap < best_gap:
                    best_gap = gap
                    best_pair = (i, j)

        i, j = best_pair
        # i < j, so removing position j leaves the merged cluster at position i.
        groups[i] += groups[j]
        groups.pop(j)

    return groups


clusters = single_linkage(X, N_CLUSTERS)

for number, members in enumerate(clusters, start=1):
    print(f"Cụm {number}:", [labels[i] for i in members])


Cụm 1: ['P1', 'P2', 'P3', 'P7']
Cụm 2: ['P4', 'P5', 'P6']
