In [1]:
import numpy as np
# Seed NumPy's global RNG so the random points drawn with np.random.rand
# further down are the same on every fresh run.
np.random.seed(42)

import warnings
# Suppress every UserWarning and FutureWarning from this point on.
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", FutureWarning)

# NOTE(review): `random` is not referenced in the cells visible here -- confirm
# whether it is still needed.
import random
from sklearn.cluster import KMeans
from sklearn_extra.cluster import KMedoids

def kmeans_from_points(points):
    """Fit k-means for k = 1..5 and summarise how tight the clusters are.

    For each k, every cluster's spread is the mean absolute per-coordinate
    deviation of its members from the fitted cluster center. A cluster with
    at most one member contributes 0.0. The score for k is the mean of the
    per-cluster spreads, rounded to 4 decimals.

    Args:
        points: NumPy array of samples (one row per sample), as accepted by
            ``KMeans.fit``; it is also indexed with an integer index array.

    Returns:
        dict mapping each k in 1..5 to its rounded mean spread.
    """
    spread_by_k = {}
    for k in range(1, 6):
        model = KMeans(n_clusters=k, algorithm="lloyd", init="k-means++")
        model.fit(points)
        assignments, centers = model.labels_, model.cluster_centers_

        per_cluster = []
        for cluster_id in range(k):
            member_idx = np.where(assignments == cluster_id)[0]
            if member_idx.size > 1:
                # Slice (not index) the center so it stays a single row that
                # broadcasts against the member rows.
                offsets = points[member_idx] - centers[cluster_id:cluster_id + 1]
                per_cluster.append(np.abs(offsets).mean())
            else:
                # Empty or singleton cluster: no spread to measure.
                per_cluster.append(0.0)
        spread_by_k[k] = round(np.mean(per_cluster), 4)
    return spread_by_k

def clustering(tensor):
    """Print the k-means spread summary for a 2-D array of points.

    Args:
        tensor: array whose ``shape`` has exactly two entries (rows, columns);
            it is forwarded unchanged to ``kmeans_from_points``.
    """
    # Unpacking into two names also rejects inputs that are not 2-D.
    n_points, _ = tensor.shape
    summary = kmeans_from_points(tensor)
    # NOTE(review): the label says "kmedoids" although the numbers come from
    # kmeans_from_points; the text is kept as-is because it is runtime output.
    print(f"### kmedoids results -- random {n_points} pairs: {summary}")

N, D = 100, 2
points = np.random.rand(N, D)

# O: the raw samples rescaled with 2x - 1 (no planted cluster structure).
O = 2.0 * points - 1.0
clustering(O)

# A: two planted groups -- the first half is sign-flipped, the second half is
# left exactly as sampled.
A = points.copy()
A[:N // 2] *= -1.0
clustering(A)

# B: three planted groups -- each third is shrunk by 0.66 and shifted by its
# own offset.
B = points.copy()
third, two_thirds = N // 3, 2 * N // 3
for rows, offset in (
    (slice(None, third), -1.0),
    (slice(third, two_thirds), -0.33),
    (slice(two_thirds, None), 0.33),
):
    B[rows] = 0.66 * B[rows] + offset
clustering(B)


### kmedoids results -- random 100 pairs: {1: 0.5176, 2: 0.3712, 3: 0.2796, 4: 0.2342, 5: 0.2005}
### kmedoids results -- random 100 pairs: {1: 0.4844, 2: 0.2573, 3: 0.2063, 4: 0.1816, 5: 0.1528}
### kmedoids results -- random 100 pairs: {1: 0.5106, 2: 0.2661, 3: 0.1691, 4: 0.1413, 5: 0.1287}


In [8]:
import os
import ast
from collections import defaultdict

def parse_defaultdict(line):
    """Extract the dict payload from a printed ``defaultdict(float)`` repr.

    The line is expected to contain text of the form
    ``defaultdict(<class 'float'>, {...})``; anything before the marker and
    any whitespace/newline after the closing parenthesis is ignored.

    Args:
        line: one log line containing the defaultdict repr.

    Returns:
        The dict literal inside the repr, parsed with ``ast.literal_eval``.

    Raises:
        ValueError: if the marker or the closing ``)`` is missing.
        SyntaxError / ValueError: propagated from ``ast.literal_eval`` when
            the payload itself is not a valid Python literal.
    """
    target = "defaultdict(<class 'float'>, "
    st = line.find(target)
    if st < 0:
        # find() returns -1 on a miss; slicing with it would silently grab the
        # last character instead of failing with a clear message.
        raise ValueError(f"no defaultdict(float) repr found in line: {line!r}")

    payload = line[st + len(target):]
    # Locate the closing ")" of the repr explicitly rather than assuming it is
    # the very last character (a trailing newline or space would break that).
    end = payload.rfind(")")
    if end < 0:
        raise ValueError(f"unterminated defaultdict repr in line: {line!r}")
    return ast.literal_eval(payload[:end])

def parse_dict(line):
    """Parse the dict literal that follows the first ``": {"`` in a log line.

    Args:
        line: one log line of the form ``<label>: {k: v, ...}``.

    Returns:
        The dict parsed with ``ast.literal_eval``.

    Raises:
        ValueError: if the line contains no ``": {"`` marker.
        SyntaxError / ValueError: propagated from ``ast.literal_eval`` when
            the text after the marker is not a valid Python literal.
    """
    marker = ": {"
    pos = line.find(marker)
    if pos < 0:
        # Without this check a miss (-1) turned into offset 1 and the function
        # parsed line[1:], which can succeed and return an unrelated dict.
        raise ValueError(f"no ': {{' marker found in line: {line!r}")
    # Skip the ": " so the slice starts at the opening brace.
    return ast.literal_eval(line[pos + 2:].strip())

def clustering(path, show_norm=False):
    """Aggregate the per-example statistics found in one log file and print them.

    Three kinds of lines are recognised (after stripping whitespace):

    * ``### grad accum`` -- a printed ``defaultdict(float)``; values are summed
      per key after truncating the key to its first three dot-separated parts.
    * ``### kmedoids results -- all`` -- a dict keyed by cluster count.
    * ``### kmedoids results -- last`` -- same format, accumulated separately.

    The cluster scores are averaged over the number of matching lines and
    printed for cluster counts 1..5, rounded to 4 decimals.

    Args:
        path: path of the log file to read.
        show_norm: when True, also print the summed ``grad accum`` values,
            largest first. Defaults to False, which matches the previous
            output exactly.

    Notes:
        If a file has no "all" (or "last") lines the corresponding averages
        are reported as ``nan`` instead of raising ``ZeroDivisionError``, so a
        single incomplete log does not abort a loop over several files.
    """
    def _average_by_k(totals, n_lines):
        # No matching lines -> there is nothing to average; report nan rather
        # than dividing by zero.
        if n_lines == 0:
            return [(i, float("nan")) for i in range(1, 6)]
        return [(i, round(totals.get(i, 0.0) / n_lines, 4)) for i in range(1, 6)]

    n_grad = n_all = n_last = 0
    norm = defaultdict(float)
    cluster_all = defaultdict(float)
    cluster_last = defaultdict(float)
    with open(path, "r") as fin:
        for line in fin:
            line = line.strip()
            if "### grad accum" in line:
                n_grad += 1
                for k, v in parse_defaultdict(line).items():
                    # Collapse keys to their first three dotted components.
                    norm[".".join(k.split(".")[:3])] += v
            if "### kmedoids results -- all" in line:
                n_all += 1
                for k, v in parse_dict(line).items():
                    cluster_all[k] += v
            if "### kmedoids results -- last" in line:
                n_last += 1
                for k, v in parse_dict(line).items():
                    cluster_last[k] += v

    # The example count is the number of "### grad accum" lines.
    print(f"Statistics based on {n_grad} DAPO examples")
    if show_norm:
        ranked = sorted(((round(v, 4), k) for k, v in norm.items()), key=lambda x: -x[0])
        listing = "\n".join(f"{value} {key}" for value, key in ranked)
        print(f"### grad accum: \n{listing}")
    print(f"### cluster results -- all: {_average_by_k(cluster_all, n_all)}")
    print(f"### cluster results -- last: {_average_by_k(cluster_last, n_last)}")

# NOTE(review): absolute, machine-specific log directory -- consider moving it
# to a configurable constant or environment variable.
base = "/apdcephfs_cq10/share_1603164/user/lfsong/exp.tencent_chat/grad_sim_logs"

# Logs are processed in this order: GRPO runs first, then PPO runs.
grpo_logs = [
    "qwen3_base_grpo.log",
    "grpo_bl_20.log",
    "grpo_bl_250.log",
    "grpo_ppl_250.log",
    "grpo_bl_400_1620.log",
]
ppo_logs = [
    "qwen3_base_ppo.log",
    "ppo_bl_250.log",
    "ppo_mhead_250.log",
]

for filename in grpo_logs + ppo_logs:
    path = os.path.join(base, filename)
    print(f"====={filename}=====")
    clustering(path)

=====qwen3_base_grpo.log=====


Statistics based on 560 DAPO examples
### cluster results -- all: [(1, 8.5957), (2, 2.9884), (3, 1.7006), (4, 1.1678), (5, 0.8)]
### cluster results -- last: [(1, 10.4557), (2, 3.8803), (3, 2.3285), (4, 1.6606), (5, 1.1696)]
=====grpo_bl_20.log=====
Statistics based on 560 DAPO examples
### cluster results -- all: [(1, 2.3628), (2, 1.0793), (3, 0.8146), (4, 0.6744), (5, 0.5827)]
### cluster results -- last: [(1, 4.3099), (2, 2.1138), (3, 1.5912), (4, 1.3174), (5, 1.1388)]
=====grpo_bl_250.log=====
Statistics based on 560 DAPO examples
### cluster results -- all: [(1, 1.7835), (2, 0.9614), (3, 0.7257), (4, 0.6052), (5, 0.5289)]
### cluster results -- last: [(1, 1.9829), (2, 1.0529), (3, 0.7961), (4, 0.6587), (5, 0.5722)]
=====grpo_ppl_250.log=====
Statistics based on 560 DAPO examples
### cluster results -- all: [(1, 1.7167), (2, 0.9027), (3, 0.6906), (4, 0.5752), (5, 0.5028)]
### cluster results -- last: [(1, 3.0139), (2, 1.5585), (3, 1.1969), (4, 0.9983), (5, 0.8762)]
=====grpo_bl_400

In [None]:
from sklearn.manifold import MDS
import matplotlib.pyplot as plt

def kmedoids_from_distance(distance):
    """Run PAM k-medoids for k = 1..5 on a precomputed distance matrix.

    For each k, a cluster's score is the sum of distances from its members to
    the cluster medoid divided by (cluster size - 1); clusters with at most
    one member score 0.0. The value stored for k is the mean of the
    per-cluster scores, rounded to 4 decimals.

    Args:
        distance: NumPy array of pairwise distances, passed to
            ``KMedoids(metric="precomputed")`` and indexed as
            ``distance[member_rows, medoid_column]``.

    Returns:
        dict mapping each k in 1..5 to its rounded mean score.
    """
    score_by_k = {}
    for k in range(1, 6):
        model = KMedoids(
            n_clusters=k,
            metric="precomputed",
            method="pam",
            init="k-medoids++",
        )
        model.fit(distance)
        assignments = model.labels_
        medoids = model.medoid_indices_

        per_cluster = []
        for cluster_id in range(k):
            members = np.flatnonzero(assignments == cluster_id)
            if members.size <= 1:
                per_cluster.append(0.0)
                continue
            to_medoid = distance[members, medoids[cluster_id]]
            # Divide by size - 1: presumably the medoid's own entry is 0, so
            # only the other members contribute -- confirm zero diagonal.
            per_cluster.append(to_medoid.sum() / (members.size - 1))
        score_by_k[k] = round(np.mean(per_cluster), 4)
    return score_by_k

# dist_matrix: N x N distance matrix
# NOTE(review): neither `dist_matrix` nor `labels` is defined in the cells
# visible here (this cell has never been executed: In [None]). Both must be
# created earlier for this cell to run on a fresh kernel -- confirm the source.
# Embed the precomputed distances into 2-D; random_state is fixed to 42.
mds = MDS(n_components=2, dissimilarity='precomputed', random_state=42)
X_2d = mds.fit_transform(dist_matrix)

# Scatter the two embedding coordinates, coloring each point by `labels`.
plt.scatter(X_2d[:, 0], X_2d[:, 1], c=labels, cmap='tab10')
plt.title('MDS 2D projection')
plt.show()