In [None]:
import wandb
import pandas as pd
import os
# Point the wandb client at a custom API endpoint. This is set before
# wandb.login() so that the login and the API calls below use it.
os.environ["WANDB_BASE_URL"] = "https://api.bandw.top" 
# Log in to wandb
wandb.login()

# Reference list of known runs on the gnode5 project (label <tab> entity/project/run_id):
# origin_float16	ac-milan/verl_train_gnode5/gscirgoa
# origin	ac-milan/verl_train_gnode5/i41hd1u6
# en_di_2_23	ac-milan/verl_train_gnode5/g96tqll8
# er_pyr_3	ac-milan/verl_train_gnode5/y5wbrs1d


# Reference list of known runs on the verl_train project:
# origin	ac-milan/verl_train/vke8gix4
# er_di_2_3	ac-milan/verl_train/wxh4hh13


# Fetch two specific runs by their entity/project/run_id path
# (this does not enumerate every run in the project).
api = wandb.Api()
# "origin" run from the list above.
run = api.run("ac-milan/verl_train_gnode5/i41hd1u6")
# Alternative baseline path: ac-milan/verl_train/vke8gix4
# "en_di_2_23" run from the list above.
run2 = api.run("ac-milan/verl_train_gnode5/g96tqll8")
# History of the second run; the first run's history is loaded into `df` in the next cell.
# NOTE(review): history() presumably returns a sampled pandas DataFrame by default - confirm against the wandb docs.
df2 = run2.history()

: 

In [None]:
# Load the logged history of the first run (`run` is created in the first cell).
df = run.history()
# Bare last expression so the notebook renders the DataFrame as the cell output.
df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def smooth_data(data, window_size):
    """Return the trailing rolling mean of ``data``.

    Args:
        data: Object exposing a pandas-style ``rolling`` method (e.g. a Series).
        window_size: Number of rows in the rolling window.

    Returns:
        The rolling mean. ``min_periods=1`` means a value is produced as soon
        as one observation is available, so the head of the series is averaged
        over a shorter window instead of being NaN.
    """
    trailing_window = data.rolling(window=window_size, min_periods=1)
    return trailing_window.mean()

def plot_metrics(df, mode="cal", stat="mean", max_step=None, window_size=10, show_raw=True):
    """Plot a 3x3 grid of layer-1 metrics from a single history DataFrame.

    Rows of the grid are the metrics ('Response Entropy 1', 'Effective Rank',
    'Curvature'); columns are the plain value, ' diff' and ' diff 2' variants.
    Each subplot overlays the 'overall', 'correct' and 'incorrect' series whose
    column exists in ``df``.

    Column names looked up:
        mode == "cal": ``cal/{category}/layer_1/{metric}{diff}/{stat}``
        mode == "val": ``val/cal_{category}/simplelr_qwen/layer_1/{metric}{diff}``
                       (``stat`` is not part of the column name in this mode)

    Args:
        df: DataFrame of logged history. The ``_step`` column is used as the
            x axis when present, otherwise the row position is used.
        mode: ``"cal"`` or ``"val"``; selects the column naming scheme above.
        stat: Statistic suffix used in ``"cal"`` column names.
        max_step: If not None, only rows with step <= max_step are plotted.
        window_size: Window of the rolling mean used for the smoothed curve.
        show_raw: If True, also draw the unsmoothed series, semi-transparent.

    Raises:
        ValueError: If ``mode`` is not ``"cal"``/``"val"``, or if no expected
            column is present in ``df``.
    """
    # Fail early with a clear message: previously an unknown mode left
    # `col_name` unassigned and surfaced as an UnboundLocalError.
    if mode not in ("cal", "val"):
        raise ValueError(f"不支持的 mode='{mode}'，仅支持 'cal' 或 'val'")

    metrics = ['Response Entropy 1', 'Effective Rank', 'Curvature']
    diffs = ['', ' diff', ' diff 2']
    categories = ['overall', 'correct', 'incorrect']

    # Map (metric, diff, category) -> column name for every column that exists.
    matched_cols = {}
    for metric in metrics:
        for diff in diffs:
            base_name = f"{metric}{diff}"
            for category in categories:
                if mode == "cal":
                    col_name = f"cal/{category}/layer_1/{base_name}/{stat}"
                else:
                    col_name = f"val/cal_{category}/simplelr_qwen/layer_1/{base_name}"

                if col_name in df.columns:
                    matched_cols[(metric, diff, category)] = col_name

    # Nothing to draw: report it instead of showing an empty grid.
    if not matched_cols:
        raise ValueError(f"未找到任何匹配的列，请检查 mode='{mode}', stat='{stat}'")

    # The x axis and the max_step mask are the same for every series,
    # so compute them once instead of once per subplot line.
    steps = df["_step"] if "_step" in df.columns else np.arange(len(df))
    mask = None
    if max_step is not None:
        mask = steps <= max_step
        steps = steps[mask]

    fig, axes = plt.subplots(3, 3, figsize=(18, 12))
    colors = {'overall': 'blue', 'correct': 'green', 'incorrect': 'red'}

    for i, metric in enumerate(metrics):
        for j, diff in enumerate(diffs):
            ax = axes[i, j]
            found = False
            for category in categories:
                key = (metric, diff, category)
                if key not in matched_cols:
                    continue

                data = df[matched_cols[key]]
                if mask is not None:
                    data = data[mask]

                smoothed = smooth_data(data, window_size)

                if show_raw:
                    ax.plot(steps, data, color=colors[category], alpha=0.3, linewidth=1, label=f"{category} raw")
                ax.plot(steps, smoothed, label=f"{category}", color=colors[category], linewidth=2)

                found = True

            ax.set_title(f"{metric} {diff}")
            # Only request a legend when at least one labelled line was drawn.
            if found:
                ax.legend()
            ax.grid(True)

    plt.suptitle(f"Metrics ({mode}, stat: {stat})", fontsize=16)
    # Leave the top 5% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

In [None]:
# Assumes `df` is a DataFrame obtained from a wandb run's history() or loaded locally
plot_metrics(
    df=df,
    mode='cal',          # 'cal' = training data, 'val' = test data
    stat='mean',         # options: 'mean', 'max', 'min'
    max_step=100,       # largest step to include in the plot
    window_size=50,      # smoothing window size
    show_raw=False        # whether to also draw the raw (unsmoothed) curves
)

In [None]:
def compare_metrics(
    df1,
    df2,
    label1="Run A",
    label2="Run B",
    mode="cal",
    stat="mean",
    max_step=None,
    window_size=10,
    show_raw=True,
    categories=("overall", "correct", "incorrect")
):
    """Overlay the layer-1 metrics of two history DataFrames on one 3x3 grid.

    Rows of the grid are the metrics ('Response Entropy 1', 'Effective Rank',
    'Curvature'); columns are the plain value, ' diff' and ' diff 2' variants.
    Series from ``df1`` are drawn with solid lines and series from ``df2``
    with dashed lines; the colour encodes the category.

    Column names looked up:
        mode == "cal": ``cal/{category}/layer_1/{metric}{diff}/{stat}``
        mode == "val": ``val/cal_{category}/simplelr_qwen/layer_1/{metric}{diff}``
                       (``stat`` is not part of the column name in this mode)

    Args:
        df1, df2: History DataFrames to compare. The ``_step`` column is used
            as the x axis when present, otherwise the row position is used.
        label1, label2: Legend labels for the two runs.
        mode: ``"cal"`` or ``"val"``; selects the column naming scheme above.
        stat: Statistic suffix used in ``"cal"`` column names.
        max_step: If not None, only rows with step <= max_step are plotted.
        window_size: Window of the rolling mean used for the smoothed curve.
        show_raw: If True, also draw the unsmoothed series, semi-transparent.
        categories: Categories to show, e.g. ``("overall", "correct")``.
            A single string is treated as one category. Categories other than
            'overall'/'correct'/'incorrect' are drawn in black.

    Raises:
        ValueError: If ``mode`` is not ``"cal"``/``"val"``, or if neither
            DataFrame contains any of the expected columns.
    """
    # Fail early with a clear message: previously an unknown mode left
    # `col_name` unassigned and surfaced as an UnboundLocalError.
    if mode not in ("cal", "val"):
        raise ValueError(f"不支持的 mode='{mode}'，仅支持 'cal' 或 'val'")

    # A bare string would otherwise be iterated character by character.
    # Materialise to a tuple because categories is iterated several times.
    if isinstance(categories, str):
        categories = (categories,)
    else:
        categories = tuple(categories)

    metrics = ['Response Entropy 1', 'Effective Rank', 'Curvature']
    diffs = ['', ' diff', ' diff 2']
    colors = {'overall': 'blue', 'correct': 'green', 'incorrect': 'red'}

    def get_matched_cols(df):
        # Map (metric, diff, category) -> column name for columns present in df.
        matched = {}
        for metric in metrics:
            for diff in diffs:
                base_name = f"{metric}{diff}"
                for category in categories:
                    if mode == "cal":
                        col_name = f"cal/{category}/layer_1/{base_name}/{stat}"
                    else:
                        col_name = f"val/cal_{category}/simplelr_qwen/layer_1/{base_name}"

                    if col_name in df.columns:
                        matched[(metric, diff, category)] = col_name
        return matched

    cols1 = get_matched_cols(df1)
    cols2 = get_matched_cols(df2)

    # Same contract as plot_metrics: an all-empty grid is reported as an error.
    if not cols1 and not cols2:
        raise ValueError(f"未找到任何匹配的列，请检查 mode='{mode}', stat='{stat}'")

    def plot_run(ax, df, cols, key, label, linestyle):
        # Draw one run's series for `key` on `ax`; return True if a line was drawn.
        if key not in cols:
            return False

        category = key[2]
        data = df[cols[key]]
        steps = df["_step"] if "_step" in df.columns else np.arange(len(data))

        if max_step is not None:
            mask = steps <= max_step
            steps = steps[mask]
            data = data[mask]

        color = colors.get(category, "black")
        smoothed = smooth_data(data, window_size)
        ax.plot(steps, smoothed, label=f"{label} - {category}", color=color, linewidth=2, linestyle=linestyle)

        if show_raw:
            # Raw curve is left unlabelled so the legend only lists smoothed lines.
            ax.plot(steps, data, color=color, alpha=0.3, linewidth=1, linestyle=linestyle)
        return True

    fig, axes = plt.subplots(3, 3, figsize=(18, 12))

    for i, metric in enumerate(metrics):
        for j, diff in enumerate(diffs):
            ax = axes[i, j]
            found = False
            for category in categories:
                key = (metric, diff, category)
                drew_first = plot_run(ax, df1, cols1, key, label1, '-')
                drew_second = plot_run(ax, df2, cols2, key, label2, '--')
                # Only mark the subplot as populated when a line was really drawn;
                # calling legend() on empty axes makes matplotlib emit a warning.
                found = found or drew_first or drew_second

            ax.set_title(f"{metric} {diff}")
            if found:
                ax.legend()
            ax.grid(True)

    plt.suptitle(f"Comparison: {label1} vs {label2} ({mode}, stat: {stat})", fontsize=16)
    # Leave the top 5% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

In [None]:
# Assumes `df` and `df2` are DataFrames obtained from wandb run history() calls or loaded locally
compare_metrics(
    df1=df,
    df2=df2,
    label1="Baseline",
    label2="Improved",
    mode="cal",
    stat="mean",
    max_step=160,
    window_size=40,
    show_raw=False,
    categories=("overall",) # full set: ("overall", "correct", "incorrect")
)