In [2]:
import csv
import random
import numpy as np

def generate_trace(mode="Web", filename="unknown-trace.csv", *,
                   duration_us=10_000_000, seed: "int | None" = None,
                   write_header=False):
    """Generate a synthetic bursty packet trace and save it as a CSV file.

    Packets are produced in batches. At the start of each batch the current
    time decides whether the batch is a burst (label 1) or normal traffic
    (label 0); packets are then emitted until the batch's byte budget is
    spent or the trace duration is reached. Each packet becomes one CSV row
    ``key, packetSize, timestamp, label`` with ``timestamp`` equal to the
    packet time in microseconds multiplied by 1000.

    NOTE(review): the gap between consecutive packets is drawn uniformly
    from 1-10 us in every mode, so the configured rates only size the
    per-batch byte budget and do not set the throughput of the trace. A
    burst batch also keeps label 1 until its budget is spent, so
    burst-labelled packets continue after the nominal burst window ends.

    Args:
        mode: Traffic profile, either ``"Web"`` or ``"Cache"``.
        filename: Path of the CSV file to write (overwritten if present).
        duration_us: Length of the trace in microseconds.
        seed: When given, packets are drawn from private generators seeded
            with this value, so the output is reproducible and the global
            ``random`` / ``numpy.random`` state is left untouched. When
            ``None`` the global generators are used.
        write_header: When true, write a ``key,packetSize,timestamp,label``
            header row before the packets.

    Raises:
        ValueError: If ``mode`` is not supported or ``duration_us`` is
            negative.
    """
    # === Base parameters ===
    sampling_interval_us = 25  # length of one normal-traffic batch
    num_keys = 1000            # flow IDs are folded into [0, num_keys)
    zipf_skew = 1.2

    # === Per-mode parameters ===
    if mode == "Web":
        burst_interval_us = 10_000  # one burst every 10 ms
        burst_duration_us = 50      # nominal burst window: 50 us
        burst_rate = 60e9           # 60 Gbps during a burst
        normal_rate = 10e9          # 10 Gbps otherwise
        burst_sizes, burst_weights = [1500, 1200, 800], [0.6, 0.3, 0.1]
        normal_sizes, normal_weights = [1500, 600, 200], [0.2, 0.3, 0.5]
    elif mode == "Cache":
        burst_interval_us = 50_000  # one burst every 50 ms
        burst_duration_us = 200     # nominal burst window: 200 us
        burst_rate = 80e9           # 80 Gbps during a burst
        normal_rate = 20e9          # 20 Gbps otherwise
        burst_sizes, burst_weights = [1500, 1000, 800], [0.4, 0.4, 0.2]
        normal_sizes, normal_weights = [1500, 800, 500], [0.2, 0.4, 0.4]
    else:
        raise ValueError("Unsupported mode: choose 'Web' or 'Cache'.")

    if duration_us < 0:
        raise ValueError("duration_us must be non-negative.")

    # Use the global generators unless a seed was requested, so callers that
    # seed `random` / `numpy.random` themselves keep getting the same output.
    if seed is None:
        py_rng, np_rng = random, np.random
    else:
        py_rng, np_rng = random.Random(seed), np.random.RandomState(seed)

    # Byte budget of one batch: rate (bit/s) * window (s) / 8.
    burst_bytes = int(burst_rate * burst_duration_us * 1e-6 / 8)
    normal_bytes = int(normal_rate * sampling_interval_us * 1e-6 / 8)

    # Pool of Zipf-distributed flow IDs that packets sample their key from.
    # At least one entry is kept so sampling never sees an empty pool.
    pool_size = max(1, int(duration_us // 5))
    flow_keys = (np_rng.zipf(zipf_skew, size=pool_size) % num_keys).tolist()

    # Generate packets.
    packets = []
    cur_time = 0
    print(f"[{mode}] 开始生成数据包...")
    while cur_time < duration_us:
        # Bursts start at every multiple of burst_interval_us, so the offset
        # inside the current interval tells whether we are in a burst window.
        if cur_time % burst_interval_us < burst_duration_us:
            budget, label = burst_bytes, 1
            sizes, weights = burst_sizes, burst_weights
        else:
            budget, label = normal_bytes, 0
            sizes, weights = normal_sizes, normal_weights

        sent_bytes = 0
        while sent_bytes < budget and cur_time < duration_us:
            pkt_size = py_rng.choices(sizes, weights=weights)[0]
            key = py_rng.choice(flow_keys)
            packets.append((key, pkt_size, cur_time * 1000, label))
            sent_bytes += pkt_size
            cur_time += py_rng.randint(1, 10)  # inter-packet gap in us

    print(f"[{mode}] 生成完毕，共 {len(packets)} 条记录")

    # Write the CSV output.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        if write_header:
            writer.writerow(["key", "packetSize", "timestamp", "label"])
        writer.writerows(packets)

    print(f"[{mode}] 已保存为 {filename}")


# Example run: write one trace file per supported mode, Web first.
for trace_mode, trace_file in (("Web", "web_trace.csv"),
                               ("Cache", "cache_trace.csv")):
    generate_trace(mode=trace_mode, filename=trace_file)


[Web] 开始生成数据包...
[Web] 生成完毕，共 1817163 条记录
[Web] 已保存为 web_trace.csv
[Cache] 开始生成数据包...
[Cache] 生成完毕，共 1817938 条记录
[Cache] 已保存为 cache_trace.csv
