In [2]:
import pandas as pd
import torch
import time
from flash_attn import flash_attn_qkvpacked_func, flash_attn_func
from tqdm import tqdm  # 导入tqdm

# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the table of test dimensions from the Excel workbook.
df = pd.read_excel('test_dim_data.xlsx', engine='openpyxl')

# Column-wise accumulator for the timing results: one empty list per column.
results = {
    column: []
    for column in (
        'batchsize',
        'nheads',
        'headdim',
        'seqlen',
        'average_time',
        'all_times',
    )
}




In [16]:
# Sanity check: how many CUDA devices PyTorch reports in this kernel.
torch.cuda.device_count()

1

In [3]:
# Display the torch.device chosen in the setup cell.
device

device(type='cuda')

In [6]:
# Peek at the number of heads in the first row of the dimension table.
df.loc[0, "nheads"]

40

In [14]:
# Build sample q/k/v tensors from the first row of the dimension table and
# check which device they end up on.
batch_size = int(df.loc[0, 'batchsize'])
nheads = int(df.loc[0, 'nheads'])
headdim = int(df.loc[0, 'headdim'])
seqlen = int(df.loc[0, 'seqlen'])

# Allocate directly on the target device in fp16 rather than creating an fp32
# tensor on the CPU and then copying and casting it.
shape = (batch_size, seqlen, nheads, headdim)
q = torch.randn(shape, device=device, dtype=torch.float16)
k = torch.randn(shape, device=device, dtype=torch.float16)
v = torch.randn(shape, device=device, dtype=torch.float16)
q.device


device(type='cuda', index=0)

In [11]:
# Inspect the element type of the current q tensor.
q.dtype

torch.float16

In [12]:
# Read the first row's dimensions and create q with torch.randn's defaults
# (no device move, no cast), then inspect the resulting dtype.
batch_size, nheads, headdim, seqlen = (
    int(df[name][0]) for name in ('batchsize', 'nheads', 'headdim', 'seqlen')
)
q = torch.randn(batch_size, seqlen, nheads, headdim)
q.dtype

torch.float32

In [13]:
# Inspect which device the current q tensor lives on.
q.device

device(type='cpu')

In [None]:
# Benchmark flash_attn_func for every row of the dimension table and write the
# per-configuration timings (in seconds) to a new Excel file.
N_WARMUP = 10   # untimed calls per configuration, to absorb one-time start-up cost
N_ITERS = 100   # timed calls per configuration

# CUDA work is queued asynchronously, so the host clock only reflects kernel
# execution time if we wait for the device before reading it.
use_cuda = device.type == 'cuda'

# Start from empty result columns so that re-running this cell does not append
# duplicate rows to the shared `results` dict.
for column in results.values():
    column.clear()

for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing rows"):
    batch_size = int(row['batchsize'])
    nheads = int(row['nheads'])
    headdim = int(row['headdim'])
    seqlen = int(row['seqlen'])

    # Allocate the inputs once per configuration, directly on the device in
    # fp16, instead of generating them on the CPU and copying every iteration.
    shape = (batch_size, seqlen, nheads, headdim)
    q = torch.randn(shape, device=device, dtype=torch.float16)
    k = torch.randn(shape, device=device, dtype=torch.float16)
    v = torch.randn(shape, device=device, dtype=torch.float16)

    # Warm-up calls are executed but not timed.
    for _warmup in range(N_WARMUP):
        flash_attn_func(q, k, v, dropout_p=0.0, softmax_scale=None, causal=False)

    times = []

    # leave=False removes the inner progress bar once it completes.
    for _iteration in tqdm(range(N_ITERS), desc=f"Testing {seqlen} seqlen", leave=False):
        if use_cuda:
            torch.cuda.synchronize()  # drain pending work before starting the clock
        start_time = time.perf_counter()
        flash_attn_func(q, k, v, dropout_p=0.0, softmax_scale=None, causal=False)
        if use_cuda:
            torch.cuda.synchronize()  # wait for the kernel to actually finish
        times.append(time.perf_counter() - start_time)

    average_time = sum(times) / len(times)

    # Record this configuration's results.
    results['batchsize'].append(batch_size)
    results['nheads'].append(nheads)
    results['headdim'].append(headdim)
    results['seqlen'].append(seqlen)
    results['average_time'].append(average_time)
    results['all_times'].append(times)

# Save the collected results to a new Excel file.
results_df = pd.DataFrame(results)
results_df.to_excel('test_dim_data_results.xlsx', index=False)