In [1]:
import pandas as pd

In [2]:
def load_data(path):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        path: Location of the file to read; passed straight to ``open``.

    Returns:
        A list with one string per line of the file, in file order. Every
        entry has had ``str.strip()`` applied, so blank lines are kept as
        empty strings rather than dropped.
    """
    with open(path) as handle:
        return [raw_line.strip() for raw_line in handle]

In [3]:
def processing_data(data):
    """Extract the column heading and the result rows from benchmark output lines.

    Lines whose first character is ``=`` are treated as separators. Only the
    lines from the third separator up to (not including) the second-to-last
    separator are kept. Inside that window the line right after the opening
    separator is the heading, and every tenth line starting at offset 3 is
    taken as a result row.

    Args:
        data: Sequence of text lines (e.g. what ``load_data`` returns).

    Returns:
        A ``(heading, rows)`` tuple. ``heading`` is the heading line split on
        whitespace; ``rows`` is a list of result lines, each split on
        whitespace. All values are strings.

    Raises:
        IndexError: If there are fewer than three separator lines, or the
            selected window is too short to contain a heading line.
    """
    separators = [
        pos for pos, line in enumerate(data)
        if len(line) > 0 and line[0] == '='
    ]
    # Window: third separator (inclusive) to second-to-last separator (exclusive).
    first, last = separators[2], separators[-2]
    window = data[first:last]
    header_line = window[1]
    # One result line every 10 lines, the first at offset 3 in the window.
    result_lines = window[3::10]
    return header_line.split(), [row.split() for row in result_lines]

In [4]:
def get_average_gflops(df, group):
    """Compute the mean of the ``Gflops`` column for each group.

    Args:
        df: DataFrame holding a ``Gflops`` column plus the column(s) named in
            ``group``. ``Gflops`` must be numeric (the notebook casts it to
            float before calling this).
        group: Column name, or list of column names, to group by.

    Returns:
        A new DataFrame with one row per distinct group: the grouping
        column(s) followed by a ``Gflops`` column holding that group's mean.
        ``df`` itself is not modified.
    """
    # Built-in groupby aggregation instead of ``apply`` with a lambda: same
    # result, but vectorised and without a Python call per group.
    return df.groupby(group)['Gflops'].mean().reset_index()

### 单机/集群系统性能调优及分析

In [5]:
# Read the raw output of the single-node/cluster tuning runs, one stripped line per list entry.
data = load_data('./result/data/data.txt')

In [6]:
# Split the raw lines into the column heading and the list of result rows.
# NOTE(review): this rebinds `data` from raw lines to parsed rows, so re-running
# this cell without re-running the load cell feeds parsed rows back into
# processing_data. Consider a separate name for the parsed rows.
heading, data = processing_data(data)

In [7]:
# Tabulate the parsed rows; every cell is still a string at this point.
df = pd.DataFrame(data, columns=heading)

In [9]:
# Convert the 'Gflops' column to float so it can be averaged numerically.
df['Gflops'] = df['Gflops'].astype(float)

In [10]:
# Save the table as an Excel file; index=False leaves the row index out of the sheet.
df.to_excel('./result/data/data.xlsx', index=False)

In [11]:
# Display the parsed results table.
df

Unnamed: 0,T/V,N,NB,P,Q,Time,Gflops
0,WR00L2L2,1960,60,2,2,2.96,1.69710
1,WR00L2L4,1960,60,2,2,3.68,1.36600
2,WR00L2C2,1960,60,2,2,3.99,1.26030
3,WR00L2C4,1960,60,2,2,3.73,1.34740
4,WR00L2R2,1960,60,2,2,3.45,1.45820
...,...,...,...,...,...,...,...
139,WR00R2L4,2048,80,4,1,5.38,1.06510
140,WR00R2C2,2048,80,4,1,5.78,0.99225
141,WR00R2C4,2048,80,4,1,6.31,0.90874
142,WR00R2R2,2048,80,4,1,5.94,0.96547


In [13]:
# Mean Gflops for every (N, NB, P, Q) combination.
avg_gflops_df = get_average_gflops(df, ['N', 'NB', 'P', 'Q'])

In [14]:
# Display the per-configuration averages.
avg_gflops_df

Unnamed: 0,N,NB,P,Q,Gflops
0,1960,60,2,2,1.367706
1,1960,60,4,1,0.762098
2,1960,80,2,2,1.493839
3,1960,80,4,1,0.877724
4,2048,60,2,2,1.571744
5,2048,60,4,1,1.010337
6,2048,80,2,2,1.661311
7,2048,80,4,1,0.975026


### 线程性能测试

In [15]:
# Accumulators for the thread-count runs; the loading loop in the next cell fills them.
# Parsed result rows collected from every thread-count file.
threads_data = []
# Column heading; rebound by each processing_data call in the loop.
heading = []
# Thread count for each row in threads_data.
threads = []

In [17]:
# Start from empty accumulators so that re-running this cell does not append
# a second copy of every row.
threads_data = []
threads = []
for i in range(4, 13):  # thread counts 4 through 12, one result file each
    path = "./result/thread/thread" + str(i) + ".txt"
    thread_data = load_data(path)
    heading, thread_data = processing_data(thread_data)
    threads_data.extend(thread_data)
    # Label each parsed row with its thread count. Using the actual row count
    # instead of a fixed 18 keeps `threads` the same length as `threads_data`
    # even if a file holds a different number of results; a mismatch would
    # make the later `df.insert(0, 'Threads', threads)` fail.
    threads.extend([i] * len(thread_data))

In [21]:
# Tabulate the rows from all thread-count files and make Gflops numeric.
df = pd.DataFrame(threads_data, columns=heading).astype({'Gflops': float})
# Put the thread count of each row in the first column.
df.insert(loc=0, column='Threads', value=threads)

In [22]:
# Display the thread-count results table.
df

Unnamed: 0,Threads,T/V,N,NB,P,Q,Time,Gflops
0,4,WR00L2L2,2048,64,2,2,2.63,2.1762
1,4,WR00L2L4,2048,64,2,2,2.74,2.0956
2,4,WR00L2C2,2048,64,2,2,3.08,1.8641
3,4,WR00L2C4,2048,64,2,2,2.38,2.4067
4,4,WR00L2R2,2048,64,2,2,3.46,1.6586
...,...,...,...,...,...,...,...,...
157,12,WR00R2L4,2048,64,2,2,0.32,17.7520
158,12,WR00R2C2,2048,64,2,2,0.36,15.8900
159,12,WR00R2C4,2048,64,2,2,0.33,17.5020
160,12,WR00R2R2,2048,64,2,2,0.33,17.5600


In [23]:
# Mean Gflops for each thread count.
avg_gflops_df = get_average_gflops(df, ['Threads'])

In [24]:
# Display the per-thread-count averages.
avg_gflops_df

Unnamed: 0,Threads,Gflops
0,4,1.949344
1,5,2.042211
2,6,2.092594
3,7,6.413617
4,8,5.9625
5,9,6.158506
6,10,16.858278
7,11,16.868
8,12,16.760111


### 规模测试

In [25]:
# Read the problem-size run output and parse it into heading + result rows in one step.
heading, size_data = processing_data(load_data("./result/size/size.txt"))

In [26]:
# Tabulate the problem-size results and make Gflops numeric.
df = pd.DataFrame(size_data, columns=heading).astype({'Gflops': float})

In [27]:
# Display the problem-size results table.
df

Unnamed: 0,T/V,N,NB,P,Q,Time,Gflops
0,WR00L2L2,64,64,2,2,0.05,0.003869
1,WR00L2L4,64,64,2,2,0.03,0.006516
2,WR00L2C2,64,64,2,2,0.03,0.005937
3,WR00L2C4,64,64,2,2,0.05,0.003568
4,WR00L2R2,64,64,2,2,0.01,0.013943
...,...,...,...,...,...,...,...
139,WR00R2L4,8192,64,2,2,19.72,18.593000
140,WR00R2C2,8192,64,2,2,18.77,19.532000
141,WR00R2C4,8192,64,2,2,19.38,18.912000
142,WR00R2R2,8192,64,2,2,19.09,19.206000


In [30]:
# Cast N from string to int, presumably so that the per-N averages below are
# ordered numerically rather than lexicographically.
df['N'] = df['N'].astype(int)

In [31]:
# Mean Gflops for each problem size N.
avg_gflops = get_average_gflops(df, ['N'])

In [32]:
# Display the per-size averages.
avg_gflops

Unnamed: 0,N,Gflops
0,64,0.006527
1,128,0.015897
2,256,0.046598
3,512,0.150258
4,1024,0.570428
5,2048,2.0377
6,4096,7.466644
7,8192,18.559833
