In [1]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
from scipy.stats import hmean
import matplotlib.pyplot as plt

In [2]:
def csv_parser(log_file, results_file):
    """Extract CSV result lines from a raw run log and write them to a file.

    The log is scanned line by line:

    * A line whose first whitespace-separated token contains ``"pmpakos@"``
      resets the current matrix name to the empty string (presumably a shell
      prompt marking the start of a new run -- TODO confirm).
    * A line starting with ``INFO: loading Mtx`` sets the current matrix name
      to the basename of its last token, without the ``.mtx`` extension.
    * A line with more than two comma-separated fields whose second field is
      not ``" matrix"`` is treated as a data row: every ``"app"`` substring is
      replaced by the current matrix name and a leading ``"DATA_CSV:,"`` tag
      is removed.

    Parameters
    ----------
    log_file : str
        Path of the raw log to read.
    results_file : str
        Path of the file that receives the extracted rows (overwritten).
    """
    prompt_marker = "pmpakos@"
    csv_prefix = "DATA_CSV:,"

    # Start with an empty name so a data row that precedes any
    # "loading Mtx" line does not raise UnboundLocalError.
    matrix_name = ""
    results_list = []

    with open(log_file, "r") as fp:
        for line in fp:
            words = line.split()
            fields = line.split(",")

            if words and prompt_marker in words[0]:
                matrix_name = ""

            if len(words) > 2 and words[:3] == ["INFO:", "loading", "Mtx"]:
                print(words[-1])
                matrix_name = words[-1].split("/")[-1].replace(".mtx", "")

            if len(fields) > 2 and fields[1] != " matrix":
                line = line.replace("app", matrix_name)
                # Remove the tag as a literal prefix. str.strip() takes a
                # *set of characters* and would also eat leading letters of
                # the matrix name (e.g. "ASIC_680k" -> "IC_680k").
                if line.startswith(csv_prefix):
                    line = line[len(csv_prefix):]
                results_list.append(line)
                print(line)

    # Rows keep their original line endings, so they are written verbatim.
    with open(results_file, "w") as out:
        out.writelines(results_list)

In [12]:
def clean_matrix_generation_log(start_gen, results_gen):
    """Convert the ``>>>> `` lines of a matrix-generation log into CSV rows.

    Only lines containing ``">>>> "`` are kept. For each of them:

    1. leading/trailing ``>`` and space characters are stripped,
    2. remaining spaces become commas and ``".mtx"`` is removed,
    3. in the first comma-separated field (the matrix name) ``".0_"`` is
       replaced by ``"_"`` and the 4th ``_``-separated component (index 3)
       is dropped.

    Parameters
    ----------
    start_gen : str
        Path of the raw generation log.
    results_gen : str
        Path of the cleaned output file (overwritten).

    Raises
    ------
    FileNotFoundError
        If ``start_gen`` does not exist. The input is opened first, so an
        existing ``results_gen`` is left untouched in that case.
    IndexError
        If a matrix name has fewer than four ``_``-separated components.
    """
    # Open the input before the output so a missing log cannot truncate a
    # previously generated clean file; both handles close on any exit path.
    with open(start_gen) as fp, open(results_gen, "w") as fw:
        for line in fp:
            if ">>>> " not in line:
                continue
            # strip() works on a character set here: it removes '>' and ' '
            # from both ends; the trailing newline is kept.
            fields = line.strip(">>>> ").replace(" ", ",").replace(".mtx", "").split(",")
            name_parts = fields[0].replace(".0_", "_").split("_")
            del name_parts[3]
            fields[0] = "_".join(name_parts)
            fw.write(",".join(fields))

def clean_results(start_list, results_list):
    """Extract the ``DATA_CSV:,`` rows from raw run logs into clean files.

    ``start_list`` and ``results_list`` are paired element-wise with ``zip``
    (extra entries in the longer list are ignored). For each pair, every
    input line containing ``"DATA_CSV:,"`` is written to the output with the
    tag (and anything preceding it on the line) removed and ``".0_"``
    replaced by ``"_"``.

    Parameters
    ----------
    start_list : list of str
        Paths of the raw run logs.
    results_list : list of str
        Paths of the cleaned files to write (overwritten).
    """
    marker = "DATA_CSV:,"
    for raw_path, clean_path in zip(start_list, results_list):
        # Input first: a missing log must not truncate an existing clean file.
        with open(raw_path) as fp, open(clean_path, "w") as fw:
            for line in fp:
                pos = line.find(marker)
                if pos < 0:
                    continue
                # Cut at the literal marker. str.strip("DATA_CSV:,") would
                # treat the argument as a character set and also remove
                # leading D/A/T/_/C/S/V characters of the matrix name.
                payload = line[pos + len(marker):]
                fw.write(payload.replace(".0_", "_"))
    
def read_results_single(result_file):
    """Load one cleaned, header-less result file into a DataFrame.

    The file is parsed as comma-separated text and the 14 columns are named
    in the fixed order listed below.

    Parameters
    ----------
    result_file : str
        Path of the cleaned result file.

    Returns
    -------
    pandas.DataFrame
        One row per line of ``result_file``.
    """
    column_names = [
        "matrix_name",
        "original rows",
        "original cols",
        "original NNZs",
        "padded rows",
        "padded cols",
        "padded NNZs",
        "padding overhead[%]",
        "num of runs",
        "total run time[sec]",
        "time[ms]/run",
        "performance[GFLOPs]",
        "performance (padded)[GFLOPs]",
        "mem_footprint[MB]",
    ]
    return pd.read_table(result_file, delimiter=",", names=column_names)

def read_results(results_list, results_gen, results_csv):
    """Combine per-run results with generation statistics into one CSV file.

    For every distinct ``matrix_name`` in the first result file, the matrix
    name is decoded, the per-run measurements are aggregated with the
    harmonic mean, the matching generation-statistics row is looked up, and
    one comma-separated line is written to ``results_csv`` (no header).

    Matrix names are split on ``"_"``: component 0 is the name, 1 and 2 are
    rows and columns, and the last component holds the seed plus a letter
    (``g`` -> "gamma", anything else -> "normal"). Names with exactly 7
    components are labelled "random" (diagonal factor 1); all others are
    labelled "diagonal" with component 6 as the diagonal factor.

    Output columns, in order: name, distribution, placement, diagonal factor,
    seed, rows, cols, nnz, density[%], memory footprint, mem_range,
    avg/std nnz per row, avg/std bw, avg sc, std sc / cols, the literal
    ``"Xilinx_SpMV"``, time per run, padded performance, ``W_avg``,
    ``W_avg * time per run``.

    Parameters
    ----------
    results_list : list of str
        Cleaned result files, one per run (see ``read_results_single``).
    results_gen : str
        Cleaned generation-statistics file.
    results_csv : str
        Output path (overwritten).

    Raises
    ------
    IndexError
        If ``results_list`` is empty, a matrix has no row in ``results_gen``,
        or the last name component contains no letter.
    """
    gen_columns = ["matrix_name","mem_range","avg_nnz_per_row","std_nnz_per_row","avg_bw","std_bw","avg_sc","std_sc"]
    df_gen = pd.read_table(results_gen, delimiter=",", names=gen_columns)

    dataframes = [read_results_single(result_file) for result_file in results_list]
    matrices = dataframes[0]["matrix_name"].unique()

    n_runs = len(results_list)
    # NOTE(review): presumably an average power draw in watts; with the
    # runtime taken from "time[ms]/run" the product below would then be in
    # millijoules -- confirm the intended unit.
    W_avg = 33

    results_final = []
    for matrix in matrices:
        matrix_list = matrix.split("_")
        mtx_name = matrix_list[0]

        nr_rows = int(matrix_list[1])
        nr_cols = int(matrix_list[2])

        if len(matrix_list) == 7:  # random
            placement = "random"
            diagonal_factor = 1
        else:  # diagonal
            placement = "diagonal"
            diagonal_factor = matrix_list[6]

        seed = matrix_list[-1].strip("n").strip("g")

        # First alphabetic character of the last component selects the distribution.
        distr = list(filter(lambda x: x.isalpha(), matrix_list[-1]))[0]
        distribution = "gamma" if distr == "g" else "normal"

        first_run_rows = dataframes[0][dataframes[0]["matrix_name"] == matrix]
        nr_nnz = np.asarray(first_run_rows["original NNZs"])[0]
        density = nr_nnz/(nr_rows*nr_cols)*100

        runtime_iter = np.zeros(shape=(n_runs,1))
        perf_padded = np.zeros(shape=(n_runs,1))
        mem_footprint_runs = np.zeros(shape=(n_runs,1))

        for run, df in enumerate(dataframes):
            # Select this matrix's rows once per run instead of once per column.
            rows = df[df["matrix_name"] == matrix]
            runtime_iter[run] = np.asarray(rows["time[ms]/run"])
            perf_padded[run] = np.asarray(rows["performance (padded)[GFLOPs]"])
            mem_footprint_runs[run] = np.asarray(rows["mem_footprint[MB]"])

        # Skip matrices whose first run reports a negative padded performance
        # (the original author notes the cause is unknown).
        if perf_padded[0][0] < 0:
            continue

        runtime_iter_hm = hmean(runtime_iter,axis=0)[0]
        perf_padded_hm = hmean(perf_padded,axis=0)[0]
        mem_footprint_hm = hmean(mem_footprint_runs,axis=0)[0]

        selected_gen = df_gen[df_gen["matrix_name"]==matrix]
        mem_range = np.asarray(selected_gen["mem_range"])[0]
        avg_nnz_per_row = np.asarray(selected_gen["avg_nnz_per_row"])[0]
        std_nnz_per_row = np.asarray(selected_gen["std_nnz_per_row"])[0]
        avg_bw = np.asarray(selected_gen["avg_bw"])[0]
        std_bw = np.asarray(selected_gen["std_bw"])[0]
        avg_sc = np.asarray(selected_gen["avg_sc"])[0]
        # NOTE(review): only std_sc is divided by nr_cols, avg_sc is not --
        # confirm this asymmetry is intended.
        std_sc = np.asarray(selected_gen["std_sc"])[0]/nr_cols

        J_estimated = W_avg*runtime_iter_hm

        line_list = [mtx_name, distribution, placement, diagonal_factor, seed,
                     nr_rows, nr_cols, nr_nnz, density, mem_footprint_hm, mem_range,
                     avg_nnz_per_row, std_nnz_per_row,
                     avg_bw, std_bw, avg_sc, std_sc,
                     "Xilinx_SpMV", runtime_iter_hm, perf_padded_hm, W_avg, J_estimated
                    ]
        results_final.append(",".join(str(x) for x in line_list) + "\n")

    # Context manager guarantees the handle is closed even if a write fails.
    with open(results_csv, "w") as out:
        out.writelines(results_final)

    
def extract_results_of_distr_memrange(distr_memrange):
    """Run the full clean-and-merge pipeline for one dataset tag.

    All file locations are derived from ``distr_memrange``:

    * ``./generation_stats/<tag>_log.txt`` -> ``./generation_stats/<tag>_log_CLEAN.txt``
    * ``./dirty/<tag>_run1.txt`` -> ``./clean/<tag>_run_CLEAN1.txt``
    * merged summary -> ``./results_<tag>.csv``

    Parameters
    ----------
    distr_memrange : str
        Tag embedded in every input and output file name.
    """
    # Derive every path up front, then run the three stages in order.
    gen_prefix = "./generation_stats/" + distr_memrange
    raw_gen_log = gen_prefix + "_log.txt"
    clean_gen_log = gen_prefix + "_log_CLEAN.txt"

    dirty_runs = ["./dirty/" + distr_memrange + "_run1.txt"]
    clean_runs = ["./clean/" + distr_memrange + "_run_CLEAN1.txt"]
    summary_csv = "./results_" + distr_memrange + ".csv"

    clean_matrix_generation_log(raw_gen_log, clean_gen_log)
    clean_results(dirty_runs, clean_runs)
    read_results(clean_runs, clean_gen_log, summary_csv)

In [9]:
%%time
# Tag "gamma_4-8": reads ./generation_stats/gamma_4-8_log.txt and
# ./dirty/gamma_4-8_run1.txt, writes ./results_gamma_4-8.csv.
distr_memrange = "gamma_4-8"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 8.84 s, sys: 0 ns, total: 8.84 s
Wall time: 9.61 s


In [10]:
%%time
# Tag "small_normal_4-8": reads ./generation_stats/small_normal_4-8_log.txt and
# ./dirty/small_normal_4-8_run1.txt, writes ./results_small_normal_4-8.csv.
distr_memrange = "small_normal_4-8"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 250 ms, sys: 0 ns, total: 250 ms
Wall time: 267 ms


In [11]:
%%time
# NOTE(review): same tag as the preceding cell. The pipeline opens its output
# files in write mode, so this run overwrites ./results_small_normal_4-8.csv.
distr_memrange = "small_normal_4-8"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 233 ms, sys: 0 ns, total: 233 ms
Wall time: 248 ms


In [12]:
%%time
# Tag "small_normal_8-16": writes ./results_small_normal_8-16.csv.
distr_memrange = "small_normal_8-16"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 221 ms, sys: 0 ns, total: 221 ms
Wall time: 389 ms


In [13]:
%%time
# Tag "small_normal_16-32": writes ./results_small_normal_16-32.csv.
distr_memrange = "small_normal_16-32"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 260 ms, sys: 0 ns, total: 260 ms
Wall time: 318 ms


In [14]:
%%time
# Tag "small_normal_32-64": writes ./results_small_normal_32-64.csv.
distr_memrange = "small_normal_32-64"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 329 ms, sys: 0 ns, total: 329 ms
Wall time: 386 ms


In [15]:
%%time
# Tag "small_normal_64-128": writes ./results_small_normal_64-128.csv.
distr_memrange = "small_normal_64-128"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 372 ms, sys: 0 ns, total: 372 ms
Wall time: 488 ms


In [16]:
%%time
# Tag "small_normal_128-256": writes ./results_small_normal_128-256.csv.
distr_memrange = "small_normal_128-256"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 409 ms, sys: 0 ns, total: 409 ms
Wall time: 628 ms


In [13]:
%%time
# Tag "small_normal_256-512": writes ./results_small_normal_256-512.csv.
distr_memrange = "small_normal_256-512"
extract_results_of_distr_memrange(distr_memrange)

CPU times: user 235 ms, sys: 0 ns, total: 235 ms
Wall time: 290 ms
