In [1]:
import os
def load_polybench(folder, subfolders):
    """Index the ``.c`` / ``.h`` files found beneath the given subfolders.

    Each ``folder/<subfolder>`` tree is walked recursively and every file
    whose name ends in ``.c`` or ``.h`` is grouped under its name minus the
    two-character extension.

    Args:
        folder: Root directory that contains the subfolders.
        subfolders: Iterable of directory names located directly under
            ``folder``.

    Returns:
        dict mapping a program name (the file name without ``.c``/``.h``) to
        a dict keyed by the extension letter (``"c"`` or ``"h"``) whose value
        is the path of that file. When two files share the same name and
        extension, the one visited last overwrites the earlier one.
    """
    catalog = {}
    for category in subfolders:
        category_root = folder + os.sep + category
        for current_dir, _subdirs, filenames in os.walk(category_root):
            for filename in filenames:
                if not filename.endswith((".c", ".h")):
                    continue
                stem, kind = filename[:-2], filename[-1]
                # setdefault creates the per-program entry the first time a
                # program name is seen and returns the existing one afterwards.
                catalog.setdefault(stem, {})[kind] = current_dir + os.sep + filename
    return catalog
def show_name(filename):
    """Return the portion of ``filename`` that follows the last ``os.sep``.

    A string that contains no separator is returned unchanged.
    """
    _parent, _separator, tail = filename.rpartition(os.sep)
    return tail

In [2]:
# Benchmark root, given relative to the notebook's working directory.
folder = 'polybench-c-4.2.1-beta'

# Directories directly under `folder` that are scanned for programs.
subfolders = [
    'datamining',
    'linear-algebra',
    'medley',
    'stencils',
]

# program name -> {'c': path of the .c file, 'h': path of the .h file}
polybench = load_polybench(folder, subfolders)

In [3]:
# List every discovered program together with its source and header file name.
for program, paths in polybench.items():
    source_name = show_name(paths["c"])
    header_name = show_name(paths["h"])
    print("Program: {:<15} [{}, {}]".format(program, source_name, header_name))

Program: correlation     [correlation.c, correlation.h]
Program: covariance      [covariance.c, covariance.h]
Program: gemm            [gemm.c, gemm.h]
Program: gemver          [gemver.c, gemver.h]
Program: gesummv         [gesummv.c, gesummv.h]
Program: symm            [symm.c, symm.h]
Program: syr2k           [syr2k.c, syr2k.h]
Program: syrk            [syrk.c, syrk.h]
Program: trmm            [trmm.c, trmm.h]
Program: 2mm             [2mm.c, 2mm.h]
Program: 3mm             [3mm.c, 3mm.h]
Program: atax            [atax.c, atax.h]
Program: bicg            [bicg.c, bicg.h]
Program: doitgen         [doitgen.c, doitgen.h]
Program: mvt             [mvt.c, mvt.h]
Program: cholesky        [cholesky.c, cholesky.h]
Program: durbin          [durbin.c, durbin.h]
Program: gramschmidt     [gramschmidt.c, gramschmidt.h]
Program: lu              [lu.c, lu.h]
Program: ludcmp          [ludcmp.c, ludcmp.h]
Program: trisolv         [trisolv.c, trisolv.h]
Program: deriche         [deriche.c, deriche.h]


In [4]:
def _strip_block_comments(source):
    """Return ``source`` with every terminated ``/* ... */`` comment removed."""
    kept = []
    cursor = 0
    while True:
        opening = source.find('/*', cursor)
        if opening == -1:
            kept.append(source[cursor:])
            break
        # Look for the terminator only *after* the opener. Pairing the first
        # '/*' with the first '*/' anywhere in the text breaks on a '*/' that
        # appears earlier (e.g. inside a string literal) and on '/*/'.
        closing = source.find('*/', opening + 2)
        if closing == -1:
            # Unterminated comment: leave the remaining text untouched.
            kept.append(source[cursor:])
            break
        kept.append(source[cursor:opening])
        cursor = closing + 2
    return ''.join(kept)


def analyze(filename, features):
    """Count raw substring occurrences of each feature in a C source file.

    The file is read as UTF-8 and pre-processed before counting:

    1. ``/* ... */`` block comments are removed (``//`` comments are kept).
    2. Every line is stripped of surrounding whitespace.
    3. Lines containing ``#`` anywhere and empty lines are dropped.
    4. The remaining lines are joined with single spaces.

    Counting uses ``str.count``, i.e. non-overlapping substring matches, so
    ``'+'`` is counted twice in ``i++`` and ``'for'`` also matches inside
    longer identifiers.

    Args:
        filename: Path of the file to analyze.
        features: Iterable of substrings to count.

    Returns:
        list of int, one count per entry of ``features``, in the same order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, encoding='utf-8') as txt_file:
        raw_text = txt_file.read()
    code = _strip_block_comments(raw_text)
    stripped_rows = (row.strip() for row in code.split('\n'))
    # Drop blank lines and any line containing '#'.
    rows = [row for row in stripped_rows if row and '#' not in row]
    text = ' '.join(rows)
    return [text.count(x) for x in features]
def obtain_features(polybench, features):
    """Print and record the feature counts of every program.

    For each program the counts obtained from its ``'c'`` file and its
    ``'h'`` file are added together, printed on one line, and stored back
    into the program's own dict under the feature string as key.

    Args:
        polybench: dict mapping program name to a dict that holds the file
            paths under the keys ``'c'`` and ``'h'``; it is updated in place.
        features: Sequence of substrings to count.
    """
    for program, entry in polybench.items():
        print('\nProgram: {:<15} Features: '.format(program), end='')
        source_counts = analyze(entry['c'], features)
        header_counts = analyze(entry['h'], features)
        for feature, in_source, in_header in zip(features, source_counts, header_counts):
            total = in_source + in_header
            print('count({}) = {:<5} '.format(feature, total), end='')
            entry[feature] = total

In [5]:
# Substrings counted in each program. Order matters: it fixes both the order
# of the printed counts and the column order of the exported CSV.
features = [
    'for',
    'if',
    '+',
    'printf',
    '==',
    '-',
]

# Prints one line per program and stores the counts in `polybench`.
obtain_features(polybench, features)


Program: correlation     Features: count(for) = 13    count(if) = 1     count(+) = 32    count(printf) = 2     count(==) = 1     count(-) = 6     
Program: covariance      Features: count(for) = 11    count(if) = 1     count(+) = 25    count(printf) = 2     count(==) = 1     count(-) = 2     
Program: gemm            Features: count(for) = 12    count(if) = 1     count(+) = 30    count(printf) = 2     count(==) = 1     count(-) = 0     
Program: gemver          Features: count(for) = 10    count(if) = 1     count(+) = 30    count(printf) = 2     count(==) = 1     count(-) = 0     
Program: gesummv         Features: count(for) = 5     count(if) = 1     count(+) = 15    count(printf) = 2     count(==) = 1     count(-) = 0     
Program: symm            Features: count(for) = 10    count(if) = 1     count(+) = 30    count(printf) = 2     count(==) = 1     count(-) = 2     
Program: syr2k           Features: count(for) = 10    count(if) = 1     count(+) = 26    count(printf) = 2     count(

In [6]:
import csv
def save_features(polybench, features, path='features.csv'):
    """Write the per-program feature counts to a CSV file.

    The file starts with the header row ``Program, <feature>, ...`` followed
    by one row per program, in the iteration order of ``polybench``.

    Args:
        polybench: dict mapping program name to a dict that contains a value
            for every entry of ``features``.
        features: Iterable of feature names; defines the column order.
        path: Destination file. Defaults to ``'features.csv'`` in the
            current working directory.

    Raises:
        KeyError: If a program has no value for one of the features. In that
            case nothing is written and an existing file is left untouched.
        OSError: If the destination cannot be opened for writing.
    """
    features = list(features)
    # Build every row before opening the file so that a missing feature
    # cannot leave a truncated, header-only CSV behind.
    rows = [
        [program] + [entry[feature] for feature in features]
        for program, entry in polybench.items()
    ]
    # newline='' lets the csv module control line endings; the explicit
    # encoding keeps the output independent of the platform default.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Program'] + features)
        writer.writerows(rows)
    print('{} successfully created!'.format(path))

In [7]:
# Export the feature counts stored in `polybench` to features.csv
# (written to the current working directory).
save_features(polybench, features)

features.csv successfully created!
