In [63]:
from dotenv import load_dotenv
load_dotenv()

import os
# Put the Intel oneAPI compiler directory at the front of PATH (presumably so
# that the oneAPI compiler driver resolves when launched via subprocess --
# confirm on the target machine).
# The front-of-PATH check keeps this cell idempotent: re-running it no longer
# stacks another copy of the directory on every execution, and a missing PATH
# variable no longer raises KeyError.
ONEAPI_BIN_DIR = '/opt/intel/oneapi/compiler/2022.1.0/linux/bin'
_current_path = os.environ.get('PATH', '')
if _current_path.split(':')[0] != ONEAPI_BIN_DIR:
    os.environ['PATH'] = f'{ONEAPI_BIN_DIR}:{_current_path}' if _current_path else ONEAPI_BIN_DIR

In [2]:
import os
import re
import subprocess
import json


# Compile every benchmark's "<stem>.check.c" with the selected compiler, run the
# resulting binary several times, and record the reported time and the dumped
# checksum sections of each run as one JSON object per line.
compiler = 'gcc'

# Number of timed executions recorded per successfully compiled benchmark.
RUNS_PER_BENCHMARK = 3

# Captures the text between "begin dump:" and "end   dump" for every dump
# section the binary writes to stderr; the negative lookahead stops a match
# from running across the start of the next section.
DUMP_SECTION = re.compile(r'begin dump:(((?!begin dump).)*)\nend   dump', re.DOTALL)

# Membership test instead of `compiler == 'gcc' or 'clang'`: the latter is
# always truthy (the non-empty string 'clang' is true), which made the icx
# branch unreachable and let any unknown compiler name through.
if compiler in ('gcc', 'clang'):
    compiler_cmd = [compiler, '-fopenmp', '-O3']
elif compiler == 'icx':
    compiler_cmd = [compiler, '-qopenmp', '-O3', '-static', '-xHost']
else:
    raise ValueError(f'unsupported compiler: {compiler!r}')


with open("utilities/benchmark_list", 'r') as f:
    files = f.read().split()

res = []

for file in files:
    # Directory of the benchmark without the leading path component; passed as
    # an include directory so the benchmark's own header is found.
    header_path = '/'.join(file.split('/')[1:-1])

    # NOTE(review): the binary name and the result file below say "clang" no
    # matter what `compiler` is set to -- kept unchanged so existing artefact
    # names stay stable; confirm whether they should follow `compiler`.
    output_path = f'{file[:-2]}.check.clang.time'

    compile_cmd = compiler_cmd + ['-I', 'utilities', '-I', header_path, 'utilities/polybench.c', f'{file[:-2]}.check.c', '-DPOLYBENCH_TIME', '-DPOLYBENCH_DUMP_ARRAYS', '-DPOLYBENCH_CHECKSUM_ARRAYS', '-lm', '-o', output_path]

    compile_output = subprocess.run(compile_cmd, capture_output=True, text=True)

    if compile_output.returncode:
        # Report the failing benchmark together with the exact command line.
        print('compile: ', file)
        print(" ".join(compile_cmd))
        continue

    exec_cmd = [output_path]

    for _ in range(RUNS_PER_BENCHMARK):
        exec_output = subprocess.run(exec_cmd, capture_output=True, text=True)

        # Every run is checked now (previously only the first one was), so a
        # crashing run is reported instead of being recorded as a measurement.
        if exec_output.returncode:
            print('exec: ', file)
            break

        # `dump_sections` replaces the old name `sum`, which shadowed the
        # builtin for the rest of the kernel session.
        dump_sections = DUMP_SECTION.findall(exec_output.stderr)
        # stdout[:-1] drops the final character of the program's stdout
        # (presumably the trailing newline after the printed time -- confirm).
        res.append({'code': file, 'time': exec_output.stdout[:-1], 'checksum': [s[0].split('\n') for s in dump_sections]})


with open('./org_clang.jsonl', 'w') as f:
    for l in res:
        f.write(json.dumps(l) + '\n')

In [66]:
# Load the whitespace-separated list of benchmark source paths.
with open("utilities/benchmark_list", 'r') as f:
    files = f.read().split()

# Rename each "<stem>.check.time" to "<stem>.check.gcc.time". The stem is the
# listed path with its first two and last two characters removed.
for file in files:
    stem = file[2:-2]
    os.rename(stem + '.check.time', stem + '.check.gcc.time')

In [5]:
import re
import os


# For every listed benchmark, read "<stem>.c", add helper macros plus
# DUMP/CHECKSUM switches ahead of the "Array initialization" comment, rewrite
# the body of print_array so that each dumped array also accumulates and prints
# a checksum, and write the result to "<stem>.check.c".

# Lines inserted ahead of the "/* Array initialization. */" comment. Each list
# entry counts as one element of `lines`, even when it contains several '\n'.
PREAMBLE = ['#include <omp.h>\n', '#define ceild(n,d)  ceil(((double)(n))/((double)(d)))\n', '#define floord(n,d) floor(((double)(n))/((double)(d)))\n', '#define max(x,y) ((x) > (y)? (x) : (y))\n', '#define min(x,y) ((x) < (y)? (x) : (y))\n', "#ifndef POLYBENCH_DUMP_ARRAYS\n#define DUMP 0\n#else\n#define DUMP 1\n#endif\n", "#ifndef POLYBENCH_CHECKSUM_ARRAYS\n#define CHECKSUM 0\n#else\n#define CHECKSUM 1\n#endif\n", '\n']

with open("utilities/benchmark_list", 'r') as f:
    files = f.read().split()

for file in files:
    # print(file.split('/')[-1])
    with open(f'{file[2:-2]}.c', 'r') as f:
        lines = f.readlines()

    # check_start: -1 = print_array signature not seen yet, 1 = inside the
    #              signature (collecting array parameters), -2 = signature done.
    # content_start: -1 until POLYBENCH_DUMP_START has been seen.
    check_start = content_start = -1
    # Array name -> list of its trailing dimension arguments; only the length
    # (the array's rank) is used below.
    arrays = dict()
    i = 0
    while i < len(lines):
        if lines[i] == '/* Array initialization. */\n':
            # Insert the preamble one line above the comment, then step over
            # the inserted entries so `i` points at the comment line again.
            lines[i-1:i-1] = PREAMBLE
            i += len(PREAMBLE)
        elif check_start == -1 and re.match(r'^void print_array', lines[i]):
            check_start = 1
        elif check_start == 1:
            if lines[i] == '{\n':
                check_start = -2
            else:
                # e.g. "DATA_TYPE POLYBENCH_2D(C,NI,NJ,ni,nj)": group 1 is the
                # rank, group 2 the array name.
                output = re.search(r'DATA_TYPE POLYBENCH_(\d+)D\(([^,]*),', lines[i])
                if output:
                    dim = int(output.group(1))
                    arrays[output.group(2)] = lines[i][output.end(2):].split(')')[0].split(',')[-dim:]
                    # print(lines[i+1])
                    # print(output.group(1), output.group(2))
        elif content_start == -1 and re.search(r'POLYBENCH_DUMP_START', lines[i]):
            content_start = 1
        elif not content_start == -1:
            if re.match(r'^void kernel', lines[i]):
                # print_array has been processed; nothing after it is rewritten.
                break
            else:
                output = re.search(r'POLYBENCH_DUMP_BEGIN\("(.*)"', lines[i])
                # Guard for the look-ahead below so the last line of the file
                # cannot raise IndexError.
                has_next = i + 1 < len(lines)
                if output:
                    # Start of one array's dump: declare its checksum accumulator.
                    array = output.group(1)
                    content_start = i
                    lines[i:i+1] = [lines[i], f'  DATA_TYPE tmp_{array} = 0;\n']
                    i += 1
                elif has_next and ((re.search(r'if', lines[i]) and re.search(r'fprintf', lines[i + 1])) or ((re.search(r'fprintf', lines[i]) and re.search(r'if', lines[i + 1])))):
                    # The two-line print pair of the dump loop: wrap it in
                    # `if (DUMP) { ... }` and add the checksum accumulation.
                    # Loop indices are assumed to be named i, j, k in that
                    # order -- TODO confirm for every benchmark.
                    # (`index_vars` replaces the old name `vars`, which
                    # shadowed the builtin.)
                    index_vars = 'ijk'
                    rank = len(arrays[array])
                    index = "[" + "][".join([index_vars[j] for j in range(rank)]) + "]"
                    pad = '  ' * (rank + 1)

                    stmt_sum = pad + f'if (CHECKSUM) tmp_{array} += {array}{index};\n'

                    lines[i:i+2] = [pad + 'if (DUMP) {\n', '  ' + lines[i], '  ' + lines[i + 1], pad + '}\n' + stmt_sum]

                    # The pair is now four entries (i .. i+3). Stop on the last
                    # one so the shared `i += 1` resumes at the first untouched
                    # line; the former `i += 4` skipped that line entirely.
                    i += 3
                elif re.search(r'POLYBENCH_DUMP_END\("(.*)"', lines[i]):
                    # Print the accumulated checksum just before the dump ends,
                    # then step over the four inserted lines back onto the
                    # POLYBENCH_DUMP_END line.
                    lines[i:i] = ['  if (CHECKSUM) {\n', '    fprintf(POLYBENCH_DUMP_TARGET,"\\nchecksum: ");\n', f'    fprintf(POLYBENCH_DUMP_TARGET, DATA_PRINTF_MODIFIER, tmp_{array});\n', '  }\n']

                    i += 4

        i += 1

    file_new = file[2:-2] + '.check.c'

    with open(f"{file_new}", 'w') as f:
        f.writelines(lines)
            

In [19]:
import re
import os


# For every listed benchmark, read "<stem>.h", collect the sizes defined in its
# `ifdef LARGE_DATASET` section, replace each POLYBENCH_LOOP_BOUND(...) macro
# use with the matching literal size, bracket the rewritten lines with
# "/* params start */" and "/* params end */" markers, and write "<stem>.check.h".

# Matches a size macro such as "define NI 800": group 1 is the macro name,
# group 2 its numeric value.
SIZE_DEFINE = re.compile(r'define\s*([A-Z]*)\s*(\d+)')

with open("utilities/benchmark_list", 'r') as f:
    files = f.read().split()

for file in files:
    with open(f'{file[:-2]}.h', 'r') as f:
        lines = f.readlines()

    # Macro name -> value (as a string) from the LARGE_DATASET section.
    params = {}
    # 1 while the scan is inside the LARGE_DATASET section.
    get_start = 0
    # Index of the first / last line that uses POLYBENCH_LOOP_BOUND.
    put_start = put_end = -1
    for i in range(len(lines)):
        if get_start:
            if 'endif' in lines[i]:
                get_start = 0
            elif 'define' in lines[i]:
                output = SIZE_DEFINE.search(lines[i])
                if output is None:
                    # Previously this raised AttributeError on `.group`; report
                    # the line and carry on. A size that is really missing is
                    # still caught by the loop-bound lookup below.
                    print(f"define parse error: {file}: {lines[i]}")
                    continue
                params[output.group(1)] = output.group(2)
        elif 'LARGE_DATASET' in lines[i] and 'ifdef' in lines[i] and 'EXTRA' not in lines[i]:
            # This branch is only reached while get_start is 0, so the old
            # nested "already started" check could never fail and was dropped.
            get_start = 1
        elif '/* Default data type */' in lines[i]:
            if put_start == -1:
                # No loop-bound line was seen: inserting at index -1 would put
                # the marker in the wrong place, so only report it.
                print(f"no loop bound found: {file}\n")
            else:
                # Insert the end marker first so put_start is still a valid
                # index for the start marker.
                lines.insert(put_end + 1, '/* params end */\n')
                lines.insert(put_start, '/* params start */\n')
            break
        elif 'POLYBENCH_LOOP_BOUND(' in lines[i]:
            if put_start == -1:
                put_start = i

            put_end = i

            for param in params:
                # `\b` makes this an exact-name match; a bare prefix match let
                # e.g. `N` match `POLYBENCH_LOOP_BOUND(NI`.
                if re.search(rf'POLYBENCH_LOOP_BOUND\({re.escape(param)}\b', lines[i]):
                    val = params[param]
                    break
            else:
                # No collected size matches this macro: report it and stop
                # scanning this header (this `break` leaves the outer loop).
                print(f"search loop bound error: {file}\n")
                break

            # Keep everything before the macro use and replace the rest of the
            # line with the literal value.
            macro_at = lines[i].index('POLYBENCH_LOOP_BOUND')
            lines[i] = lines[i][:macro_at] + val + '\n'

    # print("".join(lines))

    with open(f'{file[:-2]}.check.h', 'w') as f:
        f.writelines(lines)

In [18]:
# Debug probe: show one line and what the size-macro regex extracts from it.
# Relies on `lines` and `i` already being present in the kernel namespace
# (neither is defined in this cell).
probe_line = lines[i]
print(probe_line)
output = re.search(r'define\s*([A-Z]*)\s*(\d+)', probe_line)
print(output)

#   define NI 800

<re.Match object; span=(4, 17), match='define NI 800'>


In [1]:
import os
# Run ../test ten times through the shell; the `>>` redirection appends each
# run's standard output to ../test.txt.
test_command = '../test>>../test.txt'
for _run in range(10):
    os.system(test_command)