In [1]:
import sys
sys.path.append('../')

In [2]:
from config import TrainConfig
from utils import make_env, prepare_datasets
from check_experiments.oz_env import *
from check_experiments.actions import OZ_FLAGS_SEQUENCE_NO_ANALYTICAL
from tqdm import tqdm

In [3]:
config = TrainConfig()
observations_names = ["Ir", "IrInstructionCountOz", "IrInstructionCount"]

In [4]:
env = make_env(config)
env_clone = env.fork()

benchmarks, _, _ = prepare_datasets(
    env,
    config.datasets,
    train_val_test_split=False,
    skipped=set(config.skipped_benchmarks),
)

In [5]:
data = {
    "benchmark": [],
    "ir_lines_number": [],
    "IrInstructionCountOz": [],
    "IrInstructionCount": [],
    "opt_ir_lines_number": [],
    "opt_IrInstructionCountOz": [],
    "opt_IrInstructionCount": [],
    "opt_cg_oz_ir_lines_number": [],
    "opt_cg_oz_IrInstructionCountOz": [],
    "opt_cg_oz_IrInstructionCount": [],
    "time": [],
}

In [6]:
cbench_path = "/home/flint/.local/share/compiler_gym/llvm-v0/benchmark/cbench-v1/contents/cBench-v1"
cbench_files = list(sorted(os.listdir(cbench_path)))

In [7]:
cbench_files

['adpcm.bc',
 'bitcount.bc',
 'blowfish.bc',
 'bzip2.bc',
 'crc32.bc',
 'dijkstra.bc',
 'ghostscript.bc',
 'gsm.bc',
 'ispell.bc',
 'jpeg-c.bc',
 'jpeg-d.bc',
 'lame.bc',
 'patricia.bc',
 'qsort.bc',
 'rijndael.bc',
 'sha.bc',
 'stringsearch.bc',
 'stringsearch2.bc',
 'susan.bc',
 'tiff2bw.bc',
 'tiff2rgba.bc',
 'tiffdither.bc',
 'tiffmedian.bc']

In [8]:
for benchmark_filename in tqdm(cbench_files):
    benchmark_path = os.path.join(cbench_path, benchmark_filename)
    with open(benchmark_path, "rb") as inf:
        ir_source = inf.read()
    observations = get_observations_by_raw_ir(env, ir_source, observations_names, use_bc=True)
    ir_source = str(env.observation["Ir"])

    data["benchmark"].append(str(benchmark_filename))
    data["ir_lines_number"].append(ir_source.count("\n"))
    observations = get_observations_by_raw_ir(env, ir_source, observations_names)
    data["IrInstructionCountOz"].append(observations["IrInstructionCountOz"])
    data["IrInstructionCount"].append(observations["IrInstructionCount"])

    with Timer() as timer:
        # observations получаются через env.make_benchmark поскольку 
        # здесь используется тектовый вид IR
        compressed_ir_source, observations = env_action(
            env,
            ir_source,
            OZ_FLAGS_SEQUENCE,
            observations=observations_names,
        )
    data["opt_ir_lines_number"].append(compressed_ir_source.count("\n"))
    data["opt_IrInstructionCountOz"].append(observations["IrInstructionCountOz"])
    data["opt_IrInstructionCount"].append(observations["IrInstructionCount"])

    program_name = benchmark_filename.split('.')[0]
    env.reset(benchmark=f"benchmark://cbench-v1/{program_name}")
    reward_sum = 0
    for flag in OZ_FLAGS_SEQUENCE_NO_ANALYTICAL:
        _, reward, _, _ = env.step(env.action_space.flags.index(flag))
        reward_sum += reward
    data["opt_cg_oz_ir_lines_number"].append(env.observation["Ir"].count("\n"))
    data["opt_cg_oz_IrInstructionCountOz"].append(observations["IrInstructionCountOz"])
    data["opt_cg_oz_IrInstructionCount"].append(observations["IrInstructionCount"])
    
    data["time"].append(timer.time)

100%|██████████████████████████████████████████| 23/23 [40:39<00:00, 106.06s/it]


In [9]:
pd_data = pd.DataFrame(data=data)
pd_data["opt_to_cgOz_comparise"] = (
    pd_data["opt_IrInstructionCount"] / pd_data["IrInstructionCountOz"]
)
pd_data["cg_Oz_flag_sequence_to_cg_Oz_comparise"] = (
    pd_data["opt_cg_oz_IrInstructionCount"] / pd_data["IrInstructionCountOz"]
)
pd_data

Unnamed: 0,benchmark,ir_lines_number,IrInstructionCountOz,IrInstructionCount,opt_ir_lines_number,opt_IrInstructionCountOz,opt_IrInstructionCount,opt_cg_oz_ir_lines_number,opt_cg_oz_IrInstructionCountOz,opt_cg_oz_IrInstructionCount,time,opt_to_cgOz_comparise,cg_Oz_flag_sequence_to_cg_Oz_comparise
0,adpcm.bc,793,209,567,359,222,223,360,222,223,0.18651,1.066986,1.066986
1,bitcount.bc,1168,405,857,1030,688,691,824,688,691,0.296337,1.706173,1.706173
2,blowfish.bc,4284,1933,3898,2315,1988,1994,2299,1988,1994,0.423841,1.031557,1.031557
3,bzip2.bc,36227,15946,28748,26237,19956,19972,25075,19956,19972,5.131609,1.252477,1.252477
4,crc32.bc,367,114,242,329,190,190,297,190,190,0.192927,1.666667,1.666667
5,dijkstra.bc,670,254,450,610,367,370,597,367,370,0.239277,1.456693,1.456693
6,ghostscript.bc,520982,177416,406198,393650,288664,290435,377029,288664,290435,60.41485,1.637028,1.637028
7,gsm.bc,18210,6870,14902,11095,9198,8994,11302,9198,8994,1.965649,1.30917,1.30917
8,ispell.bc,21899,8079,15184,19824,13359,13303,19204,13359,13303,3.48976,1.646615,1.646615
9,jpeg-c.bc,76372,30195,62452,58598,45175,45115,56742,45175,45115,9.797614,1.494122,1.494122


In [10]:
pd_data.to_csv("oz_results_2.csv")

In [11]:
env.close()
env_clone.close()