In [None]:
# Standard libraries
import os
from pathlib import Path
from multiprocessing import Pool
from pprint import pprint

# External
import numpy as np
import scipy.stats as stats
import pandas as pd
import yaml
from IPython.display import display

In [None]:
# Directory that holds the '.c' files and the '.d' dirs
benchmarkDir = '../tmp/seed_fns'

# Glob patterns selecting the desired files, keyed by program group
patterns = {
    'case0_O1': '*.d/*_case0_O1.info',
    'case0_Oz': '*.d/*_case0_Oz.info',
}

In [None]:
# For every program group, collect all files under benchmarkDir that match
# the group's glob pattern.
infoFilePaths = {
    groupName: [*Path(benchmarkDir).glob(globPattern)]
    for groupName, globPattern in patterns.items()
}

In [None]:
runInParallel = True  # Run in parallel?

# Number of worker processes (remove the `// 2` to use all the cores).
# The count is clamped to at least 1: with a single available core the
# halving would otherwise give 0, which multiprocessing.Pool rejects.
if runInParallel:
    try:
        # Cores this process is actually allowed to run on
        availableCores = len(os.sched_getaffinity(0))
    except AttributeError:
        # sched_getaffinity is not provided on every platform
        availableCores = os.cpu_count() or 1
    nproc = max(1, availableCores // 2)
else:
    nproc = 1
print(f'Using {nproc} core(s)')

# Number of tasks handed to a worker at a time
chunksize = 512
print(f'Each cpu core will work on chunks of {chunksize} tasks')

In [None]:
# Loads .info files faster if you have parallel disk access
def parseInfo(infoFilePath):
    """Read one '.info' file and parse it as YAML, on a best-effort basis.

    Args:
        infoFilePath: Path of the file to open (anything `open` accepts).

    Returns:
        Whatever `yaml.safe_load` produces for the file, or None if opening
        or parsing raised. In that case the exception text is printed.
    """
    try:
        with open(infoFilePath, 'r') as infoFileHandle:
            return yaml.safe_load(infoFileHandle)
    except Exception as e:
        # Open and parse failures are reported the same way: print and give up
        print(f'{e}')
        return None

# Hoists the 'instructions', 'calls' and 'signals' entries of the inner
# 'static' and 'dynamic' dicts into the outer dict under a prefixed key, e.g.,
# infoFile['static']['instructions'] becomes infoFile['static_instructions'].
# This also makes pandas happier.
def flattenCfgInfo(cfgInfo):
    """Build a flat, single-level dict from one nested CFG info record.

    Args:
        cfgInfo: Mapping with the top-level keys copied below plus the nested
            'static' and 'dynamic' mappings.

    Returns:
        A new dict holding the top-level values, then the
        '<section>_<counter>' values, then 'name'. `cfgInfo` is not modified.
    """
    topLevelKeys = ('cfg', 'invoked', 'complete', 'blocks',
                    'phantoms', 'exit', 'halt', 'edges')
    nestedSections = ('static', 'dynamic')
    counters = ('instructions', 'calls', 'signals')

    flat = {key: cfgInfo[key] for key in topLevelKeys}
    for section in nestedSections:
        for counter in counters:
            flat[f'{section}_{counter}'] = cfgInfo[section][counter]
    flat['name'] = cfgInfo['name']
    return flat

In [None]:
# Columns kept when building each group's dataframe
desiredCols = [
    'name',
    'static_instructions',
    'dynamic_instructions',
]

print('The desired columns to be included in the dataframe are:')
# An empty frame is the cell's output, so only the column headers are shown
pd.DataFrame(data=[], columns=desiredCols)

In [None]:
# Parse every group's '.info' files with one shared worker pool, then build a
# dataframe per group restricted to the desired columns.
df = {}
infoGroups = {}
with Pool(nproc) as pool:
    for group, files in infoFilePaths.items():
        res = pool.imap_unordered(parseInfo, files, chunksize)
        # Falsy results (e.g. the None returned for a file that failed to
        # parse) are skipped; only the first entry of each parsed file is
        # flattened.
        rows = []
        for parsed in res:
            if parsed:
                rows.append(flattenCfgInfo(parsed[0]))
        infoGroups[group] = rows
        df[group] = pd.DataFrame(rows, columns=desiredCols)

In [None]:
# Prefix of the generated CSV files; each group is written to
# '<outputPrefix>_<group>.csv'. The directory part is created on demand.
outputPrefix = 'output/cfgInfo'

# Warning: [ , . ; : ] are all allowed characters for linux files
csvSeparator = ';'

for group in patterns.keys():
    csvPath = Path(outputPrefix + f'_{group}.csv')
    try:
        # Without the parent directory every write would fail and only a
        # printed message would hint at it.
        csvPath.parent.mkdir(parents=True, exist_ok=True)
        df[group].to_csv(csvPath, sep=csvSeparator, encoding='utf-8')
    except Exception as e:
        # Best effort: report the failing file and continue with the next group
        print(f'Could not write {csvPath}: {e}')

In [None]:
# Show the first rows of each group's dataframe under a header naming the group
for group in patterns:
    print(f'{group}:')
    preview = df[group].head()
    display(preview)
    print('\n\n')

<hr>