In [1]:
import pandas as pd
import json

In [2]:
def get_recovery_faults(ffile):
    """Return the ids of all records in a JSON-lines file whose status is 'Normal'.

    Parameters
    ----------
    ffile : str or path-like
        Path to a text file holding one JSON object per line. Every object
        must provide the keys 'status' and 'id'.

    Returns
    -------
    list
        The 'id' value of each record whose 'status' equals 'Normal',
        in the order the records appear in the file.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    KeyError
        If a record lacks the 'status' or 'id' key.
    """
    result = list()
    with open(ffile) as fh:
        # Iterate the handle lazily rather than loading every line with readlines().
        for line in fh:
            # A blank line (e.g. a trailing newline at end of file) holds no record;
            # passing it to json.loads would raise.
            if not line.strip():
                continue
            record = json.loads(line)
            if record['status'] == 'Normal':
                result.append(record['id'])
    return result

def parse_screen_json(ffile, IDs):
    """Extract the non-memory faults with a selected id from a JSON-lines file.

    Parameters
    ----------
    ffile : str or path-like
        Path to a text file holding one JSON object per line. Each object
        must provide 'id' and, for selected ids, a 'fault' object with the
        keys 'location' (itself holding 'type' and, for non-'mem' faults,
        'location' -> 'base'), 'insn_address' and 'faulty_value'.
    IDs : iterable
        The record ids to keep. Records whose 'id' is not among them are skipped.

    Returns
    -------
    list of dict
        One ``{'ADDR': ..., 'REG': ..., 'VALUE': ...}`` dict per kept record,
        in file order. Records whose fault location type is 'mem' are dropped.

    Raises
    ------
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    KeyError
        If a selected record lacks one of the keys listed above.
    """
    # Membership is tested once per record; a frozenset makes each test O(1)
    # instead of a linear scan over a list. If the ids are unhashable, fall
    # back to the container as given so the original behaviour is preserved.
    try:
        wanted = frozenset(IDs)
    except TypeError:
        wanted = IDs

    result = list()
    with open(ffile) as fh:
        # Iterate the handle lazily rather than loading every line with readlines().
        for line in fh:
            # A blank line holds no record; json.loads would raise on it.
            if not line.strip():
                continue
            record = json.loads(line)
            if record['id'] not in wanted:
                continue
            fault = record['fault']
            location = fault['location']
            # Only faults that are not located in memory are kept.
            if location['type'] == 'mem':
                continue
            addr = fault['insn_address']
            reg = location['location']['base']
            fvalue = fault['faulty_value']
            result.append({'ADDR': addr, 'REG': reg, 'VALUE': fvalue})
    return result

def target_statistic(data):
    """Count how often each 'REG' value occurs in ``data``.

    ``data`` is an iterable of dicts that each provide a 'REG' key. The
    counts are printed and also returned as the pandas Series produced by
    ``Series.value_counts()``.
    """
    registers = [entry['REG'] for entry in data]
    counts = pd.Series(registers).value_counts()
    print(counts)
    return counts

def save_to_file(path, data, targets = None):
    """Append records to ``path`` as JSON lines, optionally filtered by register.

    Parameters
    ----------
    path : str or path-like
        Output file. It is opened in append mode ('a+'), so existing content
        is kept and calling this function again adds the records again.
    data : iterable of dict
        Records to write, one JSON object per line. When ``targets`` is
        given, each record must provide a 'REG' key.
    targets : collection, optional
        If non-empty, only records whose 'REG' value is in ``targets`` are
        written. ``None`` or an empty collection writes every record.

    Returns
    -------
    None
    """
    # The context manager closes (and thereby flushes) the file even if
    # serialisation fails part-way; the original left the handle open.
    with open(path, 'a+') as fh:
        for r in data:
            if targets and r['REG'] not in targets:
                continue
            json.dump(r, fh)
            fh.write('\n')
        

In [39]:
# GTCP
# Paths to the two line-delimited JSON inputs read by the helpers below.
gtcp_replay = '/home/cchen/Documents/Projects/CARE/Benchmarks/GTCP/O0/RExpr/tmp-worker-0.json'
gtcp_screen = '/home/cchen/Documents/Projects/CARE/Benchmarks/GTCP/O0/screen/screen_faults.json'
# IDs: ids of the records in gtcp_replay whose status is 'Normal'.
IDs = get_recovery_faults(gtcp_replay)
# data: one {'ADDR', 'REG', 'VALUE'} dict per record in gtcp_screen with a selected id;
# records whose fault location type is 'mem' are dropped.
data = parse_screen_json(gtcp_screen, IDs)
# Print the per-register counts. These cover all of `data`, i.e. they are
# computed before the `targets` filter applied when saving.
p = target_statistic(data)
# Write only the rdx/rcx/rax records. The output file is opened in append
# mode, so re-running this cell appends the same records again.
save_to_file('gtcp_mpi_fault.json', data, targets=['rdx', 'rcx', 'rax'])

rdx    93
rcx    83
eax    60
esi    60
rax    53
ecx    38
rsi     4
rbp     2
edi     2
edx     2
rdi     2
dtype: int64


In [41]:
# miniMD
# Paths to the two line-delimited JSON inputs read by the helpers below.
minimd_replay = '/home/cchen/Documents/Projects/CARE/Benchmarks/miniMD/O0/RExpr/tmp-worker-0.json'
minimd_screen = '/home/cchen/Documents/Projects/CARE/Benchmarks/miniMD/O0/screen/screen_faults.json'
# IDs: ids of the records in minimd_replay whose status is 'Normal'.
IDs = get_recovery_faults(minimd_replay)
# data: one {'ADDR', 'REG', 'VALUE'} dict per record in minimd_screen with a selected id;
# records whose fault location type is 'mem' are dropped.
data = parse_screen_json(minimd_screen, IDs)
# Print the per-register counts. These cover all of `data`, i.e. they are
# computed before the `targets` filter applied when saving.
p = target_statistic(data)
# Write only the rax records. The output file is opened in append mode,
# so re-running this cell appends the same records again.
save_to_file('minimd_mpi_fault.json', data, targets=['rax'])

rax     196
rcx     138
rdx      77
edx      48
ecx      28
esi      27
rsi      23
eax      14
xmm0      1
dtype: int64


In [5]:
# HPCCG
# Paths to the two line-delimited JSON inputs read by the helpers below.
hpccg_replay = '/home/cchen/Documents/Projects/CARE/mpi/HPCCG/O0/RExpr/RExpr_replay.json'
hpccg_screen = '/home/cchen/Documents/Projects/CARE/mpi/HPCCG/O0/screen/screen_faults.json'
# IDs: ids of the records in hpccg_replay whose status is 'Normal'.
IDs = get_recovery_faults(hpccg_replay)
# data: one {'ADDR', 'REG', 'VALUE'} dict per record in hpccg_screen with a selected id;
# records whose fault location type is 'mem' are dropped.
data = parse_screen_json(hpccg_screen, IDs)
# Print the per-register counts for `data`.
p = target_statistic(data)
# No `targets` filter: every record in `data` is written. The output file is
# opened in append mode, so re-running this cell appends the same records again.
save_to_file('hpccg_mpi_fault.json', data)

rax    340
rcx    268
rdx     26
dtype: int64


In [5]:
# miniFE
# Paths to the two line-delimited JSON inputs read by the helpers below.
minife_replay = '/home/cchen/Documents/Projects/CARE/mpi/miniFE/O0/RExpr/RExpr_replay.json'
minife_screen = '/home/cchen/Documents/Projects/CARE/mpi/miniFE/O0/screen/screen_faults.json'
# IDs: ids of the records in minife_replay whose status is 'Normal'.
IDs = get_recovery_faults(minife_replay)
# data: one {'ADDR', 'REG', 'VALUE'} dict per record in minife_screen with a selected id;
# records whose fault location type is 'mem' are dropped.
data = parse_screen_json(minife_screen, IDs)
# Print the per-register counts for `data`.
p = target_statistic(data)
# Also dump the raw records themselves (the full list is printed, unabridged).
print(data)
# No `targets` filter: every record in `data` is written. The output file is
# opened in append mode, so re-running this cell appends the same records again.
save_to_file('minife_mpi_fault.json', data)

rcx    1
rdx    1
rdi    1
dtype: int64
[{'ADDR': '0x4225d1', 'REG': 'rdi', 'VALUE': 140737488343888}, {'ADDR': '0x418f15', 'REG': 'rdx', 'VALUE': 140737066913808}, {'ADDR': '0x418f0b', 'REG': 'rcx', 'VALUE': 140737028390928}]


In [9]:
# lulesh
# Paths to the two line-delimited JSON inputs read by the helpers below.
lulesh_replay = '/home/cchen/Documents/Projects/CARE/mpi/lulesh/O0/RExpr/RExpr_replay.json'
lulesh_screen = '/home/cchen/Documents/Projects/CARE/mpi/lulesh/O0/screen/screen_faults.json'
# IDs: ids of the records in lulesh_replay whose status is 'Normal'.
IDs = get_recovery_faults(lulesh_replay)
# data: one {'ADDR', 'REG', 'VALUE'} dict per record in lulesh_screen with a selected id;
# records whose fault location type is 'mem' are dropped.
data = parse_screen_json(lulesh_screen, IDs)
# Print the per-register counts for `data`.
p = target_statistic(data)
# No `targets` filter: every record in `data` is written. The output file is
# opened in append mode, so re-running this cell appends the same records again.
save_to_file('lulesh_mpi_fault.json', data)

rcx    235
rax    215
rsi     29
esi     15
r12      7
rdi      3
rsp      3
rdx      2
dtype: int64
