In [2]:
import json
import pandas as pd
from pathlib2 import Path
import sys

In [3]:
class Experiment(object):
  def __init__(self, path, output_checker):
    self.expr_path = Path(path).resolve()
    self.expr_name = self.expr_path.stem
    self.expr_json = self.expr_path.joinpath(self.expr_name + '_faults.json')
    self.expr_check = output_checker
    
    assert self.expr_json.exists(), \
          "File {name} not exists".format(name=self.expr_json)
    
    self.Records = pd.DataFrame()
    self.load()
   
  def load(self):
    runs = int()      # num of injections performed
    insn = dict()     # the target instruction
    func = dict()     # the function where the injection is performed
    orig = dict()     # the original value of the target state
    wrng = dict()     # the faulty value after injection
    flip = dict()     # the bit flipped
    stat = dict()     # the status of the target execution
    path = dict()     # execution path after injection
    spac = dict()     # the space, application space or library space
    libs = dict()     # the library where the failure is manifested
    
    DF = pd.DataFrame()
   
    with open(self.expr_json, 'r') as fh:
      faults = fh.readlines()
      
    runs = len(faults)
    
    for item in faults:
      try:
        record = json.loads(item)
      except:
        print(item)
        print(item[370:380])
        sys.exit()
      
      iid = record['id']    # injection id
      insn[iid] = record['fault']['instruction']
      func[iid] = record['fault']['parent_func']
      orig[iid] = record['fault']['normal_value']
      wrng[iid] = record['fault']['faulty_value']
      flip[iid] = record['fault']['bit_flipped']
      if record['fault']['library'] == None:
        spac[iid] = "App"
        libs[iid] = "None"
      else:
        spac[iid] = "lib"
        libs[iid] = record['fault']['library'] 
      stat[iid] = record['status']
      path[iid] = record['track']
      
#     DF = pd.concat([DF, pd.Series(insn, name='inst')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(func, name='func')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(orig, name='orig')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(wrng, name='wrng')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(flip, name='fbit')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(spac, name='spac')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(libs, name='libs')], axis=1, sort=True)
#     DF = pd.concat([DF, pd.Series(stat, name='stat')], axis=1, sort=True)
    DF = pd.concat([DF, pd.Series(insn, name='inst')], axis=1, sort=True)
    DF = pd.concat([DF, pd.Series(func, name='func')], axis=1, sort=True)
    DF = pd.concat([DF, pd.Series(spac, name='spac')], axis=1, sort=True)
    DF = pd.concat([DF, pd.Series(libs, name='libs')], axis=1, sort=True)
    DF = pd.concat([DF, pd.Series(stat, name='stat')], axis=1, sort=True)
    DF = pd.concat([DF, pd.Series(path, name='path')], axis=1, sort=True)
    
    self.Records = DF  
    
  def get_status_distribution(self):
    dist = self.Records['stat'].value_counts()
    res = self.parse_normal_executions()
    dist['Normal'] = 0
    for key, value in res.items():
      dist[key] = value
    #print(dist)
    return dist
  
  def get_crash_space_distribution(self):
    DF = self.Records
    crashes = DF.loc[DF['stat'] != 'Normal']
    crashes = DF.loc[DF['stat'] != 'Hang']
    dist = crashes['space'].value_counts()
    #print(dist)
    return dist
  
  def parse_normal_executions(self):
    DF = self.Records
    profile = self.expr_path.joinpath('profile/stdout')
    
    Normal = DF.loc[DF['stat'] == 'Normal']
    ids = list(Normal.index)

    res = dict()
    
    for i in ids:
      run = self.expr_path.joinpath(i).joinpath('stdout')
      try:
        status = self.expr_check(profile, run)
      except:
        print(i, '---> Profile: ', profile, '---> Run: ', run)
        # sys.exit()
      
      if status not in res:
        res[status] = 0
      res[status] += 1
    return res
  
  def get_sf_latency(self):
    DF = self.Records
    SF = DF.loc[DF['stat'].isin([11, '11', 'SIGSEGV', 'SIGBUS', \
                                 'SIGTRAP', 'SIGABRT'])]
    Path = SF['path'].apply(len)
    bins=[0, 10, 20, 50, 400, 500]
    return(Path.value_counts(bins=bins))
  
  def get_sigsegv_insn(self):
    DF = self.Records.loc[self.Records['stat'].isin(['SIGSEGV', '11'])]
    path = DF['path']
    
    for p in path:
      print(p[-1])
    

In [None]:
# HPCCG
def hpccg_output_checker(profile, run):
    assert Path(profile).exists()
    assert Path(run).exists()
    with open(profile) as fh:
        for line in fh.readlines():
            line = line.strip()
            if line.startswith('Number of iterations'):
                profile_iteration = int(line.split(':')[-1])
            if line.startswith('Final residual'):
                profile_residual = float(line.split(':')[-1])

    with open(run) as fh:
        for line in fh.readlines():
            line = line.strip()
            if line.startswith('Number of iterations'):
                run_iteration = int(line.split(':')[-1])
            if line.startswith('Final residual'):
                run_residual = float(line.split(':')[-1])

    # print(profile_residual, run_residual)
    if profile_residual != run_residual:
        return 'SDC'
    if profile_iteration < run_iteration:
        return 'DELAY'
    if profile_iteration > run_iteration:
        return 'EARLIAR'
    return 'Normal'

Expr = Experiment('hpccg_gdbfi_exp_dec_1', hpccg_output_checker)
Expr.get_status_distribution()

In [None]:
# LULESH
def lulesh_output_checker(profile, run):
    assert Path(profile).exists()
    assert Path(run).exists()
    
    with open(profile) as fh:
        lines = fh.readlines()
        line = lines[-12].strip()
        profile_steps = int(line.split('=')[1])
        line = lines[-11].strip()
        profile_FOE = float(line.split('=')[1])

    with open(run) as fh:
        lines = fh.readlines()
        line = lines[-12].strip()
        run_steps = int(line.split('=')[1])
        line = lines[-11].strip()
        run_FOE = float(line.split('=')[1])

    diff = abs(profile_FOE - run_FOE)/profile_FOE
    
    if diff != 0.0:
        return 'SDC'
    if profile_steps < run_steps:
        return 'DELAY'
    if profile_steps > run_steps:
        return 'EARLIAR'
    return 'Normal'
  
Expr = Experiment('lulesh_gdbfi_exp_dec_11', lulesh_output_checker)
Expr.get_status_distribution()

In [None]:
# miniFE
def minife_output_checker(profile, run):
    assert Path(profile).exists()
    assert Path(run).exists()
    
    with open(profile) as fh:
        lines = fh.readlines()
        line = lines[-2].strip()
        
        profile_steps = int(line.split('=')[1].split()[0])
        line = lines[-1].strip()
        profile_FRN = float(line.split(':')[1])

    with open(run) as fh:
        lines = fh.readlines()
        line = lines[-2].strip()
        run_steps = int(line.split('=')[1].split()[0])
        line = lines[-1].strip()
        run_FRN = float(line.split(':')[1])

    diff = abs(profile_FRN - run_FRN)/profile_FRN
    
    if diff !=0.0:
        return 'SDC'
    if profile_steps < run_steps:
        return 'DELAY'
    if profile_steps > run_steps:
        return 'EARLIAR'
    return 'Normal'
  
expr = Experiment('./miniFE_Expr_Dec_16', minife_output_checker)
expr.get_status_distribution()

inject-0001 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-0001/stdout
inject-0256 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-0256/stdout
inject-0781 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-0781/stdout
inject-1040 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-1040/stdout
inject-1057 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-1057/stdout
inject-1173 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-1173/stdout
inject-1423 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_Dec_16/inject-1423/stdout
inject-1502 ---> Profile:  /content/miniFE_Expr_Dec_16/profile/stdout ---> Run:  /content/miniFE_Expr_De

Normal     4259
SIGSEGV    3427
SIGBUS       51
SIGTRAP      35
SIGABRT       6
Hang          5
SDC        2197
Name: stat, dtype: int64

In [31]:
# miniMD
def minimd_output_checker(profile, run):
    assert Path(profile).exists()
    assert Path(run).exists()
    
    with open(profile) as fh:
        line = fh.readlines()[-3].split(' ')
        profile_steps = int(line[2])
        profile_t_total = float(line[4])

    with open(run) as fh:
        line = fh.readlines()[-3].split(' ')
        run_steps = int(line[2])
        run_t_total = float(line[4])

    diff = abs(profile_t_total - run_t_total)/profile_t_total
    
    if diff > 0.05:
        return 'SDC'
    if profile_steps < run_steps:
        return 'DELAY'
    if profile_steps > run_steps:
        return 'EARLIAR'
    return 'Normal'
  
expr = Experiment('./miniMD_Expr_Dec_30', minimd_output_checker)
expr.get_status_distribution()
# expr.get_sigsegv_insn()

Normal      951
SIGSEGV    4028
SIGABRT      25
SIGBUS        6
SIGTRAP       6
SDC        4984
Name: stat, dtype: int64

In [32]:
dist = expr.get_sf_latency()
print(dist.to_string())

401    981
2      871
4      650
12     594
3      342
41     177
6      150
43      82
7       75
5       66
13      15
14      13
10      11
9       10
8        6
17       5
32       4
21       3
15       2
22       2
190      1
60       1
130      1
173      1
31       1
210      1


In [None]:
# GTC-P
def gtcp_output_checker(profile, run):
  with open(profile) as fh:
    line = fh.readlines()[-21].split(' ')
    profile_efield = float(line[1].strip(',').split('=')[1])
    profile_eradial= float(line[2].strip(',').split('=')[1])
    profile_entropyi = float(line[3].strip(',').split('=')[1])
    profile_dflowi = float(line[4].strip(',').split('=')[1])
    profile_pfluxi = float(line[5].strip(',').split('=')[1])
    profile_efluxi = float(line[6].strip(',').split('=')[1])
    profile_weights = float(line[11].strip(',').split('=')[1])

  with open(run) as fh:
    line = fh.readlines()[-21].split(' ')
    run_efield = float(line[1].strip(',').split('=')[1])
    run_eradial= float(line[2].strip(',').split('=')[1])
    run_entropyi = float(line[3].strip(',').split('=')[1])
    run_dflowi = float(line[4].strip(',').split('=')[1])
    run_pfluxi = float(line[5].strip(',').split('=')[1])
    run_efluxi = float(line[6].strip(',').split('=')[1])
  
  efield_diff = abs(run_efield - profile_efield)
  eradial_diff = abs(run_eradial - profile_eradial)
  entropyi_diff = abs(run_entropyi - profile_entropyi)
  dflowi_diff = abs(run_dflowi - profile_dflowi)
  pfluxi_diff = abs(run_pfluxi - profile_pfluxi)
  efluxi_diff = abs(run_efluxi - profile_efluxi)
  
  if efield_diff != 0.0 or eradial_diff != 0.0 or entropyi_diff != 0.0:
    return 'SDC'
  if pfluxi_diff != 0.0 or efluxi_diff != 0.0 :
    return 'SDC'
  return "Normal"
  
expr = Experiment('./GTCP_Dec_30', gtcp_output_checker)
# expr.get_status_distribution()
expr.get_sigsegv_insn()

140380965142911:vmovsd  0(%rsi, %rdx, 8), %xmm6
94313789012016:movsd  %xmm1, 0(%rcx)
94544038061828:movsd  0(%r8), %xmm12
94048113984892:movsd  0(%r12, %rax, 8), %xmm2
94851039233243:movsd  0(%rax, %rbx, 2), %xmm0
94813400570346:movsd  0(%rax, %r12), %xmm3
94500730634435:movsd  %xmm6, 0(%rsi)
94623343777571:movsd  0(%rsi), %xmm10
94628584499799:movsd  0(%rax, %r15, 8), %xmm2
93990327838394:addsd  0(%r14), %xmm8
94860265432701:movl  %eax, %ecx
94127856084666:addsd  0(%r14), %xmm8
94025748670359:movsd  %xmm14, 0(%r10)
94829345140092:movsd  0(%rax, %r15, 8), %xmm6
94613149826003:movsd  0x10(%rbx, %rax), %xmm1
93924140953162:movsd  0(%r15), %xmm8
94890368412517:movsd  0(%rbx, %rdi, 8), %xmm8
94283686679860:movsd  %xmm3, 0(%rax, %r15, 8)
94329961762161:movsd  0(%rsi), %xmm9
94489049054796:movsd  0(%rbx, %rdx, 8), %xmm0
94288029550371:movsd  0(%rsi), %xmm10
139782009369076:movsd  %xmm0, 8(%rsp)
94666603721054:movsd  %xmm1, 0(%rdi)
94352960083642:addsd  0(%r14), %xmm8
94368465643668:movups  %

In [None]:
dist = expr.get_sf_latency()
print(dist.to_string())

2      200
401    144
3      119
8       80
4       73
5       59
13      47
10      38
6       38
12      33
20      31
7       29
44      23
81      23
9       21
22      21
27      20
17      17
14      16
19      13
137     12
18      11
11      11
50      10
33      10
15       9
138      8
32       8
113      7
39       6
35       6
37       5
25       5
30       5
146      5
145      5
139      5
256      4
153      4
36       4
24       4
46       4
26       4
16       3
140      3
252      3
142      3
47       3
28       3
94       3
78       3
60       3
259      2
55       2
34       2
21       2
31       2
45       2
23       1
308      1
38       1
40       1
255      1
254      1
165      1
163      1
150      1
143      1
136      1
128      1
104      1
102      1
101      1
98       1
76       1
74       1
70       1
68       1
65       1
62       1
61       1
59       1
58       1
51       1
361      1
49       1


In [8]:
# miniAMR
def miniAMR_output_checker(profile, run):
  return "Normal"
  
expr = Experiment('/home/cchen/Documents/Projects/CARE/Benchmarks/miniAMR/O1/screen', miniAMR_output_checker)
dist = expr.get_status_distribution()
latency = expr.get_sf_latency()
print(dist)
print(latency)
#expr.get_sigsegv_insn()

Normal     1022
SIGSEGV     934
Hang         32
SIGBUS        6
SIGABRT       4
SIGTRAP       2
Name: stat, dtype: int64
(-0.001, 10.0]    660
(20.0, 50.0]      134
(400.0, 500.0]     98
(10.0, 20.0]       50
(50.0, 400.0]       4
Name: path, dtype: int64
