In [1]:
import numpy as np
import random as rnd
import os
import re
from IPython.display import HTML, display
import matplotlib.pyplot as plt

In [2]:
all_languages = ['java_11_hotspot', 'java_11_openj9', 'java_8_hotspot', 'java_8_openj9', 'graal', 'nodejs', 'nodejs_chakra', 'go', 'go_gccgo', 'posix', 'posix_clang', 'posixmt', 'posixmt_clang']
java_languages = ['java_11_hotspot', 'java_11_openj9', 'java_8_hotspot', 'java_8_openj9', 'graal']
c_languages = ['posix', 'posix_clang', 'posixmt', 'posixmt_clang']
js_languages = ['nodejs', 'nodejs_chakra']
go_languages = ['go', 'go_gccgo']

fig_counter = 0
languages_count = len(all_languages)
all_syscalls_count = 385 #see https://github.com/torvalds/linux/blob/master/arch/x86/entry/syscalls/syscall_64.tbl
root='../../../target/logs'

In [3]:
def load_syscalls(filename):
    file = open(filename)
    while True:
        line = file.readline()      
        if not line:
            break
        if not line.startswith(' '):
            continue
        line = ' '.join(line.split()).split() #replaces multiple blank spaces by single spaces, and split
        yield line[-1]

def load_syscall_invokations(filename):
    file = open(filename)
    while True:
        line = file.readline()      
        if not line:
            break
        if not line.startswith(' '):
            continue
        line = ' '.join(line.split()).split() #replaces multiple blank spaces by single spaces, and split
        yield line[-1], int(line[3])
        
def all_common_syscalls(languages):        
    all_used_syscalls = []
    for language in languages:
        syscalls = np.array(list(load_syscalls(root + '/' + language + '/strace.log')))
        all_used_syscalls.extend(syscalls)
    all_used_syscalls = set(all_used_syscalls)
    common_syscalls = all_used_syscalls.copy()
    for language in languages:
        syscalls = np.array(list(load_syscalls(root + '/' + language + '/strace.log')))
        common_syscalls = common_syscalls & set(syscalls)
    all_used_syscalls = list(all_used_syscalls)
    incidence = len(all_used_syscalls)/all_syscalls_count
    incidence_common = len(common_syscalls)/all_syscalls_count
    return all_used_syscalls, common_syscalls, incidence, incidence_common

def all_invocations(languages):
    all_invocations = []
    for language in languages:
        invocations = np.array(list(load_syscall_invokations(root + '/' + language + '/strace.log')))
        all_invocations.extend(invocations)
    return all_invocations

all_used_syscalls, common_syscalls, incidence, incidence_common = all_common_syscalls(all_languages)
all_used_syscalls_java, common_syscalls_java, incidence_java, incidence_common_java = all_common_syscalls(java_languages)
all_used_syscalls_c, common_syscalls_c, incidence_c, incidence_common_c = all_common_syscalls(c_languages)
all_used_syscalls_js, common_syscalls_js, incidence_js, incidence_common_js = all_common_syscalls(js_languages)
all_used_syscalls_go, common_syscalls_go, incidence_go, incidence_common_go = all_common_syscalls(go_languages)

print('All:\t' + str(incidence) + '\t' + str(incidence_common) + '\t' + str(incidence/incidence_common))
print('Java:\t' + str(incidence_java) + '\t' + str(incidence_common_java) + '\t' + str(incidence_java/incidence_common_java))
print('C:\t' + str(incidence_c) + '\t' + str(incidence_common_c) + '\t' + str(incidence_c/incidence_common_c))
print('JS:\t' + str(incidence_js) + '\t' + str(incidence_common_js) + '\t' + str(incidence_js/incidence_common_js))
print('Go:\t' + str(incidence_go) + '\t' + str(incidence_common_go) + '\t' + str(incidence_go/incidence_common_go))

all_invocations(all_languages)

All:	0.2025974025974026	0.02857142857142857	7.090909090909092
Java:	0.16103896103896104	0.04935064935064935	3.263157894736842
C:	0.05454545454545454	0.05454545454545454	1.0
JS:	0.13506493506493505	0.09090909090909091	1.4857142857142855
Go:	0.09610389610389611	0.05454545454545454	1.761904761904762


In [4]:
def select(seed_language, N=5):
    language = seed_language
    syscalls = np.array(list(load_syscalls(root + '/' + language + '/strace.log')))
    
    selected_languages = []
    selected_languages.append(language)
    
    all_syscalls = []
    all_syscalls.extend(syscalls)
    
    common_syscalls = []
    common_syscalls.extend(syscalls)
    
    while len(selected_languages) < N:
        score = 0
        selected_language = ''
        for language in all_languages:
            if language in selected_languages:
                continue            
            syscalls = np.array(list(load_syscalls(root + '/' + language + '/strace.log')))
            common = set(common_syscalls) & set(syscalls)
            union = set(all_syscalls) | set(syscalls)
            if len(union)/len(common) > score:
                selected_language = language
                score = len(union)/len(common)
        selected_languages.append(selected_language)
        syscalls = np.array(list(load_syscalls(root + '/' + selected_language + '/strace.log')))
        common_syscalls = list(set(common_syscalls) & set(syscalls))
        all_syscalls = list(set(all_syscalls) | set(syscalls))
    return selected_languages, all_syscalls, common_syscalls

def all_select():
    N = 2
    previous_score = -1
    converged = False
    while N <= len(all_languages) and not converged:
        score = 0
        selected_languages = []
        all_syscalls = []
        common_syscalls = []
        for language in all_languages:
            selected_languages_temp, all_syscalls_temp, common_syscalls_temp = select(language, N)
            if len(all_syscalls_temp)/len(common_syscalls_temp) > score:
                score = len(all_syscalls_temp)/len(common_syscalls_temp)
                selected_languages = selected_languages_temp
                all_syscalls = all_syscalls_temp
                common_syscalls = common_syscalls_temp
        if score > previous_score:
            previous_score = score
            yield selected_languages, score
        else:
            return
        N = N + 1
        
list(all_select())

[(['java_11_openj9', 'graal'], 3.0526315789473686),
 (['java_11_openj9', 'graal', 'go'], 4.785714285714286),
 (['java_11_openj9', 'graal', 'go', 'nodejs'], 5.916666666666667),
 (['java_11_openj9', 'graal', 'go', 'nodejs', 'go_gccgo'], 6.545454545454546),
 (['java_11_openj9', 'graal', 'go', 'nodejs', 'go_gccgo', 'nodejs_chakra'],
  6.909090909090909),
 (['java_11_hotspot',
   'graal',
   'nodejs',
   'go',
   'java_11_openj9',
   'go_gccgo',
   'nodejs_chakra'],
  7.090909090909091)]

# SIMULATION 1: 

We assume syscall fail one by one, permanently.
If a language relies on a failed syscall, it is killed.
The simulation ends when all used syscalls have been failed, or when all languages have been killed.

In [5]:
print(str(len(all_used_syscalls)) + ' (' + str(incidence) + ')')
print(str(len(common_syscalls)) + ' (' + str(len(common_syscalls)/all_syscalls_count) + ')')

step = 0
killed = []
max_step = 1000
syscall_index = 0

rnd_used_syscalls = all_used_syscalls.copy()
rnd.shuffle(rnd_used_syscalls)
while len(killed) < languages_count and step < max_step and syscall_index < len(all_used_syscalls):
    r = np.random.uniform(0,1)
    if r <= incidence: #one syscall in all_used_syscalls is failed
        syscall = rnd_used_syscalls[syscall_index]
        new_killed = False
        for language in languages:
            if not language in killed:
                syscalls = np.array(list(load_syscalls(root + '/' + language + '/strace.log')))
                if syscall in syscalls:
                    killed.append(language)
                    new_killed = True
        if new_killed:
            print(killed)
        syscall_index = syscall_index + 1
    step = step + 1
    
print('steps: ' + str(step))
# print('killed: ' + str(killed))

78 (0.2025974025974026)
11 (0.02857142857142857)


NameError: name 'languages' is not defined