In [4]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import tikzplotlib
import seaborn as sns

import json
from datetime import datetime

In [5]:
# Project metadata; the three timestamp columns are parsed as datetimes.
projects_df = pd.read_csv('/root/data/projects.csv',
                         parse_dates=['project_created_at', 'project_last_pushed_at', 'project_updated_at'])
# Keep only the first 10 characters of every revision string. The vectorized
# .str accessor replaces a row-wise apply(): it avoids a Python call per row
# and leaves missing values as NaN instead of raising a TypeError.
projects_df['project_revision'] = projects_df['project_revision'].str[:10]
# Package rows containing any missing value are dropped right after loading.
packages_df = pd.read_csv('/root/data/packages_0_499.csv').dropna()
geiger_df = pd.read_csv('/root/data/geiger/geiger_findings_0_499.csv')
# Sampled usages with a 'label' column (used by the inspection cell below).
sampled_usages_app = pd.read_csv('/root/data/classification/sampled_usages_app.csv')
sampled_usages_std = pd.read_csv('/root/data/classification/sampled_usages_std.csv')

# Evaluation 1: with labeled data set

In [101]:
index = 39

# Pick the index-th sampled app usage that carries the 'cast-header' label.
cast_header_usages = sampled_usages_app[sampled_usages_app['label']=='cast-header']
snippet = cast_header_usages.iloc[index]

# Where the usage lives: file, line, package and module.
location = "{}:{} in package {} of module {}\n".format(
    snippet['file_name'],
    int(snippet['line_number']),
    snippet['package_import_path'],
    snippet['module_path'],
)
print(location)

# Package path relative to the module: drop the module path plus the
# separator character that follows it.
package_in_module = snippet['package_import_path'][len(snippet['module_path'])+1:]
versioned_path = "{}@{}/{}/{}\n".format(
    snippet['module_path'],
    snippet['module_version'],
    package_in_module,
    snippet['file_name'],
)
print(versioned_path)

print(snippet['context'])

bytestostr.go:21 in package github.com/mailru/easyjson/jlexer of module github.com/mailru/easyjson

github.com/mailru/easyjson@v0.7.0/jlexer/bytestostr.go

//
// chunk may be either blocked from being freed by GC because of a single string or the buffer.Data
// may be garbage-collected even when the string exists.
func bytesToStr(data []byte) string {
	h := (*reflect.SliceHeader)(unsafe.Pointer(&data))
	shdr := reflect.StringHeader{Data: h.Data, Len: h.Len}
	return *(*string)(unsafe.Pointer(&shdr))
}


# Evaluation 2: with manually analyzed projects

In [126]:
# Attach project metadata to every package row; validate='many_to_one' makes
# the merge fail if project_name is not unique in projects_df.
mdf = pd.merge(
    packages_df,
    projects_df,
    how='left',
    on='project_name',
    validate='many_to_one',
)

# Only rows whose module is the project's root module contribute to the
# per-project sum.
in_root_module = mdf['module_path'] == mdf['project_root_module']
df = (
    mdf[in_root_module]
    .groupby(['project_name', 'project_root_module'])['package_unsafe_sum']
    .agg(project_unsafe_sum='sum')
    .reset_index()
    .sort_values(by='project_unsafe_sum', ascending=False)
)

# First 50 projects (by descending sum) with a positive project_unsafe_sum.
df[df['project_unsafe_sum'] > 0].head(50)

Unnamed: 0,project_name,project_root_module,project_unsafe_sum
225,kubernetes/kubernetes,k8s.io/kubernetes,1887
58,cilium/cilium,github.com/cilium/cilium,416
153,gonum/gonum,gonum.org/v1/gonum,405
206,jetstack/cert-manager,github.com/jetstack/cert-manager,374
146,golang/mobile,golang.org/x/mobile,298
24,TykTechnologies/tyk,github.com/TykTechnologies/tyk,231
211,json-iterator/go,github.com/json-iterator/go,223
168,gorgonia/gorgonia,gorgonia.org/gorgonia,201
123,go-delve/delve,github.com/go-delve/delve,198
192,hybridgroup/gocv,gocv.io/x/gocv,188


In [182]:
# Widen the column display so long directory paths are not truncated.
pd.set_option('max_colwidth', 540)

shown_columns = ['module_path', 'dir', 'import_path', 'loc', 'number_of_go_files', 'package_unsafe_sum']

# Rows whose is_standard flag is False; the explicit comparison is kept as in
# the original selection.
non_standard_packages = packages_df[packages_df['is_standard']==False]

# Rows 50-99 of the de-duplicated packages, ordered by descending
# package_unsafe_sum.
(
    non_standard_packages
    .drop_duplicates(subset=['import_path', 'module_path'])
    .dropna()
    .sort_values(by='package_unsafe_sum', ascending=False)
    .loc[:, shown_columns]
    .iloc[50:100]
)

Unnamed: 0,module_path,dir,import_path,loc,number_of_go_files,package_unsafe_sum
110673,github.com/google/gopacket,/root/download/google/gopacket/afpacket,github.com/google/gopacket/afpacket,246,2,91
63610,github.com/jetstack/cert-manager,/root/download/jetstack/cert-manager/pkg/internal/apis/acme/v1alpha2,github.com/jetstack/cert-manager/pkg/internal/apis/acme/v1alpha2,1415,5,82
63611,github.com/jetstack/cert-manager,/root/download/jetstack/cert-manager/pkg/internal/apis/acme/v1alpha3,github.com/jetstack/cert-manager/pkg/internal/apis/acme/v1alpha3,1415,5,82
2790,github.com/ishidawataru/sctp,/root/go/pkg/mod/github.com/ishidawataru/sctp@v0.0.0-20190723014705-7c296d48a2b5,github.com/ishidawataru/sctp,1166,3,78
102806,github.com/tendermint/tendermint,/root/download/tendermint/tendermint/crypto/secp256k1/internal/secp256k1,github.com/tendermint/tendermint/crypto/secp256k1/internal/secp256k1,0,0,78
7766,github.com/ethereum/go-ethereum,/root/download/ethereum/go-ethereum/crypto/secp256k1,github.com/ethereum/go-ethereum/crypto/secp256k1,0,0,78
41581,github.com/adriansr/gopacket,/root/go/pkg/mod/github.com/adriansr/gopacket@v1.1.18-0.20200327165309-dd62abfa8a41/afpacket,github.com/google/gopacket/afpacket,240,2,76
16690,github.com/influxdata/influxdb/v2,/root/download/influxdata/influxdb/tsdb/tsi1,github.com/influxdata/influxdb/v2/tsdb/tsi1,10415,17,74
92685,golang.org/x/tools,/root/download/golang/tools/go/ssa/interp,golang.org/x/tools/go/ssa/interp,3594,6,71
27177,github.com/xtaci/kcp-go/v5,/root/download/xtaci/kcptun/vendor/github.com/xtaci/kcp-go/v5,github.com/xtaci/kcp-go/v5,3713,12,70


Selection of the packages to be analyzed:

In [167]:
# Summary of the first packages_df row for the easyjson jlexer package.
(
    packages_df
    .loc[
        packages_df['import_path'] == 'github.com/mailru/easyjson/jlexer',
        ['import_path', 'dir', 'loc', 'number_of_go_files', 'package_unsafe_sum'],
    ]
    .iloc[0]
)

import_path                                   github.com/mailru/easyjson/jlexer
dir                   /root/go/pkg/mod/github.com/mailru/easyjson@v0.7.0/jlexer
loc                                                                        1221
number_of_go_files                                                            3
package_unsafe_sum                                                            6
Name: 403, dtype: object

In [168]:
# Summary of the first packages_df row for the gorgonia tensor/native package.
(
    packages_df
    .loc[
        packages_df['import_path'] == 'gorgonia.org/tensor/native',
        ['import_path', 'dir', 'loc', 'number_of_go_files', 'package_unsafe_sum'],
    ]
    .iloc[0]
)

import_path                                   gorgonia.org/tensor/native
dir                   /root/go/pkg/mod/gorgonia.org/tensor@v0.9.6/native
loc                                                                 1867
number_of_go_files                                                     4
package_unsafe_sum                                                   200
Name: 113397, dtype: object

In [169]:
# Summary of the first packages_df row for the cilium ebpf package.
(
    packages_df
    .loc[
        packages_df['import_path'] == 'github.com/cilium/ebpf',
        ['import_path', 'dir', 'loc', 'number_of_go_files', 'package_unsafe_sum'],
    ]
    .iloc[0]
)

import_path                                                               github.com/cilium/ebpf
dir                   /root/go/pkg/mod/github.com/cilium/ebpf@v0.0.0-20191113100448-d9fb101ca1fb
loc                                                                                         2823
number_of_go_files                                                                            13
package_unsafe_sum                                                                            61
Name: 2143, dtype: object

In [175]:
# Summary of the x/tools event/label package. Note that the SECOND matching
# row (position 1) is taken here, unlike the other lookups.
# NOTE(review): presumably this picks a specific module version - confirm.
(
    packages_df
    .loc[
        packages_df['import_path'] == 'golang.org/x/tools/internal/event/label',
        ['import_path', 'dir', 'loc', 'number_of_go_files', 'package_unsafe_sum'],
    ]
    .iloc[1]
)

import_path                                                               golang.org/x/tools/internal/event/label
dir                   /root/go/pkg/mod/golang.org/x/tools@v0.0.0-20200502202811-ed308ab3e770/internal/event/label
loc                                                                                                           213
number_of_go_files                                                                                              1
package_unsafe_sum                                                                                              8
Name: 70525, dtype: object

In [190]:
# Summary of the first packages_df row for the anacrolix mmsg/socket package.
(
    packages_df
    .loc[
        packages_df['import_path'] == 'github.com/anacrolix/mmsg/socket',
        ['import_path', 'dir', 'loc', 'number_of_go_files', 'package_unsafe_sum'],
    ]
    .iloc[0]
)

import_path                                   github.com/anacrolix/mmsg/socket
dir                   /root/go/pkg/mod/github.com/anacrolix/mmsg@v1.0.0/socket
loc                                                                       1016
number_of_go_files                                                          17
package_unsafe_sum                                                          48
Name: 105800, dtype: object

In [174]:
# Summary of the first packages_df row for the Kubernetes core/v1 API package.
(
    packages_df
    .loc[
        packages_df['import_path'] == 'k8s.io/kubernetes/pkg/apis/core/v1',
        ['import_path', 'dir', 'loc', 'number_of_go_files', 'package_unsafe_sum'],
    ]
    .iloc[0]
)

import_path                              k8s.io/kubernetes/pkg/apis/core/v1
dir                   /root/download/kubernetes/kubernetes/pkg/apis/core/v1
loc                                                                   10048
number_of_go_files                                                        6
package_unsafe_sum                                                      675
Name: 691, dtype: object

## Evaluation

In [26]:
from functools import reduce

def evaluate(package, data_path="/root/code/hacking/go-safer-evaluation/packages-eval/data"):
    """Score go-safer, go vet and gosec against the manual labels of one package.

    Reads ``<data_path>/<package>-manual.csv`` plus one result file per tool
    (``-gosafer.csv``, ``-govet.csv``, ``-gosec.csv``) and passes each tool's
    results to ``prf``, which prints one summary line per tool.

    Parameters
    ----------
    package : str
        File-name prefix of the evaluated package (e.g. ``'v1'``).
    data_path : str, optional
        Directory containing the CSV files. Defaults to the location that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    tuple
        Twelve counts: ``(tp, fp, tn, fn)`` for go-safer, then go vet, then
        gosec, in that order.
    """
    # (display name passed to prf, file-name suffix of the tool's CSV)
    tools = (("go-safer", "gosafer"), ("go vet", "govet"), ("gosec", "gosec"))

    manual_df = pd.read_csv("{}/{}-manual.csv".format(data_path, package))
    # Load every tool file before scoring so a missing file fails before any
    # summary line has been printed.
    tool_frames = [
        (display_name, pd.read_csv("{}/{}-{}.csv".format(data_path, package, suffix)))
        for display_name, suffix in tools
    ]

    counts = []
    for display_name, tool_df in tool_frames:
        counts.extend(prf(manual_df, tool_df, display_name, package))
    return tuple(counts)
    
def prf(manual_df, tool_df, name, package):
    """Compare one tool's findings with the manual labels and print the scores.

    Both frames are outer-merged on ``file_name`` and ``line_number``. A row
    counts as flagged by the tool when its ``message`` value is present. The
    ``label`` column is compared against ``'YES'`` / ``'NO'``.

    Counting rules:

    * TP: flagged and labeled ``'YES'``
    * FP: flagged and labeled ``'NO'``, plus every row without a ``text``
      value (presumably tool findings on lines that are missing from the
      manual labeling - confirm against the CSV schema)
    * TN: not flagged and labeled ``'NO'``
    * FN: not flagged and labeled ``'YES'``

    Parameters
    ----------
    manual_df : pandas.DataFrame
        Needs ``file_name``, ``line_number``, ``label`` (and, as far as this
        function can tell, ``text``).
    tool_df : pandas.DataFrame
        Needs ``file_name``, ``line_number`` and (presumably) ``message``.
    name : str
        Tool name used in the printed summary line.
    package : str
        Package name used in the printed summary line.

    Returns
    -------
    tuple
        ``(tp, fp, tn, fn)`` exactly as produced by ``Series.count()``.
    """
    def ratio(numerator, denominator):
        # An undefined ratio is reported as NaN explicitly. Previously this
        # relied on NumPy's 0/0 behaviour, which emits a RuntimeWarning and
        # would raise ZeroDivisionError for plain Python ints.
        return numerator / denominator if denominator else float('nan')

    df = pd.merge(manual_df, tool_df, how='outer', on=['file_name', 'line_number'])

    flagged = df['message'].notna()
    labeled_unsafe = df['label'] == 'YES'
    labeled_safe = df['label'] == 'NO'
    without_text = df['text'].isna()

    def count(mask):
        # Number of non-null line numbers among the selected rows.
        return df.loc[mask, 'line_number'].count()

    tp = count(flagged & labeled_unsafe)
    fp = count(flagged & labeled_safe) + count(without_text)
    tn = count(~flagged & labeled_safe)
    fn = count(~flagged & labeled_unsafe)

    p = ratio(tp, tp + fp)
    r = ratio(tp, tp + fn)
    a = ratio(tp + tn, tp + fp + tn + fn)

    print("{}: {}: TP={}, FP={}, TN={}, FN={}, P={:.3}, R={:.3}, A={:.3}".format(
        package, name, tp, fp, tn, fn, p, r, a))

    return tp, fp, tn, fn

def evaluate_total(packages=None):
    """Evaluate every package and print per-tool totals over all of them.

    ``evaluate`` is called once per package (which prints the per-package
    lines); the twelve counts it returns are summed column-wise and then
    precision, recall and accuracy are printed for go-safer, go vet and gosec.

    Parameters
    ----------
    packages : list of str, optional
        Package prefixes to evaluate. Defaults to the six manually analyzed
        packages, so ``evaluate_total()`` behaves as before.
    """
    if packages is None:
        packages = ['v1', 'native', 'socket', 'ebpf', 'label', 'jlexer']

    def ratio(numerator, denominator):
        # A tool with no findings at all has TP + FP == 0; report NaN
        # explicitly instead of depending on NumPy's 0/0 behaviour.
        return numerator / denominator if denominator else float('nan')

    per_package = [evaluate(package) for package in packages]
    # Column-wise sum of the 12-tuples; all zeros when nothing was evaluated.
    if per_package:
        totals = tuple(sum(column) for column in zip(*per_package))
    else:
        totals = (0,) * 12

    print()
    # evaluate() orders its result as go-safer, go vet, gosec with four
    # counts (tp, fp, tn, fn) per tool.
    for position, tool in enumerate(("go-safer", "go vet", "gosec")):
        tp, fp, tn, fn = totals[4 * position:4 * position + 4]
        p = ratio(tp, tp + fp)
        r = ratio(tp, tp + fn)
        a = ratio(tp + tn, tp + fp + tn + fn)
        print("Total: {}: TP={}, FP={}, TN={}, FN={}, P={:.3}, R={:.3}, A={:.3}".format(
            tool, tp, fp, tn, fn, p, r, a))

In [16]:
# Score go-safer, go vet and gosec on the 'v1' package data.
evaluate('v1')

go-safer: TP=0, FP=0, TN=676, FN=0, P=nan, R=nan, A=1.0
go vet: TP=0, FP=0, TN=676, FN=0, P=nan, R=nan, A=1.0
gosec: TP=0, FP=676, TN=1, FN=0, P=0.0, R=nan, A=0.00148




In [17]:
# Score go-safer, go vet and gosec on the 'native' package data.
evaluate('native')

go-safer: TP=48, FP=9, TN=101, FN=0, P=0.842, R=1.0, A=0.943
go vet: TP=0, FP=0, TN=109, FN=48, P=nan, R=0.0, A=0.694
gosec: TP=0, FP=98, TN=11, FN=48, P=0.0, R=0.0, A=0.0701




In [18]:
# Score go-safer, go vet and gosec on the 'socket' package data.
evaluate('socket')

go-safer: TP=0, FP=0, TN=115, FN=0, P=nan, R=nan, A=1.0
go vet: TP=0, FP=0, TN=115, FN=0, P=nan, R=nan, A=1.0
gosec: TP=0, FP=17, TN=99, FN=0, P=0.0, R=nan, A=0.853




In [19]:
# Score go-safer, go vet and gosec on the 'ebpf' package data.
evaluate('ebpf')

go-safer: TP=0, FP=1, TN=57, FN=0, P=0.0, R=nan, A=0.983
go vet: TP=0, FP=0, TN=58, FN=0, P=nan, R=nan, A=1.0
gosec: TP=0, FP=52, TN=27, FN=0, P=0.0, R=nan, A=0.342




In [20]:
# Score go-safer, go vet and gosec on the 'label' package data.
evaluate('label')

go-safer: TP=0, FP=0, TN=5, FN=0, P=nan, R=nan, A=1.0
go vet: TP=0, FP=0, TN=5, FN=0, P=nan, R=nan, A=1.0
gosec: TP=0, FP=7, TN=1, FN=0, P=0.0, R=nan, A=0.125




In [21]:
# Score go-safer, go vet and gosec on the 'jlexer' package data.
evaluate('jlexer')

go-safer: TP=1, FP=0, TN=4, FN=0, P=1.0, R=1.0, A=1.0
go vet: TP=0, FP=0, TN=4, FN=1, P=nan, R=0.0, A=0.8
gosec: TP=0, FP=2, TN=2, FN=1, P=0.0, R=0.0, A=0.4




In [27]:
# Print the per-package scores followed by the totals over all packages.
evaluate_total()



v1: go-safer: TP=0, FP=0, TN=676, FN=0, P=nan, R=nan, A=1.0
v1: go vet: TP=0, FP=0, TN=676, FN=0, P=nan, R=nan, A=1.0
v1: gosec: TP=0, FP=676, TN=1, FN=0, P=0.0, R=nan, A=0.00148
native: go-safer: TP=48, FP=9, TN=101, FN=0, P=0.842, R=1.0, A=0.943
native: go vet: TP=0, FP=0, TN=109, FN=48, P=nan, R=0.0, A=0.694
native: gosec: TP=0, FP=98, TN=11, FN=48, P=0.0, R=0.0, A=0.0701
socket: go-safer: TP=0, FP=0, TN=115, FN=0, P=nan, R=nan, A=1.0
socket: go vet: TP=0, FP=0, TN=115, FN=0, P=nan, R=nan, A=1.0
socket: gosec: TP=0, FP=17, TN=99, FN=0, P=0.0, R=nan, A=0.853
ebpf: go-safer: TP=0, FP=1, TN=57, FN=0, P=0.0, R=nan, A=0.983
ebpf: go vet: TP=0, FP=0, TN=58, FN=0, P=nan, R=nan, A=1.0
ebpf: gosec: TP=0, FP=52, TN=27, FN=0, P=0.0, R=nan, A=0.342
label: go-safer: TP=0, FP=0, TN=5, FN=0, P=nan, R=nan, A=1.0
label: go vet: TP=0, FP=0, TN=5, FN=0, P=nan, R=nan, A=1.0
label: gosec: TP=0, FP=7, TN=1, FN=0, P=0.0, R=nan, A=0.125
jlexer: go-safer: TP=1, FP=0, TN=4, FN=0, P=1.0, R=1.0, A=1.0
jlexer: 

