In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import json
from datetime import datetime

In [32]:
def date_formatter(date):
    """Format a date-like value as a ``YYYY-MM-DD`` string."""
    iso_day_spec = "%Y-%m-%d"
    # format() dispatches to the value's __format__ with the given spec.
    return format(date, iso_day_spec)

# Table B.1 Projects

In [2]:
# Read the projects table; the three timestamp columns are parsed as dates on load.
projects_df = pd.read_csv(
    filepath_or_buffer='/root/data/projects.csv',
    parse_dates=[
        'project_created_at',
        'project_last_pushed_at',
        'project_updated_at',
    ],
)

In [5]:
# Show the column names available in the projects table.
projects_df.columns

Index(['project_name', 'project_rank', 'project_github_clone_url',
       'project_number_of_stars', 'project_number_of_forks',
       'project_github_id', 'project_revision', 'project_created_at',
       'project_last_pushed_at', 'project_updated_at', 'project_size',
       'project_checkout_path'],
      dtype='object')

In [41]:
# Columns that appear in the projects table, in display order.
# The explicit copy makes the column assignment below act on an independent frame.
df = projects_df.loc[:, ['project_name', 'project_number_of_stars', 'project_number_of_forks',
                         'project_last_pushed_at', 'project_revision']].copy()

# Keep only the first 10 characters of each revision string. The vectorised
# `.str` slice replaces a row-wise apply and leaves missing values as NaN
# instead of raising on them.
df['project_revision'] = df['project_revision'].str[:10]

# Number the rows starting at 1 instead of 0 for display.
df.index = df.index + 1

df

Unnamed: 0,project_name,project_number_of_stars,project_number_of_forks,project_last_pushed_at,project_revision
1,golang/go,72988,10460,2020-05-28 14:45:33+00:00,6bf2eea62a
2,kubernetes/kubernetes,66512,23806,2020-05-28 19:09:23+00:00,fb9e1946b0
3,moby/moby,57189,16540,2020-05-28 18:55:57+00:00,763f9e799b
4,avelino/awesome-go,54733,7267,2020-05-28 00:00:53+00:00,3e27d63fe2
5,gohugoio/hugo,44317,5049,2020-05-28 17:46:37+00:00,6a3e89743c
...,...,...,...,...,...
496,disintegration/imaging,3089,271,2020-05-26 06:37:58+00:00,879073f233
497,gogf/gf,3087,450,2020-05-28 12:28:20+00:00,269378aa0d
498,googleforgames/agones,3082,356,2020-05-28 18:00:02+00:00,d017250c43
499,kubernetes-sigs/external-dns,3078,926,2020-05-28 17:00:34+00:00,deaeca2ab1


In [42]:
# Emit the projects table as LaTeX. The fourth column holds
# `project_last_pushed_at`, so its header says "Last push" (it was previously
# mislabelled "Created"). Dates are rendered through `date_formatter`.
print(df.to_latex(header=['Name', 'Stars', 'Forks', 'Last push', 'Revision'],
    index=True, formatters={'project_last_pushed_at': date_formatter}))

\begin{tabular}{llrrll}
\toprule
{} &                                               Name &  Stars &  Forks &    Created &    Revision \\
\midrule
1   &                                          golang/go &  72988 &  10460 & 2020-05-28 &  6bf2eea62a \\
2   &                              kubernetes/kubernetes &  66512 &  23806 & 2020-05-28 &  fb9e1946b0 \\
3   &                                          moby/moby &  57189 &  16540 & 2020-05-28 &  763f9e799b \\
4   &                                 avelino/awesome-go &  54733 &   7267 & 2020-05-28 &  3e27d63fe2 \\
5   &                                      gohugoio/hugo &  44317 &   5049 & 2020-05-28 &  6a3e89743c \\
6   &                                      gin-gonic/gin &  38459 &   4441 & 2020-05-27 &  5e40c1d49c \\
7   &                                       fatedier/frp &  36184 &   6860 & 2020-05-25 &  2406ecdfea \\
8   &          astaxie/build-web-application-with-golang &  34787 &   9516 & 2020-04-11 &  606abd586a \\
9   &         

# Tables D.1 and D.2: Packages in samples for small-scale study

In [155]:
# Load both classified sample files; the order of the paths matches the
# order of the names being assigned.
sampled_usages_std, sampled_usages_app = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/root/data/classification/sampled_usages_std.csv',
        '/root/data/classification/sampled_usages_app.csv',
    )
)

In [65]:
# Count non-null `line_number` entries per package import path in
# `sampled_usages_app`, sort descending, and print the result as a LaTeX longtable.
print(
    sampled_usages_app
    .groupby('package_import_path')['line_number']
    .count()
    .reset_index(name='samples')
    .iloc[:, :2]
    .sort_values(by='samples', ascending=False)
    .to_latex(index=False, longtable=True)
)

\begin{longtable}{lr}
\toprule
                               package\_import\_path &  samples \\
\midrule
\endhead
\midrule
\multicolumn{2}{r}{{Continued on next page}} \\
\midrule
\endfoot

\bottomrule
\endlastfoot
                k8s.io/kubernetes/pkg/apis/core/v1 &      266 \\
                       github.com/json-iterator/go &      109 \\
                 github.com/vishvananda/netlink/nl &       58 \\
                        github.com/ugorji/go/codec &       56 \\
           github.com/elastic/go-structform/gotype &       45 \\
 k8s.io/apiextensions-apiserver/pkg/apis/apiext... &       40 \\
                     github.com/modern-go/reflect2 &       32 \\
 k8s.io/apiextensions-apiserver/pkg/apis/apiext... &       28 \\
           k8s.io/apiserver/pkg/apis/audit/v1beta1 &       26 \\
             github.com/hashicorp/go-msgpack/codec &       23 \\
                        gorgonia.org/tensor/native &       21 \\
                k8s.io/apiserver/pkg/apis/audit/v1 &       20 \\
   

In [66]:
# Count non-null `line_number` entries per package import path in
# `sampled_usages_std`, sort descending, and print the result as a LaTeX longtable.
print(
    sampled_usages_std
    .groupby('package_import_path')['line_number']
    .count()
    .reset_index(name='samples')
    .iloc[:, :2]
    .sort_values(by='samples', ascending=False)
    .to_latex(index=False, longtable=True)
)

\begin{longtable}{lr}
\toprule
     package\_import\_path &  samples \\
\midrule
\endhead
\midrule
\multicolumn{2}{r}{{Continued on next page}} \\
\midrule
\endfoot

\bottomrule
\endlastfoot
   golang.org/x/sys/unix &      225 \\
                 runtime &       96 \\
                 syscall &       32 \\
                 reflect &       27 \\
    internal/reflectlite &        8 \\
                    sync &        3 \\
           internal/poll &        2 \\
 runtime/internal/atomic &        2 \\
  crypto/internal/subtle &        1 \\
                go/types &        1 \\
           internal/race &        1 \\
           runtime/pprof &        1 \\
                 strings &        1 \\
\end{longtable}



# Tables 7.2 and 7.3: Classification summary

In [85]:
def number_or_nothing(n):
    """Return ``n`` when it is positive, otherwise an empty string."""
    return n if n > 0 else ""

In [158]:
# Number of distinct values in the `label` column of the std sample.
sampled_usages_std['label'].nunique()

12

In [154]:
# Column order (second label), the abbreviations used as column headers,
# and row order (first label) for the summary table.
labels2_in_order = ['efficiency', 'generics', 'serialization', 'inevitable', 'safer-reflect', 'layout-control',
                  'escape-analysis-escape', 'unused', 'documentation']
labels2_abbrev = { 'generics': 'gen', 'efficiency': 'eff', 'serialization': 'ser', 'inevitable': 'inev',
                'safer-reflect': 'SR', 'layout-control': 'LC', 'escape-analysis-escape': 'EA', 'unused': 'UU',
                'documentation': 'doc'}
labels1_in_order = ['conversion-struct-struct', 'conversion-struct-basic', 'conversion-header', 'conversion-struct-bytes',
                   'direct-memory-access', 'pointer-arithmetic-memory-layout', 'data-structure', 'delegate',
                   'type-reflection', 'syscall', 'unused', 'comment']

# Running totals per column, used for the final sum row.
column_sums = [0] * len(labels2_in_order)

# The first row is the header: blank corner, abbreviated labels, '.' for the row-total column.
classification_summary_app = [[' '] + [labels2_abbrev[label2] for label2 in labels2_in_order] + ['.']]

for label1 in labels1_in_order:
    # Hoisted out of the inner loop: rows carrying this first-level label.
    has_label1 = sampled_usages_app['label'] == label1

    # Count non-null `line_number` entries among rows matching both labels.
    # A boolean mask is used rather than where(...).dropna(), which would also
    # discard matching rows that happen to contain NaN in an unrelated column.
    values = [sampled_usages_app.loc[has_label1 & (sampled_usages_app['label2'] == label2),
                                     'line_number'].count()
              for label2 in labels2_in_order]

    column_sums = [total + value for total, value in zip(column_sums, values)]

    # Zero counts are rendered as blanks; the last entry is the row total.
    classification_summary_app.append([label1] + [number_or_nothing(value) for value in values] + [sum(values)])

# Final row: per-column totals and the grand total.
classification_summary_app.append(['.'] + column_sums + [sum(column_sums)])

# Split the header row off and build the frame from the remaining rows.
column_names = classification_summary_app.pop(0)
df = pd.DataFrame(classification_summary_app, columns=column_names)

print(df.to_latex(index=False))

\begin{tabular}{llllllllllr}
\toprule
                                  &  eff &  gen & ser & inev &  SR &  LC & EAE &  UU & doc &     . \\
\midrule
         conversion-struct-struct &  396 &   58 &   7 &    2 &   2 &   1 &     &     &     &   466 \\
          conversion-struct-basic &   80 &   35 &   5 &      &     &   1 &     &     &     &   121 \\
                conversion-header &   26 &    8 &   3 &      &     &     &     &     &     &    37 \\
          conversion-struct-bytes &   21 &    1 &  70 &    1 &     &   3 &     &     &     &    96 \\
             direct-memory-access &    9 &   19 &     &    9 &   1 &   1 &     &     &     &    39 \\
 pointer-arithmetic-memory-layout &    7 &    2 &   1 &      &     &   7 &   1 &     &     &    18 \\
                   data-structure &    7 &    5 &     &    2 &  22 &   1 &     &   1 &     &    38 \\
                         delegate &    4 &   63 &   1 &   19 &   1 &     &     &     &     &    88 \\
                  type-reflection &

In [160]:
# Column order (second label), the abbreviations used as column headers,
# and row order (first label) for the summary table.
labels2_in_order = ['no-gc', 'types', 'memory', 'inevitable', 'efficiency', 'serialization',
                  'layout-control', 'cgo', 'escape-analysis-escape', 'unnecessary', 'unused']
labels2_abbrev = { 'no-gc': 'no GC', 'layout-control': 'LC', 'efficiency': 'eff', 'serialization': 'ser',
                'types': 'typ', 'memory': 'mem', 'cgo': 'cgo', 'inevitable': 'inev', 'unused': 'UU',
                'unnecessary': 'UN', 'escape-analysis-escape': 'EA'}
labels1_in_order = ['syscall', 'direct-memory-access', 'pointer-arithmetic-memory-layout', 'conversion-struct-struct',
                   'conversion-struct-basic', 'conversion-header', 'conversion-struct-bytes',
                   'data-structure', 'delegate', 'type-reflection', 'unused', 'comment']

# Running totals per column, used for the final sum row.
column_sums = [0] * len(labels2_in_order)

# The first row is the header: blank corner, abbreviated labels, '.' for the row-total column.
classification_summary_std = [[' '] + [labels2_abbrev[label2] for label2 in labels2_in_order] + ['.']]

for label1 in labels1_in_order:
    # Hoisted out of the inner loop: rows carrying this first-level label.
    has_label1 = sampled_usages_std['label'] == label1

    # Count non-null `line_number` entries among rows matching both labels.
    # A boolean mask is used rather than where(...).dropna(), which would also
    # discard matching rows that happen to contain NaN in an unrelated column.
    values = [sampled_usages_std.loc[has_label1 & (sampled_usages_std['label2'] == label2),
                                     'line_number'].count()
              for label2 in labels2_in_order]

    column_sums = [total + value for total, value in zip(column_sums, values)]

    # Zero counts are rendered as blanks; the last entry is the row total.
    classification_summary_std.append([label1] + [number_or_nothing(value) for value in values] + [sum(values)])

# Final row: per-column totals and the grand total.
classification_summary_std.append(['.'] + column_sums + [sum(column_sums)])

# Split the header row off and build the frame from the remaining rows.
column_names = classification_summary_std.pop(0)
df = pd.DataFrame(classification_summary_std, columns=column_names)

print(df.to_latex(index=False))

\begin{tabular}{llllllllllllr}
\toprule
                                  & no GC & typ & mem & inev & eff & ser & LC & cgo & EA & UN & UU &    . \\
\midrule
                          syscall &   150 &     &     &    3 &     &     &  1 &     &    &  1 &    &  155 \\
             direct-memory-access &       &  10 &  13 &      &  17 &   2 &  2 &     &    &    &    &   44 \\
 pointer-arithmetic-memory-layout &       &   9 &   7 &    2 &   5 &     &  3 &   1 &  1 &    &    &   28 \\
         conversion-struct-struct &       &  20 &   4 &    4 &   1 &   9 &    &   1 &  1 &    &    &   40 \\
          conversion-struct-basic &       &     &   3 &    1 &     &   2 &  2 &   1 &    &    &    &    9 \\
                conversion-header &       &   2 &   1 &      &     &     &    &     &    &    &    &    3 \\
          conversion-struct-bytes &       &     &     &      &   4 &   8 &    &     &    &    &    &   12 \\
                   data-structure &       &   7 &  11 &    1 &     &     &    &

## Snippet selection for class examples (Section 7.3)

In [131]:
# Pick one example row labelled 'direct-memory-access' from the app sample.
# The fixed random_state makes the draw repeatable. Note that dropna() removes
# every row containing any NaN, not only the rows masked out by where().
snippet = (
    sampled_usages_app
    .where(sampled_usages_app['label'] == 'direct-memory-access')
    .dropna()
    .sample(n=1, random_state=2)
    .iloc[0]
)

print(snippet['context'])

	return
}
func (n *decNaked) rl() (v reflect.Value) {
	v = defUnsafeDecNakedWrapper.rl
	((*unsafeReflectValue)(unsafe.Pointer(&v))).ptr = unsafe.Pointer(&n.l)
	return
}
func (n *decNaked) rs() (v reflect.Value) {
	v = defUnsafeDecNakedWrapper.rs
	((*unsafeReflectValue)(unsafe.Pointer(&v))).ptr = unsafe.Pointer(&n.s)

