In [45]:
import re
import plotly.express as px
import pandas as pd
import statsmodels
# pd.set_option("display.max_rows", None, "display.max_columns", None)

output_directory = 'output-clean'

def added_weekly(fig, idx):
    results = px.get_trendline_results(fig)
    return results.iloc[idx]["px_fit_results"].params[1] * pd.to_timedelta(1, unit='d').total_seconds() * 7
    #todo: added daily + yearly, also print string

def estimate(fig, idx, date=pd.Timestamp.now()):
    results = px.get_trendline_results(fig)
    return results.iloc[idx]["px_fit_results"].params[0] + results.iloc[idx]["px_fit_results"].params[1] * date.timestamp()

def read_dataframe(stage):
    df = pd.read_csv(f'../{output_directory}/{stage}/output.csv')
    if 'committer_date_unix' in df:
        df['committer_date'] = df['committer_date_unix'].apply(lambda d: pd.to_datetime(d, unit='s'))
    return df

def peek_dataframe(df, column, message, type='str', filter=['revision', 'architecture', 'extractor']):
    success = df[~df[column].str.contains('NA') if type == 'str' else ~df[column].isna()][filter]
    failure = df[df[column].str.contains('NA') if type == 'str' else df[column].isna()][filter]
    print(f'{message}: {len(success)} successes, {len(failure)} failures')

def committer_date_axis(df, fig):
    years_and_revisions = df[['committer_date', 'revision']].drop_duplicates()
    years_and_revisions['committer_date'] = years_and_revisions['committer_date'].apply(lambda d: str(d.year))
    years_and_revisions = years_and_revisions.groupby(['committer_date']).min().reset_index()
    years_and_revisions['committer_date'].str.cat(' (' + years_and_revisions['revision'] + ')')
    fig.update_xaxes(
        ticktext=years_and_revisions['committer_date'].str.cat('<br><em>' + years_and_revisions['revision'].str[1:] + '</em>')[1:],
        tickvals=years_and_revisions['committer_date'][1:],
    )

df_kconfig = read_dataframe('kconfig')
peek_dataframe(df_kconfig, 'model-file', 'model extraction')

df_uvl = read_dataframe('model_to_uvl_featureide')
peek_dataframe(df_uvl, 'uvl-file', 'UVL transformation', 'na', ['model-file'])

df_xml = read_dataframe('model_to_xml_featureide')
peek_dataframe(df_xml, 'xml-file', 'XML transformation', 'na', ['model-file'])

df_smt = read_dataframe('model_to_smt_z3')
peek_dataframe(df_smt, 'smt-file', 'SMT transformation', 'na', ['model-file'])

df_dimacs = read_dataframe('dimacs')
peek_dataframe(df_dimacs, 'dimacs-file', 'CNF transformation')

#df['model-count-log10'] = (df['model-count']).fillna(0).map(lambda s: len(str(s))) #todo

model extraction: 6246 successes, 46 failures
UVL transformation: 4991 successes, 1255 failures
XML transformation: 6215 successes, 31 failures
SMT transformation: 6246 successes, 0 failures
CNF transformation: 6246 successes, 0 failures


In [46]:
fig = px.scatter(
    df_kconfig,
    x='committer_date',
    y='source_lines_of_code',
    trendline='ols',
    labels={'committer_date': 'Year / <em>Revision</em>', 'source_lines_of_code': 'Source Lines of Code'},
    hover_data=['revision']
)
committer_date_axis(df_kconfig, fig)
print("~#SLOC added per week: " + str(int(added_weekly(fig, 0))))
fig

~#SLOC added per week: 18693


In [52]:
archdf = pd.read_csv(f'../{output_directory}/read-linux-architectures/output.csv')
archdf['committer_date'] = archdf['committer_date_unix'].apply(lambda d: pd.to_datetime(d, unit='s'))
fig = px.line(archdf, x='committer_date', y='architecture', hover_data=['revision'], color='architecture')
fig.update_layout(height=1000)
committer_date_axis(archdf, fig)
fig

In [47]:
fig = px.scatter(
    df_kconfig,
    x='committer_date',
    y='model-features',
    trendline='ols',
    color='extractor'
)
print("~#features added per week (kconfigreader): " + str(int(added_weekly(fig, 0))))
print("~#features added per week (kmax): " + str(int(added_weekly(fig, 1))))
fig = px.scatter(
    df_kconfig,
    x='committer_date',
    y='model-features',
    facet_col='extractor',
    color='architecture',
    labels={'committer_date': 'Year / <em>Revision</em>', 'model-features': '# Features'},
    hover_data=['revision', 'architecture']
)
committer_date_axis(df_kconfig, fig)
fig

~#features added per week (kconfigreader): 22
~#features added per week (kmax): 14


In [48]:
import glob
import re

def inspect_architecture_features_for_revision(extractor, revision):
    architectures = [re.search('\[(.*)\]', f).group(1) for f in glob.glob(f'../{output_directory}/{extractor}/linux/{revision}[*.features')]
    architectures = list(set(architectures))
    architectures.sort()
    data = []
    for architecture in architectures:
        features_filename = f'../{output_directory}/{extractor}/linux/{revision}[{architecture}].features'
        with open(features_filename, 'r') as f:
            features = set([f.strip() for f in f.readlines()])
        data.append({'extractor': extractor, 'revision': revision, 'architecture': architecture, 'features': features, '#features': len(features)})
    common_features = set.intersection(*[d['features'] for d in data])
    for architecture in architectures:
        descriptor = [d for d in data if d['architecture'] == architecture][0]
        other_features = set.union(*[d['features'] for d in data if d['architecture'] != architecture])
        descriptor['#common_features'] = len(common_features)
        descriptor['owned_features'] = descriptor['features'].difference(other_features)
        descriptor['#owned_features'] = len(descriptor['owned_features'])
        descriptor['#shared_features'] = len(descriptor['features'].difference(common_features).difference(descriptor['owned_features']))
    return data

def inspect_architecture_features(extractor):
    revisions = [re.search('linux/(.*)\[', f).group(1) for f in glob.glob(f'../{output_directory}/{extractor}/linux/*.features')]
    revisions = list(set(revisions))
    revisions.sort()
    data = []
    for revision in revisions:
        print(revision)
        data += inspect_architecture_features_for_revision(extractor, revision)
    return pd.DataFrame(data).drop(columns=['features', 'owned_features'])

arch_features = inspect_architecture_features('kconfigreader')

KeyError: "['features', 'owned_features'] not found in axis"

In [224]:
arch2 = arch_features.melt(id_vars=['extractor', 'revision', 'architecture'], value_vars=['#features', '#common_features', '#owned_features', '#shared_features'])
fig = px.box(arch2, x='revision', y='value', color='variable')
fig
# todo: calculate core dead features and remove(?) them from here, or plot them, and find a "good" definition of what it means for an architecture to "include" a feature
#.e.g., dead feature occur in a formula, but not really in the architecture (related work does not do this)
# i guess there may be many core/dead features per arch, as later all arch's have been unified and decision propagation does the rest => i.e., it is important to analyze formulas, not only syntax of kconfig, so our computation effort is justified

In [219]:
arch2 = arch_features.melt(id_vars=['extractor', 'revision', 'architecture'], value_vars=['#common_features', '#shared_features', '#owned_features'])
fig = px.bar(arch2[arch2['architecture'] == 'x86'], x='revision', y='value', color='variable')
fig = px.bar(arch2, x='revision', y='value', color='variable', log_y=True)
fig

In [90]:
from distutils.version import StrictVersion
dff = df[['revision', 'model-features']].groupby('revision').max().reset_index()
dff['revision'] = df['revision'].map(lambda s: s[1:]).apply(StrictVersion)
fig = px.scatter(dff.sort_values(by='revision'), y='model-features')
fig

In [76]:
fig = px.scatter(df, x='committer_date', y='dimacs-variables', hover_data=['revision'], trendline='ols', color='extractor')
print("~#variables added per week (kconfigreader): " + str(int(added_weekly(fig, 0))))
print("~#variables added per week (kmax): " + str(int(added_weekly(fig, 1))))
fig

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['system', 'revision', 'architecture', 'binding-file', 'kconfig-file', 'environment', 'model-file', 'model-features', 'model-variables', 'model-literals', 'model-time'] but received: committer_date

In [253]:
df2 = df[df['model-count-log10'] > 1]
fig = px.scatter(df2, x='committer_date', y='model-count-log10', hover_data=['revision'], color='extractor', trendline='ols')
print("~#configurations scaled per week (kconfigreader): " + str(int(pow(10, added_weekly(fig, 0)))))
print("~#configurations scaled per week (kmax): " + str(int(pow(10, added_weekly(fig, 1)))))
print("~#configurations today (kconfigreader): 10^" + str(int(estimate(fig, 0))))
print("~#configurations today (kmax): 10^" + str(int(estimate(fig, 1))))
fig

~#configurations scaled per week (kconfigreader): 37
~#configurations scaled per week (kmax): 12
~#configurations today (kconfigreader): 10^2322
~#configurations today (kmax): 10^1593


In [194]:
df = pd.read_csv('../output/kconfig/output.csv')
df['committer_date_unix'] = df['committer_date_unix'].apply(lambda d: pd.to_datetime(d, unit='s'))

fig = px.scatter(df, x='model-features', y='revision-parameter', color='extractor', size='model-literals')
fig.show()
#todo: for history, consider boxplot

In [198]:
fig = px.box(df, x='committer_date_unix', y='model-features', color='extractor')
fig.show()
#todo: for history, consider boxplot