In [1]:
pip install plotly pandas statsmodels kaleido

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [45]:
# read CSV data

import glob
import re
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import os.path
import pickle

# name of the pipeline output directory; paths below are built as '../{output_directory}/...'
output_directory = 'output-clean'
# directory that show() writes '{name}.pdf' into (show() skips the export if this is falsy)
figures_directory = '../../paper-icse-2024-linux/figures'
# figure height that most plots below pass to show()
default_height = 300

def read_dataframe(stage, dtype=None):
    """Load the ``output.csv`` written by one pipeline stage.

    Args:
        stage: Name of the stage sub-directory below ``../{output_directory}``.
        dtype: Optional column-to-dtype mapping forwarded to ``pd.read_csv``.
            ``None`` (the default) lets pandas infer every dtype.

    Returns:
        The stage's CSV as a DataFrame. If it has a ``committer_date_unix``
        column, that column is interpreted as seconds since the Unix epoch and
        a parsed ``committer_date`` column is added.
    """
    df = pd.read_csv(f'../{output_directory}/{stage}/output.csv', dtype=dtype)
    if 'committer_date_unix' in df:
        # convert the whole column in one call instead of one Timestamp per row
        df['committer_date'] = pd.to_datetime(df['committer_date_unix'], unit='s')
    return df

def peek_dataframe(df, column, message, type='str', filter=['revision', 'architecture', 'extractor']):
    """Print how many rows of ``df`` succeeded or failed according to ``column``.

    Args:
        df: DataFrame to inspect.
        column: Column that marks failure.
        message: Prefix of the printed summary line.
        type: ``'str'`` counts a row as failed when the column's text contains
            ``'NA'``; any other value counts a row as failed when the cell is
            missing (``isna``).
        filter: Columns selected from the success/failure subsets before they
            are counted (they must exist in ``df``).
    """
    values = df[column]
    if type == 'str':
        failed = values.str.contains('NA')
    else:
        failed = values.isna()
    success = df[~failed][filter]
    failure = df[failed][filter]
    print(f'{message}: {len(success)} successes, {len(failure)} failures')

def replace_values(df):
    """Rewrite the raw extractor identifiers in ``df`` to their display names, in place."""
    display_names = (
        ('kconfigreader', 'KConfigReader'),
        ('kmax', 'KClause'),
    )
    for raw_name, display_name in display_names:
        df.replace(raw_name, display_name, inplace=True)

# load one result table per pipeline stage and print how many rows of each stage succeeded

df_architectures = read_dataframe('read-linux-architectures')
df_architectures = df_architectures.sort_values(by='committer_date')
df_architectures['year'] = df_architectures['committer_date'].dt.year

df_kconfig = read_dataframe('kconfig')
peek_dataframe(df_kconfig, 'model-file', 'model extraction')
df_kconfig['year'] = df_kconfig['committer_date'].dt.year

df_uvl = read_dataframe('model_to_uvl_featureide')
peek_dataframe(df_uvl, 'uvl-file', 'UVL transformation', 'na', ['model-file'])

df_xml = read_dataframe('model_to_xml_featureide')
peek_dataframe(df_xml, 'xml-file', 'XML transformation', 'na', ['model-file'])

df_smt = read_dataframe('model_to_smt_z3')
peek_dataframe(df_smt, 'smt-file', 'SMT transformation', 'na', ['model-file'])

df_dimacs = read_dataframe('dimacs')
peek_dataframe(df_dimacs, 'dimacs-file', 'CNF transformation')

df_backbone_dimacs = read_dataframe('backbone-dimacs')
peek_dataframe(df_backbone_dimacs, 'backbone.dimacs-file', 'backbone transformation', 'na')

# model counts are read as strings so that arbitrarily large numbers are kept verbatim
df_solve = read_dataframe('solve_model-count', {'model-count': 'string'})
# a count of exactly '1' is blanked, so it ends up as a missing value in the log10 column below
df_solve['model-count'] = df_solve['model-count'].replace('1', '')
# the number of decimal digits serves as the log10 magnitude; empty/missing counts become NaN
df_solve['model-count-log10'] = df_solve['model-count'].fillna('').map(len).replace(0, np.nan)
df_solve['year'] = df_solve['committer_date'].dt.year
peek_dataframe(df_solve, 'model-count-log10', 'model counting', 'na')

# use the extractors' display names in every table
for df in [df_architectures, df_kconfig, df_uvl, df_xml, df_smt, df_dimacs, df_backbone_dimacs, df_solve]:
    replace_values(df)

model extraction: 6246 successes, 46 failures
UVL transformation: 4991 successes, 1255 failures
XML transformation: 6215 successes, 31 failures
SMT transformation: 6246 successes, 0 failures
CNF transformation: 6246 successes, 0 failures
backbone transformation: 6175 successes, 71 failures
model counting: 2055 successes, 4120 failures


In [3]:
# helper functions for drawing plots

def estimate_group(group):
    """Print a LaTeX table row whose single cell spans six columns and holds a group heading.

    Args:
        group: Heading text placed inside the multicolumn cell.
    """
    # backslashes are escaped explicitly; the previous '\m' was an invalid escape sequence
    print('\\multicolumn{6}{l}{' + group + '} \\\\')

def estimate_trend(fig, message='', idx=0, date1=None, date2=None):
    """Print a LaTeX table row summarising one fitted trendline of ``fig``.

    The slope is treated as a per-second rate: it is scaled to per-day,
    per-month (30.437 days) and per-year (365.25 days) growth, and the fitted
    line is evaluated at the Unix timestamps of ``date1`` and ``date2``.

    Args:
        fig: Plotly Express figure created with a trendline.
        message: Text for the first cell of the row.
        idx: Row of ``px.get_trendline_results(fig)`` to report.
        date1: First evaluation date; defaults to 20 years (of 365.25 days)
            before the time of the call.
        date2: Second evaluation date; defaults to the time of the call.
    """
    # resolve the defaults per call; as default arguments they would be frozen at definition time
    now = pd.Timestamp.now()
    if date1 is None:
        date1 = now - pd.Timedelta(days=365.25*20)
    if date2 is None:
        date2 = now
    fit = px.get_trendline_results(fig).iloc[idx]['px_fit_results']
    intercept = fit.params[0]
    slope = fit.params[1]
    seconds_per_day = pd.to_timedelta(1, unit='D').total_seconds()
    daily = slope * seconds_per_day
    monthly = slope * seconds_per_day * 30.437
    yearly = slope * seconds_per_day * 365.25
    on_date1 = intercept + slope * date1.timestamp()
    on_date2 = intercept + slope * date2.timestamp()
    # backslash escaped explicitly; the previous '\h' was an invalid escape sequence
    print('\\hspace*{1mm} ' + f'{message} & {round(daily):,} & {round(monthly):,} & {round(yearly):,} & {round(on_date1):,} & {round(on_date2):,} \\\\ ')

def committer_date_x_axis(fig, df=df_kconfig, append_revision=True):
    """Put one x-axis tick per year on a figure whose x axis shows committer dates.

    For every year, the earliest (committer_date, revision) row of ``df`` is
    selected; the first of these years gets no tick.

    Args:
        fig: Figure whose x axes are updated.
        df: Frame with ``committer_date`` and ``revision`` columns from which
            the ticks are derived.
        append_revision: If true, each tick label also shows the revision
            (minus its first character) below the year.
    """
    # use the frame that was passed in; previously the parameter was ignored in favour of df_kconfig
    axis = df[['committer_date', 'revision']].drop_duplicates()
    axis['year'] = axis['committer_date'].apply(lambda d: str(d.year))
    axis = axis.sort_values(by='committer_date').groupby('year').nth(0).reset_index()
    if append_revision:
        ticktext = axis['year'].str.cat('<br><sup>' + axis['revision'].str[1:] + '</sup>')
    else:
        ticktext = axis['year']
    fig.update_xaxes(
        # drop the first year from labels and positions alike so that both stay aligned
        ticktext=ticktext[1:],
        tickvals=axis['year'][1:]
    )

def revision_x_axis(fig, df=df_kconfig):
    """Label a revision-based x axis with years.

    For every year, the earliest revision in ``df`` becomes a tick position and
    is labelled with that year.

    Args:
        fig: Figure whose x axes are updated.
        df: Frame with ``committer_date`` and ``revision`` columns from which
            the ticks are derived.
    """
    # use the frame that was passed in; previously the parameter was ignored in favour of df_kconfig
    axis = df[['committer_date', 'revision']].drop_duplicates()
    axis['year'] = axis['committer_date'].apply(lambda d: str(d.year))
    axis = axis.sort_values(by='committer_date').groupby('year').nth(0).reset_index()
    fig.update_xaxes(
        ticktext=axis['year'],
        tickvals=axis['revision']
    )

def log10_y_axis(fig):
    """Render every y tick label of ``fig`` as a power of ten (tick value as superscript of 10)."""
    exponent_markup = dict(tickprefix="10<sup>", ticksuffix="</sup>")
    fig.update_yaxes(**exponent_markup)

def percentage_y_axis(fig):
    """Format the tick labels of ``fig.layout.yaxis`` as percentages without decimals."""
    y_axis = fig.layout.yaxis
    y_axis.tickformat = ',.0%'

def committer_date_labels(dict={}):
    """Return plot labels for a committer-date x axis, merged with ``dict``.

    Entries of ``dict`` win over the built-in label; neither mapping is modified.
    """
    base_labels = {'committer_date': 'Year / First Release in Year'}
    return base_labels | dict

def revision_labels(dict={}):
    """Return plot labels for a revision x axis, merged with ``dict``.

    Entries of ``dict`` win over the built-in label; neither mapping is modified.
    """
    base_labels = {'revision': 'Year'}
    return base_labels | dict

def style_legend(fig, position='topleft'):
    """Anchor the legend of ``fig`` in one corner of the plot, or hide it.

    Args:
        fig: Figure whose layout is updated.
        position: ``'topleft'``, ``'topright'``, ``'bottomright'`` or
            ``'bottomleft'``; any other value hides the legend.
    """
    corner_anchors = (
        ('topleft', dict(yanchor='top', y=0.98, xanchor='left', x=0.01)),
        ('topright', dict(yanchor='top', y=0.98, xanchor='right', x=0.98)),
        ('bottomright', dict(yanchor='bottom', y=0.01, xanchor='right', x=0.98)),
        ('bottomleft', dict(yanchor='bottom', y=0.01, xanchor='left', x=0.01)),
    )
    for corner, anchor in corner_anchors:
        if position == corner:
            fig.update_layout(legend=anchor)
            return
    # no known corner requested
    fig.update_layout(showlegend=False)

def style_box(fig, legend_position='topleft'):
    """Apply the notebook's box-plot look: transparent fill, thin lines, small markers.

    Args:
        fig: Figure whose traces and legend are restyled.
        legend_position: Forwarded to ``style_legend``.
    """
    trace_style = {
        'fillcolor': 'rgba(0,0,0,0)',
        'line_width': 1,
        'marker_size': 2,
    }
    for prop, value in trace_style.items():
        fig.update_traces(**{prop: value})
    style_legend(fig, legend_position)

def style_scatter(fig, marker_size=4, legend_position='topleft'):
    """Apply the notebook's scatter-plot look.

    Args:
        fig: Figure whose traces and legend are restyled.
        marker_size: Marker size for all traces; a falsy value (``None``, ``0``)
            leaves the existing marker sizes untouched.
        legend_position: Forwarded to ``style_legend``.
    """
    resize_markers = bool(marker_size)
    if resize_markers:
        fig.update_traces(marker_size=marker_size)
    style_legend(fig, legend_position)

def plot_failures(fig, df, x, y, y_value, align='bottom', xref='x', font_size=10, textangle=270):
    """Annotate each x group of ``fig`` with its number and share of missing ``y`` values.

    Args:
        fig: Figure that receives one annotation per group.
        df: Data to analyse.
        x: Column to group by (missing keys form their own group).
        y: Column whose missing values count as failures.
        y_value: y position of every annotation.
        align: ``'bottom'`` anchors the text at its bottom and shifts it up;
            anything else anchors it at its top and shifts it down.
        xref: x-axis reference of the annotations.
        font_size: Annotation font size.
        textangle: Annotation rotation.

    Groups without failures receive an empty annotation text.
    """
    failures_col, attempts_col, text_col = f'{y}_failures', f'{y}_attempts', f'{y}_text'
    grouped = df.groupby(x, dropna=False)[y]
    attempts = grouped.size()
    # size() counts all rows, count() only the non-missing ones
    missing = attempts - grouped.count()
    summary = pd.merge(
        missing.reset_index().rename(columns={y: failures_col}),
        attempts.reset_index().rename(columns={y: attempts_col}),
    )
    share = summary[failures_col] / summary[attempts_col]
    summary[text_col] = summary[failures_col].astype(str) + ' (' + share.apply(lambda v: "{0:.1f}%".format(v * 100)) + ')'

    at_bottom = align == 'bottom'
    for position, failed, label in zip(summary[x], summary[failures_col], summary[text_col]):
        fig.add_annotation(
            x=position,
            y=y_value,
            text=label if failed != 0 else "",
            showarrow=False,
            font_size=font_size,
            textangle=textangle,
            align='left' if at_bottom else 'right',
            yanchor='bottom' if at_bottom else 'top',
            yshift=5 if at_bottom else -5,
            font_color='gray',
            xref=xref
        )

def show(fig, name=None, width=1000, height=500, margin=None):
    """Size a figure, optionally export it as PDF, and display it.

    Args:
        fig: Figure to finalise.
        name: Base file name for the export; without it nothing is written.
        width: Figure width.
        height: Figure height.
        margin: Layout margin dict; a falsy value selects zero margins on all sides.

    The PDF is written to ``{figures_directory}/{name}.pdf`` only if both
    ``figures_directory`` and ``name`` are truthy.
    """
    fig.update_layout(width=width, height=height)
    fig.update_layout(margin=margin if margin else dict(l=0, r=0, t=0, b=0))
    export_requested = figures_directory and name
    if export_requested:
        fig.write_image(f'{figures_directory}/{name}.pdf')
    fig.show()

In [4]:
# differentiate kinds of features
# takes up to an hour to run, so here's a loading mechanism:
# True loads the previously pickled result from 'linux-features.dat',
# False recomputes it from the extracted files and rewrites that file
load_feature_data = True

def add_features(descriptor, source, features):
    """Store a feature set and its size in ``descriptor`` under keys derived from ``source``.

    Sets with at most one element are treated as unusable: an empty set and a
    NaN count are stored instead.

    Args:
        descriptor: Dict that receives ``'{source}_features'`` and ``'#{source}_features'``.
        source: Key prefix.
        features: Collection of feature names.
    """
    usable = len(features) > 1
    if usable:
        descriptor[f'{source}_features'] = features
        descriptor[f'#{source}_features'] = len(features)
    else:
        descriptor[f'{source}_features'] = set()
        descriptor[f'#{source}_features'] = np.nan

def add_feature_statistics(data, architectures, source):
    """Add cross-architecture feature counts to every descriptor of one revision.

    For each architecture, its descriptor receives:
      * ``#{source}_total_features``: features occurring in any descriptor,
      * ``#{source}_common_features``: features occurring in every descriptor,
      * ``#{source}_owned_features``: features occurring in no other architecture,
      * ``#{source}_shared_features``: the descriptor's features that are
        neither common nor owned.

    Args:
        data: List of descriptor dicts with ``'architecture'`` and
            ``'{source}_features'`` (a set) entries.
        architectures: Architectures to process; each must occur in ``data``.
        source: Key prefix selecting the feature set.
    """
    features_key = f'{source}_features'
    feature_sets = [d[features_key] for d in data]
    # set().union(*[]) is well-defined, whereas set.union(*[]) raises TypeError for empty data
    total_features = set().union(*feature_sets)
    common_features = set.intersection(*feature_sets) if feature_sets else set()
    for architecture in architectures:
        descriptor = [d for d in data if d['architecture'] == architecture][0]
        # stays empty (instead of raising) when this is the only architecture
        features_in_other_architectures = set().union(*[d[features_key] for d in data if d['architecture'] != architecture])
        descriptor[f'#{source}_total_features'] = len(total_features)
        descriptor[f'#{source}_common_features'] = len(common_features)
        owned_features = descriptor[features_key].difference(features_in_other_architectures)
        descriptor[f'#{source}_owned_features'] = len(owned_features)
        descriptor[f'#{source}_shared_features'] = len(descriptor[features_key].difference(common_features).difference(owned_features))

def clean_features(descriptor, source):
    """Remove the ``'{source}_features'`` entry from ``descriptor`` (KeyError if it is absent)."""
    del descriptor[f'{source}_features']

def inspect_architecture_features_for_model(extractor, revision, architecture):
    """Collect the feature sets of one extracted model (extractor, revision, architecture).

    Reads the extractor's ``.features`` file and, if it exists, the matching
    ``.backbone.dimacs`` file below ``../{output_directory}``.

    Returns:
        A descriptor dict with ``extractor``, ``revision``, ``architecture``
        and ``features_jaccard`` (NaN unless the DIMACS file yields more than
        one feature), plus, for each of the sources ``extractor``, ``dimacs``,
        ``core_dimacs``, ``dead_dimacs``, ``dimacs_no_dead`` and
        ``dimacs_no_dead_no_core``, the entries written by ``add_features``.
    """
    extractor_features_filename = f'../{output_directory}/kconfig/{extractor}/linux/{revision}[{architecture}].features'
    dimacs_filename = f'../{output_directory}/backbone-dimacs/{extractor}/linux/{revision}[{architecture}].backbone.dimacs'
    dimacs_features = set()
    core_dimacs_features = set()
    dead_dimacs_features = set()
    features_jaccard = np.nan
    with open(extractor_features_filename, 'r') as f:
        # one feature per line; a leading 'CONFIG_' prefix is removed
        extractor_features = set([re.sub('^CONFIG_', '', f.strip()) for f in f.readlines()])
    if os.path.isfile(dimacs_filename):
        with open(dimacs_filename, 'r') as f:
            lines = f.readlines()
            # maps DIMACS variable index -> feature name, taken from 'c <index> <name>' comment lines
            variable_map = {}
            # note: the loop variable reuses the name f; the file content is already in `lines`
            for f in lines:
                # skip comment lines mentioning 'k!' or '__VISIBILITY__CONFIG_'
                # (presumably auxiliary variables that are not features; verify against the transformation)
                if f.startswith('c ') and "k!" not in f and "__VISIBILITY__CONFIG_" not in f:
                    result = re.search('^c ([^ ]+) ([^ ]+)$', f)
                    if result:
                        # strip() removes surrounding whitespace such as a captured trailing newline
                        variable_map[int(result.group(1).strip())] = result.group(2).strip()
            dimacs_features = set(variable_map.values())
            if len(dimacs_features) <= 1:
                # at most one feature: treat the model as having no usable features
                dimacs_features = set()
            else:
                # Jaccard similarity = |intersection| / |union| of extractor and DIMACS feature names
                features_jaccard = len(set.intersection(extractor_features, dimacs_features)) / len(set.union(extractor_features, dimacs_features))
                for f in lines:
                    # lines of the form '<literal> 0' (a clause with a single literal)
                    result = re.search('^([^ ]+) 0$', f)
                    if result:
                        literal = int(result.group(1))
                        index = abs(literal)
                        if index in variable_map:
                            # a positive literal marks the feature as core, a negative one as dead
                            if literal > 0:
                                core_dimacs_features.add(variable_map[index])
                            else:
                                dead_dimacs_features.add(variable_map[index])
    dimacs_no_dead_features = set.difference(dimacs_features, dead_dimacs_features)
    dimacs_no_dead_no_core_features = set.difference(dimacs_no_dead_features, core_dimacs_features)
    descriptor = {'extractor': extractor, 'revision': revision, 'architecture': architecture, 'features_jaccard': features_jaccard}
    add_features(descriptor, 'extractor', extractor_features)
    add_features(descriptor, 'dimacs', dimacs_features)
    add_features(descriptor, 'core_dimacs', core_dimacs_features)
    add_features(descriptor, 'dead_dimacs', dead_dimacs_features)
    add_features(descriptor, 'dimacs_no_dead', dimacs_no_dead_features)
    add_features(descriptor, 'dimacs_no_dead_no_core', dimacs_no_dead_no_core_features)
    return descriptor

def inspect_architecture_features_for_revision(extractor, revision):
    """Build the feature descriptors of all architectures of one revision.

    The architectures are derived from the bracketed part of the matching
    ``.features`` file names. After the cross-architecture statistics have been
    added, the raw feature sets are removed from the descriptors again, so only
    the counts remain.

    Args:
        extractor: Extractor sub-directory below ``kconfig``.
        revision: Revision whose ``{revision}[<architecture>].features`` files are inspected.

    Returns:
        A list with one descriptor dict per architecture, sorted by architecture.
    """
    # the unmatched '[' in the pattern is matched literally by glob
    feature_files = glob.glob(f'../{output_directory}/kconfig/{extractor}/linux/{revision}[*.features')
    # raw string: '\[' is a regex escape, not a Python string escape
    architectures = sorted({re.search(r'\[(.*)\]', f).group(1) for f in feature_files})
    data = [inspect_architecture_features_for_model(extractor, revision, architecture) for architecture in architectures]
    sources = ['extractor', 'dimacs', 'core_dimacs', 'dead_dimacs', 'dimacs_no_dead', 'dimacs_no_dead_no_core']
    for source in sources:
        add_feature_statistics(data, architectures, source)
    # drop the sets only after all statistics are computed
    for source in sources:
        for descriptor in data:
            clean_features(descriptor, source)
    return data

def inspect_architecture_features(extractor):
    """Build the feature descriptors of every revision and architecture of one extractor.

    The revisions are derived from the ``.features`` file names (the part
    between ``linux/`` and the last ``[``). Every tenth revision is printed as
    a progress indicator.

    Args:
        extractor: Extractor sub-directory below ``kconfig``.

    Returns:
        A flat list of descriptor dicts, ordered by revision name.
    """
    feature_files = glob.glob(f'../{output_directory}/kconfig/{extractor}/linux/*.features')
    # raw string: '\[' is a regex escape, not a Python string escape
    revisions = sorted({re.search(r'linux/(.*)\[', f).group(1) for f in feature_files})
    data = []
    for i, revision in enumerate(revisions, start=1):
        if i % 10 == 0:
            print(revision + ' . ', end='')
        data += inspect_architecture_features_for_revision(extractor, revision)
    print()
    return data

if load_feature_data:
    # NOTE(review): pickle.load can execute arbitrary code; only load a 'linux-features.dat' from a trusted source
    with open('linux-features.dat', 'rb') as f:
        features_by_kind_per_architecture = pickle.load(f)
else:
    # recompute the descriptors for both extractors and cache them as a pickled DataFrame
    features_by_kind_per_architecture = inspect_architecture_features('kconfigreader')
    features_by_kind_per_architecture += inspect_architecture_features('kmax')
    features_by_kind_per_architecture = pd.DataFrame(features_by_kind_per_architecture)
    with open('linux-features.dat', 'wb') as f:
        pickle.dump(features_by_kind_per_architecture, f)

# use the extractors' display names, as in the other tables
replace_values(features_by_kind_per_architecture)
# pd.merge without `on` joins on the columns that both frames have in common
df_features = pd.merge(df_architectures, features_by_kind_per_architecture).sort_values(by='committer_date')
df_features = pd.merge(df_kconfig, df_features).sort_values(by='committer_date')

In [5]:
# source lines of code

def sloc(trendline=None):
    """Create a scatter plot of source lines of code over committer date from ``df_kconfig``.

    Args:
        trendline: Forwarded to ``px.scatter`` (for example ``'ols'``); ``None`` draws no trendline.

    Returns:
        The Plotly Express figure.
    """
    axis_titles = {'source_lines_of_code': 'Number of Source Lines of Code', 'committer_date': 'Year'}
    figure = px.scatter(
        df_kconfig,
        x='committer_date',
        y='source_lines_of_code',
        labels=axis_titles,
        hover_data=['revision'],
        trendline=trendline
    )
    return figure

# this figure is fitted with an OLS trendline only to print the LaTeX table row; it is not shown
fig = sloc('ols')
estimate_trend(fig, 'SLOC')

# the exported figure is drawn without a trendline
fig = sloc()
style_scatter(fig)
show(fig, 'sloc', width=500, height=default_height)

\hspace*{1mm} SLOC & 2,670 & 81,282 & 975,399 & 2,660,303 & 22,168,291 \\ 


In [6]:
# processor architectures

# start from an empty frame with the same columns, so px.line only sets up axes and labels;
# the points are added below as one trace per architecture
fig = px.line(
    pd.DataFrame(columns=df_architectures.columns),
    x='committer_date',
    y='architecture',
    labels=committer_date_labels({'architecture': 'Processor Architecture'}),
    hover_data=['revision']
)

# one row of small black markers per architecture, visiting the architectures
# in reverse order of their appearance in df_architectures
for architecture in df_architectures['architecture'].unique()[::-1]:
    df_architecture = df_architectures[df_architectures['architecture'] == architecture]
    fig.add_trace(go.Scatter(x=df_architecture['committer_date'], y=df_architecture['architecture'], mode='markers', line_color='rgba(0,0,0,1)', marker_size=2, showlegend=False))

# groupby(...).min() takes the minimum of every column independently;
# only the earliest committer_date per architecture is used below
df_architectures_first_version = df_architectures.groupby('architecture').min().reset_index()
# write each architecture's name to the left of its earliest data point
for row in range(len(df_architectures_first_version)):
    fig.add_annotation(
        x=df_architectures_first_version.at[row, 'committer_date'],
        y=df_architectures_first_version.at[row, 'architecture'],
        text=df_architectures_first_version.at[row, 'architecture'],
        showarrow=False, yshift=0, xshift=-5, font_size=10, xanchor='right', font_color='black'
    )

def add_information(fig, criterion, name, symbol, color):
    """Overlay a marker trace on the (committer_date, architecture) pairs selected by ``criterion``.

    Args:
        fig: Figure that receives the trace.
        criterion: Frame with ``committer_date`` and ``architecture`` columns;
            only rows of ``df_architectures`` matching one of its pairs are drawn.
        name: Legend entry of the trace.
        symbol: Marker symbol.
        color: Colour used for both the marker and its outline.
    """
    selected_pairs = criterion[['committer_date', 'architecture']]
    flagged = pd.merge(df_architectures, selected_pairs)
    marker_style = dict(size=4, color=color, symbol=symbol, line=dict(color=color, width=1))
    overlay = go.Scatter(
        x=flagged['committer_date'],
        y=flagged['architecture'],
        name=name,
        mode='markers',
        marker=marker_style
    )
    fig.add_trace(overlay)
# overlay one marker series per kind of missing result:
# model-file containing 'NA', missing features_jaccard, and missing model-count-log10 (the latter two per extractor)
add_information(fig, df_kconfig[df_kconfig['model-file'].str.contains('NA')], 'Extraction Failure', 'square-open', '#1f77b4')
add_information(fig, df_features[df_features['features_jaccard'].isna() & (df_features['extractor'] == 'KConfigReader')], 'Unsatisfiable (KConfigReader)', 'line-ew', '#2ca02c')
add_information(fig, df_features[df_features['features_jaccard'].isna() & (df_features['extractor'] == 'KClause')], 'Unsatisfiable (KClause)', 'line-ns', '#2ca02c')
add_information(fig, df_solve[df_solve['model-count-log10'].isna() & (df_solve['extractor'] == 'KConfigReader')], '#SAT Timeout (KConfigReader)', 'line-nw', '#ff7f0e')
add_information(fig, df_solve[df_solve['model-count-log10'].isna() & (df_solve['extractor'] == 'KClause')], '#SAT Timeout (KClause)', 'line-ne', '#ff7f0e')
# TODO: do not show model count if extraction failed
# TODO: maybe add a STATE column which is 'extracted', 'unsat', 'un#sat' etc. and map it onto a symbol

committer_date_x_axis(fig)
# the architecture names are drawn as annotations, so the y tick labels are hidden
fig.update_yaxes(showticklabels=False)
# marker_size=None keeps the per-trace marker sizes set above
style_scatter(fig, marker_size=None, legend_position='bottomleft')
show(fig, 'architectures')

In [7]:
# Jaccard similarity of configs and features
# configs = extractor features
# features = dimacs features

# box plot per year and extractor of the Jaccard similarity between extractor features and DIMACS features
fig = px.box(
    df_features,
    x='year',
    y='features_jaccard',
    color='extractor',
    labels={'features_jaccard': 'Jaccard Similarity of Configs and Features', 'extractor': 'Extractor', 'year': 'Year'}
)
percentage_y_axis(fig)
style_box(fig, legend_position='bottomright')
# annotate each year, just below y=0.99, with the number and share of rows whose similarity is missing
plot_failures(fig, df_features, 'year', 'features_jaccard', 0.99, align='top')
show(fig, 'configs-vs-features', height=default_height, width=500)

In [8]:
# share of dead features

# box plot per year and extractor of (#dead_dimacs_features / #dimacs_features);
# the share column exists only in the copy passed to px.box, df_features itself is unchanged
fig = px.box(
    df_features.assign(share_of_dead_features=df_features['#dead_dimacs_features'] / df_features['#dimacs_features']),
    x='year',
    y='share_of_dead_features',
    color='extractor',
    labels={'share_of_dead_features': 'Share of Dead Features', 'extractor': 'Extractor', 'year': 'Year'}
)
percentage_y_axis(fig)
style_box(fig)
# failures are counted on the missing values of '#dead_dimacs_features' and drawn just above y=-0.05
plot_failures(fig, df_features, 'year', '#dead_dimacs_features', -0.05)
show(fig, 'share-of-dead-features', height=default_height, width=500)

In [9]:
# features

def estimate_features(df, y, name):
    """Print the LaTeX rows of one trend-table group for a feature metric.

    An OLS trendline of ``y`` over ``committer_date`` is fitted per extractor;
    the figure itself is never shown. The first fitted trend is printed with
    the kcr macro as its label, the second with the kcl macro.

    Args:
        df: Data with ``committer_date``, ``extractor`` and ``y`` columns.
        y: Column to fit.
        name: Heading of the group row.
    """
    trend_fig = px.scatter(df, x='committer_date', y=y, trendline='ols', color='extractor')
    estimate_group(name)
    for trace_idx, macro in enumerate(('\\kcr', '\\kcl')):
        estimate_trend(trend_fig, macro, trace_idx)

def plot_features(source):
    """Print the trend table and draw all feature figures for one feature source.

    Args:
        source: Key prefix of the feature columns in ``df_features``
            (for example ``'extractor'`` or ``'dimacs_no_dead'``).

    Prints four trend groups (total number, any architecture, arm, x86) and
    shows four figures: features by kind per revision, total features over
    time, features per architecture over time, and a box plot per revision.
    """
    # per (extractor, revision): total and common counts are taken as the minimum over the
    # architectures, owned counts are summed; the shared count is what remains of the total
    features_by_kind = df_features.groupby(['extractor', 'revision']) \
        .agg({f'#{source}_total_features': 'min', f'#{source}_common_features': 'min', f'#{source}_owned_features': 'sum'}).reset_index()
    features_by_kind[f'#{source}_shared_features'] = features_by_kind[f'#{source}_total_features'] \
        - features_by_kind[f'#{source}_common_features'] \
        - features_by_kind[f'#{source}_owned_features']
    features_by_kind = pd.merge(df_kconfig[['committer_date', 'revision']].drop_duplicates(), features_by_kind)

    # select the column before aggregating; the column name was previously passed to min(),
    # where it landed in the positional numeric_only parameter
    estimate_features(
        df_features.groupby(['extractor', 'revision', 'committer_date'])[f'#{source}_total_features'].min().reset_index(),
        f'#{source}_total_features', 'total number')
    estimate_features(df_features, f'#{source}_features', 'any architecture')
    estimate_features(df_features[df_features['architecture'] == 'arm'], f'#{source}_features', 'arm architecture')
    estimate_features(df_features[(df_features['architecture'] == 'i386') | (df_features['architecture'] == 'x86')], f'#{source}_features', 'x86 architecture')

    # stacked bars: common / shared / owned features per revision, one facet per extractor
    fig = px.bar(
        features_by_kind.sort_values(by='committer_date'),
        x='revision',
        y=[f'#{source}_common_features', f'#{source}_shared_features', f'#{source}_owned_features'],
        labels=revision_labels({'value': 'Number of Features', 'variable': 'Feature Kind', 'extractor': 'Extractor'}),
        facet_col='extractor'
    )
    revision_x_axis(fig)
    style_legend(fig)
    show(fig, f'features_by_kind_{source}', height=default_height, margin=dict(l=0, r=0, t=20, b=0))

    # total number of features over time, coloured by extractor
    fig = px.scatter(
        features_by_kind.sort_values(by='committer_date'),
        x='committer_date',
        y=f'#{source}_total_features',
        color='extractor',
        labels={f'#{source}_total_features': 'Total Number of Features', 'extractor': 'Extractor', 'committer_date': 'Year'}
    )
    style_scatter(fig)
    show(fig, f'total_features_{source}', height=default_height, width=500)

    # features per architecture over time, one facet per extractor, legend hidden
    fig = px.scatter(
        df_features,
        x='committer_date',
        y=f'#{source}_features',
        color='architecture',
        labels={f'#{source}_features': 'Number of Features', 'extractor': 'Extractor', 'committer_date': 'Year'},
        hover_data=['revision', 'architecture'],
        facet_col='extractor'
    )
    style_scatter(fig, legend_position=None)
    show(fig, f'features_by_architecture_{source}', height=default_height, margin=dict(l=0, r=0, t=20, b=0))

    # distribution of the per-architecture feature counts per revision
    fig = px.box(
        df_features,
        x='revision',
        y=f'#{source}_features',
        color='extractor',
        labels=revision_labels({f'#{source}_features': 'Number of Features', 'extractor': 'Extractor'}),
        boxmode='overlay'
    )
    revision_x_axis(fig)
    style_box(fig)
    show(fig, f'features_{source}', height=default_height)

# 'configs' are the features listed by the extractor
print('configs')
plot_features('extractor')
# 'undead features' are the DIMACS features minus the dead ones
print('undead features')
plot_features('dimacs_no_dead')

configs
\multicolumn{6}{l}{total number} \\
\hspace*{1mm} \kcr & 2 & 75 & 906 & 3,708 & 21,819 \\ 
\hspace*{1mm} \kcl & 4 & 109 & 1,311 & 4,492 & 30,702 \\ 
\multicolumn{6}{l}{any architecture} \\
\hspace*{1mm} \kcr & 3 & 99 & 1,183 & 2,426 & 26,085 \\ 
\hspace*{1mm} \kcl & 2 & 64 & 771 & 1,856 & 17,279 \\ 
\multicolumn{6}{l}{arm architecture} \\
\hspace*{1mm} \kcr & 3 & 103 & 1,231 & 3,059 & 27,670 \\ 
\hspace*{1mm} \kcl & 2 & 68 & 822 & 2,265 & 18,698 \\ 
\multicolumn{6}{l}{x86 architecture} \\
\hspace*{1mm} \kcr & 3 & 98 & 1,173 & 3,077 & 26,527 \\ 
\hspace*{1mm} \kcl & 2 & 64 & 766 & 2,278 & 17,598 \\ 


undead features
\multicolumn{6}{l}{total number} \\
\hspace*{1mm} \kcr & 2 & 72 & 860 & 3,393 & 20,593 \\ 
\hspace*{1mm} \kcl & 4 & 116 & 1,398 & 5,511 & 33,466 \\ 
\multicolumn{6}{l}{any architecture} \\
\hspace*{1mm} \kcr & 2 & 74 & 887 & 1,587 & 19,324 \\ 
\hspace*{1mm} \kcl & 1 & 43 & 511 & 891 & 11,103 \\ 
\multicolumn{6}{l}{arm architecture} \\
\hspace*{1mm} \kcr & 3 & 93 & 1,112 & 2,753 & 24,984 \\ 
\hspace*{1mm} \kcl & 2 & 56 & 667 & 1,652 & 14,998 \\ 
\multicolumn{6}{l}{x86 architecture} \\
\hspace*{1mm} \kcr & 3 & 85 & 1,015 & 2,722 & 23,031 \\ 
\hspace*{1mm} \kcl & 2 & 50 & 596 & 1,556 & 13,481 \\ 


In [46]:
# model count

def big_sum(series):
    """Return the number of decimal digits of the sum of all counts in ``series``.

    Missing and empty entries are skipped; every other entry is converted with
    ``int``. An all-empty series sums to 0, which has one digit.
    """
    total = 0
    for value in series:
        if pd.isna(value) or not value:
            continue
        total += int(value)
    return len(str(total))

# only revisions up to and including 2013 are considered for the model-count figures
df_solve_slice = df_solve[df_solve['year'] <= 2013]
df_solve_group = df_solve_slice.groupby(['extractor', 'revision'], dropna=False)
# per (extractor, revision): number of rows without a model count (size counts all rows, count only non-missing ones)
df_solve_failures = (df_solve_group['model-count-log10'].size() - df_solve_group['model-count-log10'].count()).reset_index()
# True when every row of the group has a model count
# NOTE(review): the plot below labels True as 'Exact' and False as 'Lower Bound', despite the column name
df_solve_failures['is-upper-bound'] = df_solve_failures['model-count-log10'] == 0
df_solve_failures = df_solve_failures.drop(columns='model-count-log10')
# attach the flag to every row of the slice (join on the columns both frames share)
df_solve_total = pd.merge(df_solve_slice, df_solve_failures)
# per (extractor, committer_date): digit count of the summed model counts, and the flag ('min' yields False if any row is False)
df_solve_total = df_solve_total.groupby(['extractor', 'committer_date']).agg({'model-count': big_sum, 'is-upper-bound': 'min'}).reset_index()

def estimate_configurations(df, y, name):
    """Print the LaTeX rows of one trend-table group for a model-count metric.

    An OLS trendline of ``y`` over ``committer_date`` is fitted per extractor;
    the figure itself is never shown. The first fitted trend is printed with
    the kcr macro as its label, the second with the kcl macro.

    Args:
        df: Data with ``committer_date``, ``extractor`` and ``y`` columns.
        y: Column to fit.
        name: Heading of the group row.
    """
    row_labels = ['\\kcr', '\\kcl']
    fitted = px.scatter(
        df,
        x='committer_date',
        y=y,
        color='extractor',
        trendline='ols'
    )
    estimate_group(name)
    for position in range(len(row_labels)):
        estimate_trend(fitted, row_labels[position], position)

# trend table: the 'total number' group uses only rows whose flag is True;
# the remaining groups are fitted on df_solve, i.e. not restricted to the slice
estimate_configurations(df_solve_total[df_solve_total['is-upper-bound'] == True], 'model-count', 'total number')
estimate_configurations(df_solve, 'model-count-log10', 'any architecture')
estimate_configurations(df_solve[df_solve['architecture'] == 'arm'],'model-count-log10', 'arm architecture')
estimate_configurations(df_solve[(df_solve['architecture'] == 'i386') | (df_solve['architecture'] == 'x86')], 'model-count-log10', 'x86 architecture')

# model count (as digit count) per architecture over time, one facet per extractor, legend hidden
fig = px.scatter(
    df_solve_slice,
    x='committer_date',
    y='model-count-log10',
    color='architecture',
    labels={'model-count-log10': 'Number of Configurations (log<sub>10</sub>)', 'committer_date': 'Year', 'extractor': 'Extractor'},
    hover_data=['revision', 'architecture'],
    facet_col='extractor'
)
log10_y_axis(fig)
style_scatter(fig, legend_position=None)
# plot_failures(fig, df_solve_slice[df_solve_slice['extractor'] == 'kconfigreader'], 'committer_date', 'model-count-log10', 0, align='bottom', xref='x', font_size=8)
# plot_failures(fig, df_solve_slice[df_solve_slice['extractor'] == 'kmax'], 'committer_date', 'model-count-log10', 0, align='bottom', xref='x2', font_size=8)
show(fig, 'model-count', height=default_height, margin=dict(l=0, r=0, t=20, b=0))

# distribution of the model counts per revision; shown only, not exported (no name is passed to show)
fig = px.box(
    df_solve_slice.sort_values(by='committer_date'),
    x='revision',
    y='model-count-log10',
    color='extractor',
    labels=revision_labels({'model-count-log10': 'Number of Configurations (log<sub>10</sub>)', 'extractor': 'Extractor'}),
    hover_data=['revision', 'architecture']
)
revision_x_axis(fig)
log10_y_axis(fig)
style_box(fig)
# plot_failures(fig, df_solve_slice, 'revision', 'model-count-log10', 0, align='bottom')
show(fig, height=default_height)

# summed model count (as digit count) per extractor over time;
# the marker symbol tells apart complete sums ('Exact') from sums with missing counts ('Lower Bound')
fig = px.scatter(
    df_solve_total.replace(True, 'Exact').replace(False, 'Lower Bound'),
    x='committer_date',
    y='model-count',
    color='extractor',
    symbol='is-upper-bound',
    symbol_sequence=['circle', 'triangle-up-open'],
    # the label key must name the plotted column ('model-count'); with 'model-count-log10'
    # the axis title fell back to the raw column name
    labels=revision_labels({'model-count': 'Total Number of Configurations (log<sub>10</sub>)', 'extractor': 'Extractor', 'is-upper-bound': 'Kind of Bound', 'committer_date': 'Year'})
)
log10_y_axis(fig)
style_scatter(fig, legend_position='topright')
show(fig, 'model-count-total', height=default_height, width=500)

\multicolumn{6}{l}{total number} \\
\hspace*{1mm} \kcr & 0 & 5 & 58 & 443 & 1,599 \\ 
\hspace*{1mm} \kcl & 0 & 7 & 83 & 682 & 2,346 \\ 
\multicolumn{6}{l}{any architecture} \\
\hspace*{1mm} \kcr & 0 & 2 & 18 & 509 & 875 \\ 
\hspace*{1mm} \kcl & 0 & 2 & 21 & 343 & 761 \\ 
\multicolumn{6}{l}{arm architecture} \\
\hspace*{1mm} \kcr & 0 & 6 & 68 & 631 & 1,989 \\ 
\hspace*{1mm} \kcl & 0 & 4 & 47 & 410 & 1,344 \\ 
\multicolumn{6}{l}{x86 architecture} \\
\hspace*{1mm} \kcr & 0 & 7 & 85 & 682 & 2,390 \\ 
\hspace*{1mm} \kcl & 0 & 5 & 58 & 443 & 1,602 \\ 


In [26]:
# list every available model count of the current slice as a plain string
mc = df_solve_slice['model-count']
mc.dropna().astype(str).tolist()

['11584344439872278999204676812069812705207689717971079214309579306015942142402518406017256210024721055906155185264773819238428006750092193928800769233376262445081807658162263704467146162668163906449268738339602138460228179650639550600414638315860344010845458159289330033744450397416459408123572693575982591483587316909412012074161203443590335033902074966369054229643847463427221375385714598958841640589876422374552354233645555247923570433323618651236142995168402805982585156836904445535995977584193696394396304725962186367205592373150801880721998662664932326489897984000000000',
 '1107758401330579004319065523212394538337685394182383896912451755792764643082306506315515757947916195631364010110899576775232744736938200482558944381187303720900964900079744726275516191002150142746797004126512039847862774595563755046572859538091474880629284930834389360814117475132108538948728804682739243061597616958181511751360525672630418580049535408332100084765154009045052159458472335418201572931451980370687731662

In [11]:
# model count time

# keep only the rows for which a model count is available
# NOTE(review): this rebinds df_solve_slice, which the model-count cell defines as the "year <= 2013" slice;
# cells run afterwards see this definition instead
df_solve_slice = df_solve[~df_solve['model-count-log10'].isna()]
fig = px.scatter(
    df_solve_slice,
    x=df_solve_slice['committer_date'],
    # presumably the analyzer time is recorded in nanoseconds, so this yields seconds; TODO confirm
    y=df_solve_slice['backbone.dimacs-analyzer-time'] / 1000000000,
    color='architecture',
    labels={'extractor': 'Extractor', 'y': 'Time for Counting (log<sub>10</sub> s)', 'committer_date': 'Year'},
    facet_col='extractor',
    log_y=True
)
style_scatter(fig, legend_position=None)
show(fig, 'model-count-time', height=default_height, margin=dict(l=0, r=0, t=20, b=0))