In [12]:
%%capture
pip install plotly pandas statsmodels kaleido scipy nbformat jinja2

In [12]:
import glob
import re
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd
import numpy as np
import os.path
import pickle
import scipy
from statistics import mean, stdev
from math import sqrt, log10
from packaging.version import Version

# Directory that read_dataframe reads from when no other directory is requested.
output_directory = "output-busybox"
# JSON file that is opened below and loaded into `d`.
base_file = "init_base.json"

In [63]:
# `json` is otherwise only imported much further down in this notebook, so this
# cell raised NameError after "Restart Kernel -> Run All"; import it here.
import json

# Load the base description. Later cells iterate over its top-level keys and
# read the "projects" and "plots" entries of each of them.
with open(base_file) as json_data:
    d = json.load(json_data)

In [62]:
def get_latest_nonLinux(df):
    """Return the row of ``df`` whose ``committer_date_unix`` is largest.

    The original author's note says this assumes a frame like ``df_kconfig``.
    The row is selected by index label via ``idxmax``.
    """
    newest_label = df['committer_date_unix'].idxmax()
    return df.loc[newest_label]

In [81]:
def read_dataframe(stage, dtype={}, usecols=None, file=None, project=None):
    """Read ``<directory>/<stage>/<file>.csv`` into a DataFrame.

    Args:
        stage: sub-directory of the output directory to read from.
        dtype: passed through to ``pd.read_csv``.
        usecols: passed through to ``pd.read_csv``.
        file: CSV base name without extension; ``'output'`` when falsy.
        project: when given, the directory is ``output-<project>`` instead of
            the global ``output_directory``.

    Returns:
        The loaded frame. If it has a ``committer_date_unix`` column, a
        ``committer_date`` column is added by converting each value with
        ``pd.to_datetime(..., unit='s')``.
    """
    def to_timestamp(seconds):
        return pd.to_datetime(seconds, unit='s')

    file_name = file if file else 'output'
    directory = output_directory
    if project != None:
        directory = f"output-{project}"
    frame = pd.read_csv(f'{directory}/{stage}/{file_name}.csv', dtype=dtype, usecols=usecols)
    if 'committer_date_unix' in frame:
        frame['committer_date'] = frame['committer_date_unix'].apply(to_timestamp)
    return frame

# Load the 'kconfig' stage from the current default output directory.
df_kconfig = read_dataframe('kconfig')


In [None]:
# Row of df_kconfig with the largest committer_date_unix.
# NOTE(review): despite its name, `sloc` holds the whole row here; the output
# recorded for this cell is a single number, so the cell presumably used to
# select one column (source_lines_of_code?) - confirm.
sloc = get_latest_nonLinux(df_kconfig)

np.float64(209492.0)

In [60]:
# Plot name -> column of the kconfig dataframe that holds the plot's value.
# Plots without an entry are looked up under their own name (see the
# `plot_column_mapping.get(plot, plot)` calls in the fill_latest_* functions).
plot_column_mapping = {
    "features": "model-features",
    "total-features": "model-features",
    "model-count": "model-literals",
    "model-count-time": "model-time"
}

In [86]:
# project name -> plot name -> dict describing the latest value of that plot;
# filled in place by the fill_latest_* functions.
latestData = dict()

In [87]:
def fill_latest_nonLinux():
    """Fill ``latestData`` with the newest value of every plot of non-Linux projects.

    For every top-level key of ``d`` except ``"linux"`` and every project listed
    under its ``"projects"``, the kconfig rows whose ``system`` equals the
    project are selected and the row with the largest ``committer_date_unix``
    is taken. Each plot under ``"plots"`` is then stored as
    ``latestData[project][plot]``, reading the value from the column given by
    ``plot_column_mapping`` (or the plot name itself when unmapped).

    The comparison fields (``cmpLastWeek``, ``cmpLastMonth``) are left empty.
    Returns nothing; ``latestData`` is modified in place.
    """
    # The CSV does not depend on the project, so it is read at most once
    # (lazily, so nothing is read when there is no non-Linux project at all)
    # instead of once per project.
    df_kconfig = None
    for bp, entry in d.items():
        if bp == "linux":
            continue
        projects = entry["projects"]
        plots = entry["plots"]
        for p in projects:
            project_data = latestData.setdefault(p, dict())
            if df_kconfig is None:
                df_kconfig = read_dataframe('kconfig')
            latest_all = get_latest_nonLinux(df_kconfig[df_kconfig["system"] == p])
            for plot in plots:
                latest = latest_all[plot_column_mapping.get(plot, plot)]
                project_data[plot] = {
                    "title": plots[plot]["title"],
                    "currentValue": str(latest),
                    "cmpLastWeek": "",
                    "cmpLastMonth": "",
                    "desc": plots[plot]["desc"],
                }
# Populate latestData for the non-Linux projects and pretty-print the result.
fill_latest_nonLinux()
from pprint import pprint
pprint(latestData)

{'busybox': {'features': {'cmpLastMonth': '',
                          'cmpLastWeek': '',
                          'currentValue': '1138.0',
                          'desc': 'Features: Information, Description etc.',
                          'title': 'Features'},
             'model-count': {'cmpLastMonth': '',
                             'cmpLastWeek': '',
                             'currentValue': '5200.0',
                             'desc': 'Model Count: Information, Description '
                                     'etc.',
                             'title': 'Model Count'},
             'model-count-time': {'cmpLastMonth': '',
                                  'cmpLastWeek': '',
                                  'currentValue': '7745479537',
                                  'desc': 'Model Count Time: Information, '
                                          'Description etc.',
                                  'title': 'Model Count Time'},
             'source_lines_of_

In [None]:
def fill_latest_linux():
    """Fill ``latestData`` with the newest value of every plot per Linux architecture.

    Only the ``"linux"`` entry of ``d`` is processed. Each of its projects is a
    path whose last ``/``-separated component is the architecture; the
    architecture ``"all"`` is skipped. For the remaining ones the kconfig rows
    of that architecture are selected, the row with the largest
    ``committer_date_unix`` is taken, and every plot is stored as
    ``latestData[project][plot]``.

    The comparison fields (``cmpLastWeek``, ``cmpLastMonth``) are left empty.
    Returns nothing; ``latestData`` is modified in place.
    """
    # Strings used so far for *every* plot; kept as fallback for plots that
    # carry no metadata of their own.
    default_title = "Total Features"
    default_desc = "Total Features: Information, Description etc."

    # Read the Linux CSV at most once instead of once per architecture.
    df_kconfig = None
    for bp, entry in d.items():
        if bp != "linux":
            continue
        projects = entry["projects"]
        plots = entry["plots"]
        for p in projects:
            arch = p.rsplit("/")[-1]
            if arch == "all":
                continue
            project_data = latestData.setdefault(p, dict())
            if df_kconfig is None:
                df_kconfig = read_dataframe('kconfig', project="linux")
            latest_all = get_latest_nonLinux(df_kconfig[df_kconfig["architecture"] == arch])
            for plot in plots:
                latest = latest_all[plot_column_mapping.get(plot, plot)]
                # Use the plot's own title/description, like the non-Linux
                # variant does, rather than labelling every metric
                # "Total Features".
                meta = plots.get(plot) if isinstance(plots, dict) else None
                if not isinstance(meta, dict):
                    meta = {}
                project_data[plot] = {
                    "title": meta.get("title", default_title),
                    "currentValue": str(latest),
                    "cmpLastWeek": "",
                    "cmpLastMonth": "",
                    "desc": meta.get("desc", default_desc),
                }
# Populate latestData for the Linux architectures.
fill_latest_linux()
# Disabled debugging output:
# from pprint import pprint
# pprint(latestData)


In [380]:
def group_by_arch(df):
    """Split ``df`` by its ``architecture`` column.

    Returns a dict that maps each architecture value to the sub-frame holding
    the rows of that architecture.
    """
    return dict(iter(df.groupby('architecture')))

def read_dataframe(stage, dtype={}, usecols=None, file=None, arch=None, output_dir=None, project=None):
    """Read ``<output_dir>/<stage>/<file>.csv`` into a DataFrame.

    Args:
        stage: sub-directory of the output directory to read from.
        dtype: passed through to ``pd.read_csv`` (never modified here).
        usecols: passed through to ``pd.read_csv``.
        file: CSV base name without extension; ``'output'`` when falsy.
        arch: when given, only the rows of this architecture are returned
            (KeyError if the architecture does not occur).
        output_dir: directory to read from. When omitted, ``output-<project>``
            is used if ``project`` is given, otherwise the *current* value of
            the global ``output_directory``.
        project: accepted for compatibility with the earlier definition of
            this function in the notebook, which callers invoke with
            ``project=...``.

    Returns:
        The loaded frame. If it has a ``committer_date_unix`` column, a
        ``committer_date`` column is added by converting each value with
        ``pd.to_datetime(..., unit='s')``.
    """
    if not file:
        file = 'output'
    if output_dir is None:
        # Resolved at call time. A default of `output_dir=output_directory`
        # is evaluated once, when the function is defined, so later
        # `output_directory = "..."` assignments in the notebook were ignored.
        output_dir = f"output-{project}" if project is not None else output_directory
    df = pd.read_csv(f'{output_dir}/{stage}/{file}.csv', dtype=dtype, usecols=usecols)
    if 'committer_date_unix' in df:
        df['committer_date'] = df['committer_date_unix'].apply(lambda d: pd.to_datetime(d, unit='s'))
    if arch is not None:
        return group_by_arch(df)[arch]
    return df

def read_dataframe_linux(stage, dtype={}, usecols=None, file=None, arch=None):
    """Read ``output-linux/<stage>/<file>.csv`` into a DataFrame.

    Thin wrapper around ``read_dataframe`` with the directory fixed to
    ``output-linux``; this replaces a copy-pasted duplicate of that function's
    body. All other arguments and the return value are those of
    ``read_dataframe``.
    """
    return read_dataframe(stage, dtype=dtype, usecols=usecols, file=file, arch=arch, output_dir='output-linux')


def replace_values(df):
    """Rename the extractor identifiers in ``df`` in place.

    Every cell equal to ``'kconfigreader'`` becomes ``'KConfigReader'`` and
    every cell equal to ``'kmax'`` becomes ``'KClause'``. Returns nothing.
    """
    renames = (('kconfigreader', 'KConfigReader'), ('kmax', 'KClause'))
    for raw_name, display_name in renames:
        df.replace(raw_name, display_name, inplace=True)

def big_log10(str):
    """Return log10 of the integer written in ``str``.

    Missing values (as judged by ``pd.isna``) and the empty string yield
    ``pd.NA``. The value is parsed with ``int`` first, so arbitrarily large
    integers are supported.
    """
    if pd.isna(str) or not (str != ''):
        return pd.NA
    return log10(int(str))

def process_model_count(df_solve):
    """Derive model-count helper columns on ``df_solve`` in place.

    * ``model-count``: the value ``'1'`` is replaced by the empty string.
    * ``model-count-log10``: ``big_log10`` of the (NA-filled) model count,
      with a result of 0 replaced by NaN.
    * ``year``: integer year of ``committer_date``.

    Returns nothing.
    """
    def year_of(timestamp):
        return int(timestamp.year)

    df_solve['model-count'] = df_solve['model-count'].replace('1', '')
    log_counts = df_solve['model-count'].fillna('').apply(big_log10)
    df_solve['model-count-log10'] = log_counts.replace(0, np.nan)
    df_solve['year'] = df_solve['committer_date'].apply(year_of)

def peek_dataframe(df, column, message, type='str', filter=['revision', 'architecture', 'extractor']):
    """Print how many rows of ``df`` succeeded or failed according to ``column``.

    With ``type == 'str'`` a row counts as a failure when its ``column`` value
    contains the text ``'NA'``; for any other ``type`` it is a failure when
    the value is missing. ``filter`` names the columns selected from both
    subsets before counting (the list is only read, never modified).
    """
    if type == 'str':
        failed = df[column].str.contains('NA')
    else:
        failed = df[column].isna()
    success = df[~failed][filter]
    failure = df[failed][filter]
    print(f'{message}: {len(success)} successes, {len(failure)} failures')

In [324]:
# From here on the Linux results are analysed.
output_directory = "output-linux"

# kconfig stage, tagged with the year of each commit.
df_kconfig = read_dataframe('kconfig')
df_kconfig['year'] = df_kconfig['committer_date'].apply(lambda ts: int(ts.year))

# Architectures per revision, in chronological order, tagged with the commit year.
df_architectures = read_dataframe('read-linux-architectures').sort_values(by='committer_date')
df_architectures['year'] = df_architectures['committer_date'].apply(lambda ts: int(ts.year))

# Config options; everything whose kconfig file path contains '/um/' is dropped.
df_configs = read_dataframe('read-linux-configs')
df_configs = df_configs.loc[~df_configs['kconfig-file'].str.contains('/um/')]

# Types of the config options, without '/um/' and joined with the commit date
# of their revision.
df_config_types = read_dataframe('read-linux-configs', file='output.types')
df_config_types = df_config_types.loc[~df_config_types['kconfig-file'].str.contains('/um/')]
revision_dates = df_architectures[['revision', 'committer_date']].drop_duplicates()
df_config_types = df_config_types.merge(revision_dates)

# Results of the remaining stages.
df_uvl = read_dataframe('model_to_uvl_featureide')
df_smt = read_dataframe('model_to_smt_z3')
df_dimacs = read_dataframe('dimacs')
df_backbone_dimacs = read_dataframe('backbone-dimacs')
# The model count is read as text (pandas 'string' dtype).
df_solve = read_dataframe('solve_model-count', dtype={'model-count': 'string'})


In [15]:
# differentiate kinds of features
# Accumulators that the inspect_* functions below fill as a side effect.
potential_misses_grep, potential_misses_kmax = set(), set()
# '<revision>###<architecture>' -> feature set or Jaccard index (see
# inspect_architecture_features_for_model).
extractor_comparison = dict()
# Copy of df_configs with an extra 'configurable' flag, initially False for every row.
df_configs_configurable = df_configs.assign(configurable=False)

def jaccard(a, b):
    """Return the Jaccard index ``|a & b| / |a | b|`` of two sets.

    Args:
        a: a set.
        b: a set (or any iterable accepted by ``set.union``).

    Returns:
        A float in ``[0, 1]``, or ``np.nan`` when both inputs are empty. The
        index is undefined in that case, and dividing by the empty union used
        to raise ZeroDivisionError.
    """
    union = set.union(a, b)
    if not union:
        return np.nan
    return len(set.intersection(a, b)) / len(union)

def add_features(descriptor, source, features, min=2):
    """Store the size of ``features`` in ``descriptor`` under ``'#<source>'``.

    NaN is stored instead when ``features`` is None or has fewer than ``min``
    elements.
    """
    key = f'#{source}'
    if features is not None and len(features) >= min:
        descriptor[key] = len(features)
    else:
        descriptor[key] = np.nan

def get_variables(variable_map):
    """Return the distinct values of ``variable_map`` as a set.

    A map with at most one distinct value yields the empty set.
    """
    names = set(variable_map.values())
    return names if len(names) > 1 else set()

def read_unconstrained_feature_variables(extractor, revision, architecture):
    """Read the unconstrained feature names of one extracted model.

    The names come from the ``.unconstrained.features`` file of the given
    extractor, revision and architecture below ``output_directory``; each line
    is stripped and a leading ``CONFIG_`` is removed. Returns an empty set if
    the file does not exist.
    """
    path = f'{output_directory}/unconstrained-features/{extractor}/linux/{revision}[{architecture}].unconstrained.features'
    if not os.path.isfile(path):
        return set()
    with open(path, 'r') as handle:
        return {re.sub('^CONFIG_', '', line.strip()) for line in handle.readlines()}

def inspect_architecture_features_for_model(extractor, revision, architecture, config_features, features_for_last_revision):
    """Collect feature statistics for one extracted model.

    Reads, below ``output_directory``, the ``.features`` file of the given
    extractor/revision/architecture, the unconstrained features (via
    ``read_unconstrained_feature_variables``) and, if they exist, the
    ``.backbone.dimacs`` and ``.backbone.features`` files.

    Args:
        extractor: extractor directory name; for ``'kmax'`` the global
            ``potential_misses_grep`` is updated as well.
        revision: revision name used in the file names.
        architecture: architecture name used in the file names.
        config_features: set of feature names; the variables found in the
            model are intersected with it to obtain ``features``.
        features_for_last_revision: dict architecture -> feature set, used to
            compute added and removed features.

    Returns:
        A tuple ``(descriptor, feature_variables | unconstrained_feature_variables,
        features)``. ``descriptor`` is a dict with the extractor, revision and
        architecture, the entries of ``infos`` and one ``'#<name>'`` count per
        ``add_features`` call below.

    Side effects:
        Updates the global ``extractor_comparison``: the first call for a
        revision/architecture pair stores the feature set, the next call
        replaces it by the Jaccard index of the stored set and the new one.
    """
    global potential_misses_grep, potential_misses_kmax
    
    # feature names written by the extractor, without the CONFIG_ prefix
    features_filename = f'{output_directory}/kconfig/{extractor}/linux/{revision}[{architecture}].features'
    with open(features_filename, 'r') as f:
        extracted_features = set([re.sub('^CONFIG_', '', f.strip()) for f in f.readlines()])
    
    unconstrained_feature_variables = read_unconstrained_feature_variables(extractor, revision, architecture)

    dimacs_filename = f'{output_directory}/backbone-dimacs/{extractor}/linux/{revision}[{architecture}].backbone.dimacs'
    # defaults that are reported when the DIMACS file is missing or has no feature variables
    all_variables = set()
    variables = set()
    feature_variables = set()
    core_feature_variables = set()
    dead_feature_variables = set()
    undead_feature_variables = set()
    all_feature_variables = set()
    features = set()
    core_features = set()
    unconstrained_features = set()
    constrained_features = set()
    added_features = None
    removed_features = None
    infos = {'extracted_features_jaccard': np.nan, \
                    'all_variables_jaccard': np.nan, \
                    'variables_jaccard': np.nan, \
                    'feature_variables_jaccard': np.nan, \
                    'undead_feature_variables_jaccard': np.nan, \
                    'all_feature_variables_jaccard': np.nan, \
                    'features_jaccard': np.nan, \
                    'unconstrained_bools': np.nan, \
                    'unconstrained_tristates': np.nan}
    
    if os.path.isfile(dimacs_filename):
        with open(dimacs_filename, 'r') as f:
            lines = f.readlines()
            all_variable_map = {}
            variable_map = {}
            feature_variable_map = {}
            # NOTE: the loop variable reuses the name `f` of the file handle, whose
            # content has already been read completely into `lines`.
            for f in lines:
                # comment lines of the form 'c <index> <name>' carry the variable names
                if f.startswith('c '):
                    result = re.search('^c ([^ ]+) ([^ ]+)$', f)
                    if result:
                        index = int(result.group(1).strip())
                        name = result.group(2).strip()
                        all_variable_map[index] = name
                        # names containing "k!" are excluded from `variables`
                        if "k!" not in name:
                            variable_map[index] = name
                            # the following names are additionally not counted as feature variables
                            if name != 'True' \
                                and name != '<unsupported>' \
                                and name != 'PREDICATE_Compare' \
                                and not name.startswith('__VISIBILITY__CONFIG_') \
                                and not name.endswith('_MODULE'):
                                feature_variable_map[index] = name
            all_variables = get_variables(all_variable_map)
            variables = get_variables(variable_map)
            feature_variables = get_variables(feature_variable_map)

            # backbone: lines starting with '+' are core, lines starting with '-' are dead;
            # files with at most one line are ignored
            backbone_features_filename = f'{output_directory}/backbone-features/{extractor}/linux/{revision}[{architecture}].backbone.features'
            if os.path.isfile(backbone_features_filename):
                with open(backbone_features_filename, 'r') as f:
                    lines = f.readlines()
                    if len(lines) > 1:
                        core_feature_variables = set([line[1:].strip() for line in lines if line.startswith('+')]).intersection(feature_variables)
                        dead_feature_variables = set([line[1:].strip() for line in lines if line.startswith('-')]).intersection(feature_variables)

            if len(feature_variables) > 0:
                undead_feature_variables = feature_variables.difference(dead_feature_variables)
                all_feature_variables = undead_feature_variables.union(unconstrained_feature_variables)
                # only variables that are also known config options count as features
                features = all_feature_variables.intersection(config_features)
                # first extractor for this revision/architecture stores its feature set,
                # the second one replaces it by the Jaccard index of both sets
                if f'{revision}###{architecture}' not in extractor_comparison:
                    extractor_comparison[f'{revision}###{architecture}'] = features
                else:
                    extractor_comparison[f'{revision}###{architecture}'] = jaccard(extractor_comparison[f'{revision}###{architecture}'], features)
                core_features = features.intersection(core_feature_variables)
                unconstrained_features = features.intersection(unconstrained_feature_variables)
                # look up the types of the unconstrained features for this revision
                unconstrained_features_by_type = pd.DataFrame(list(unconstrained_features), columns=['config']) \
                    .merge(df_config_types[(df_config_types['revision'] == revision)])
                unconstrained_bools = unconstrained_features_by_type[unconstrained_features_by_type['type'] == 'bool']['config'].drop_duplicates()
                unconstrained_tristates = unconstrained_features_by_type[unconstrained_features_by_type['type'] == 'tristate']['config'].drop_duplicates()
                constrained_features = features.difference(core_feature_variables).difference(unconstrained_feature_variables)
                # added/removed features are only computed against a non-empty previous feature set
                if architecture in features_for_last_revision and len(features_for_last_revision[architecture]) > 0:
                    added_features = features.difference(features_for_last_revision[architecture])
                    removed_features = features_for_last_revision[architecture].difference(features)
                infos = { \
                            'extracted_features_jaccard': jaccard(extracted_features, features), \
                            'all_variables_jaccard': jaccard(all_variables, features), \
                            'variables_jaccard': jaccard(variables, features), \
                            'feature_variables_jaccard': jaccard(feature_variables, features), \
                            'undead_feature_variables_jaccard': jaccard(undead_feature_variables, features), \
                            'all_feature_variables_jaccard': jaccard(all_feature_variables, features), \
                            'features_jaccard': 1, \
                            'unconstrained_bools': len(unconstrained_bools), \
                            'unconstrained_tristates': len(unconstrained_tristates) \
                        }
    # one '#<name>' count per set; add_features stores NaN for None or for sets smaller than `min`
    descriptor = {'extractor': extractor, 'revision': revision, 'architecture': architecture} | infos
    add_features(descriptor, 'config_features', config_features) # F_config
    add_features(descriptor, 'extracted_features', extracted_features) # F_extracted
    add_features(descriptor, 'unconstrained_feature_variables', unconstrained_feature_variables, min=1) # F_unconstrained
    add_features(descriptor, 'all_variables', all_variables) # V_all
    add_features(descriptor, 'variables', variables) # V_phi
    add_features(descriptor, 'feature_variables', feature_variables) # FV_phi
    add_features(descriptor, 'core_feature_variables', core_feature_variables, min=1) # FV_core
    add_features(descriptor, 'dead_feature_variables', dead_feature_variables, min=1) # FV_dead
    add_features(descriptor, 'constrained_feature_variables', undead_feature_variables.difference(core_feature_variables)) # FV_constrained
    add_features(descriptor, 'undead_feature_variables', undead_feature_variables) # FV_undead
    add_features(descriptor, 'all_feature_variables', all_feature_variables) # FV
    add_features(descriptor, 'ALL_feature_variables', feature_variables.union(unconstrained_feature_variables)) # FV_all
    add_features(descriptor, 'features', features) # F
    add_features(descriptor, 'core_features', core_features, min=1)
    add_features(descriptor, 'unconstrained_features', unconstrained_features, min=1)
    add_features(descriptor, 'constrained_features', constrained_features)
    add_features(descriptor, 'added_features', added_features, min=0)
    add_features(descriptor, 'removed_features', removed_features, min=0)
    # variables found by kmax that are not known config options (names containing '__CONFIG_' are skipped)
    if extractor == 'kmax':
        potential_misses_grep.update([f for f in all_feature_variables.difference(features) if '__CONFIG_' not in f])
    return descriptor, feature_variables.union(unconstrained_feature_variables), features

def inspect_architecture_features_for_revision(extractor, revision, features_for_last_revision):
    """Collect the feature statistics of every architecture of one revision.

    The architectures are taken from the ``<revision>[<architecture>].features``
    files of ``extractor`` below ``output_directory`` and are processed in
    sorted order with ``inspect_architecture_features_for_model``.

    Args:
        extractor: extractor directory name; for ``'kmax'`` the global
            ``potential_misses_kmax`` is updated as well.
        revision: revision to inspect.
        features_for_last_revision: dict architecture -> feature set of the
            previously inspected revision, with the union of all architectures
            under the key ``'TOTAL'``.

    Returns:
        A tuple ``(data, features_for_current_revision)``: the list of
        descriptors (one per architecture, extended by the revision-wide
        ``#total_*`` counts) and the dict to pass as
        ``features_for_last_revision`` for the next revision.

    Side effects:
        Sets ``configurable`` to True in the global ``df_configs_configurable``
        for all configs of this revision that are among the revision's features.
    """
    config_features = set(df_configs[df_configs['revision'] == revision]['config'])
    # Raw string for the regex: '\[' and '\]' are invalid escape sequences in an
    # ordinary string literal and trigger a warning on recent Python versions.
    architectures = sorted({
        re.search(r'\[(.*)\]', f).group(1)
        for f in glob.glob(f'{output_directory}/kconfig/{extractor}/linux/{revision}[*.features')
    })
    data = []
    total_features = set()
    total_feature_variables = set()
    features_for_current_revision = {}
    for architecture in architectures:
        descriptor, feature_variables, features = inspect_architecture_features_for_model(extractor, revision, architecture, config_features, features_for_last_revision)
        data.append(descriptor)
        total_features.update(features)
        features_for_current_revision[architecture] = features
        if extractor == 'kmax':
            total_feature_variables.update(feature_variables)

    # Revision-wide differences are the same for every descriptor, so they are
    # computed once instead of once per architecture.
    total_added_features = None
    total_removed_features = None
    if 'TOTAL' in features_for_last_revision and len(features_for_last_revision['TOTAL']) > 0:
        total_added_features = total_features.difference(features_for_last_revision['TOTAL'])
        total_removed_features = features_for_last_revision['TOTAL'].difference(total_features)
    for descriptor in data:
        add_features(descriptor, 'total_features', total_features)
        add_features(descriptor, 'total_added_features', total_added_features, min=0)
        add_features(descriptor, 'total_removed_features', total_removed_features, min=0)

    features_for_current_revision['TOTAL'] = total_features
    df_configs_configurable.loc[(df_configs_configurable['revision'] == revision) & (df_configs_configurable['config'].isin(total_features)), 'configurable'] = True
    if extractor == 'kmax':
        # config options of this revision that occur in no kmax model
        potential_misses_kmax.update(config_features.difference(total_feature_variables))
    return data, features_for_current_revision

def inspect_architecture_features(extractor):
    """Collect the feature statistics of all revisions extracted by ``extractor``.

    The revisions are taken from the ``.features`` file names below
    ``output_directory`` and are processed in ascending ``Version`` order, so
    that each revision can be compared with its predecessor. Progress is
    printed: the extractor name first, then every tenth revision.

    Returns:
        The list of descriptors of all revisions and architectures.
    """
    print(f'{extractor} ', end='')
    # Raw string for the regex: '\[' is an invalid escape sequence in an
    # ordinary string literal and triggers a warning on recent Python versions.
    revisions = sorted(
        {
            re.search(r'/linux/(.*)\[', f).group(1)
            for f in glob.glob(f'{output_directory}/kconfig/{extractor}/linux/*.features')
        },
        key=Version,
    )
    data = []
    features_for_last_revision = {}
    for i, revision in enumerate(revisions, start=1):
        if i % 10 == 0:
            print(revision + ' . ', end='')
        new_data, features_for_last_revision = inspect_architecture_features_for_revision(extractor, revision, features_for_last_revision)
        data += new_data
    print()
    return data

# Reuse the cached result if there is one; otherwise inspect both extractors and write the cache.
# NOTE: pickle.load can execute arbitrary code; only load a cache file written by this notebook.
if os.path.isfile(f'{output_directory}/linux-features.dat'):
    with open(f'{output_directory}/linux-features.dat', 'rb') as f:
        [features_by_kind_per_architecture, df_extractor_comparison, potential_misses_grep, potential_misses_kmax, df_configs_configurable] = pickle.load(f)
else:
    features_by_kind_per_architecture = inspect_architecture_features('kconfigreader')
    features_by_kind_per_architecture += inspect_architecture_features('kmax')
    features_by_kind_per_architecture = pd.DataFrame(features_by_kind_per_architecture)
    df_extractor_comparison = []
    for key, value in extractor_comparison.items():
        [revision, architecture] = key.split('###')
        # a value that is still a set was only seen for one extractor, so there is nothing to compare
        if type(value) is set:
            value = pd.NA
        df_extractor_comparison.append({'revision': revision, 'architecture': architecture, 'extractor_jaccard': value})
    df_extractor_comparison = pd.DataFrame(df_extractor_comparison)
    # the cache is written before replace_values runs, so it holds the raw extractor names
    with open(f'{output_directory}/linux-features.dat', 'wb') as f:
        pickle.dump([features_by_kind_per_architecture, df_extractor_comparison, potential_misses_grep, potential_misses_kmax, df_configs_configurable], f)

# rename the extractors in place, then join the statistics with the architecture and kconfig data
replace_values(features_by_kind_per_architecture)
df_features = pd.merge(df_architectures, features_by_kind_per_architecture, how='outer').sort_values(by='committer_date')
df_features = pd.merge(df_kconfig, df_features, how='outer').sort_values(by='committer_date')

def compare_with_grep(message, list):
    """Print the number of configs in ``list`` and where they are declared.

    The first line printed is ``'<message>: <count>'``. The table that follows
    joins the given config names with ``df_configs`` (kconfig file) and
    ``df_config_types`` (type), with duplicate rows removed.
    """
    print(f'{message}: {len(list)}')
    candidates = pd.DataFrame(list, columns=['config'])
    located = df_configs[['config','kconfig-file']].merge(candidates, how='inner').drop_duplicates()
    typed = located.merge(df_config_types[['config', 'type']]).drop_duplicates()
    print(typed)

def report_potential_misses(potential_misses_grep, potential_misses_kmax):
    """Print the features on which grep and kmax disagree.

    Both arguments are sets of config names. The kmax misses are reduced by
    those explained by test/documentation kconfig files and by missing kmax
    models before they are printed.
    """
    # Features NOT found by grep but found by kmax; this shows whether the grep
    # regex matches too much. The only matches are environment variables
    # (e.g., ARCH) and mistakes in kconfig files: IA64_SGI_UV (which has a
    # trailing `) and SND_SOC_UX500_MACH_MOP500 (which has a leading +).
    compare_with_grep('#potential misses (grep)', potential_misses_grep)
    print()

    # Features found by grep but NOT found by kmax, either constrained or
    # unconstrained; this shows whether kmax matches enough. Some misses are
    # expected: there are extraction failures for kmax, the um architecture is
    # not extracted, and some test kconfig files are never included. These
    # effects are filtered out below (not perfectly, though).
    kmax_candidates = pd.DataFrame(potential_misses_kmax, columns=['config'])
    misses = df_configs[['config','kconfig-file', 'revision']].merge(kmax_candidates, how='inner').drop_duplicates()
    misses = misses.merge(df_config_types[['config', 'type']]).drop_duplicates()

    in_documentation = misses['kconfig-file'].str.startswith('Documentation/')
    in_scripts = misses['kconfig-file'].str.startswith('scripts/')
    misses_due_to_tests = set(misses[in_documentation | in_scripts]['config'].unique())

    no_kmax_model = (df_features['extractor'] == 'KClause') & df_features['#extracted_features'].isna()
    missing_kmax_models = df_features[no_kmax_model][['revision', 'architecture']].drop_duplicates()

    def architecture_of(kconfig_file):
        return re.sub(r'^arch/(.*?)/.*$', r'\1', kconfig_file)

    misses['architecture'] = misses['kconfig-file'].apply(architecture_of)
    unmodelled = misses.merge(missing_kmax_models[['revision', 'architecture']].drop_duplicates())
    unmodelled = unmodelled.drop(columns=['kconfig-file', 'revision', 'architecture', 'type'])
    misses_due_to_missing_models = set(unmodelled['config'].unique())

    remaining = potential_misses_kmax.difference(misses_due_to_tests)
    remaining = remaining.difference(misses_due_to_missing_models)
    # The remaining matches are due to the way the kmax extractor is used here:
    # lines with new kconfig constructs like $(success,...) are ignored.
    compare_with_grep('#potential misses (kmax)', remaining)

# Print both reports for the sets collected (or loaded from the cache) above.
report_potential_misses(potential_misses_grep, potential_misses_kmax)

#potential misses (grep): 7
                        config             kconfig-file      type
0                         ARCH             init/Kconfig    string
53               KERNELVERSION             init/Kconfig    string
106                IA64_SGI_UV        arch/ia64/Kconfig      bool
187  SND_SOC_UX500_MACH_MOP500  sound/soc/ux500/Kconfig  tristate

#potential misses (kmax): 21
                                config                        kconfig-file  \
0                      MIPS_FPE_MODULE                 arch/mips64/Kconfig   
29                      BLK_DEV_FD1772         drivers/acorn/block/Kconfig   
83                         BLK_DEV_MFM         drivers/acorn/block/Kconfig   
137             BLK_DEV_MFM_AUTODETECT         drivers/acorn/block/Kconfig   
191                      VIRTEX_II_PRO      arch/ppc/platforms/4xx/Kconfig   
207                      VIRTEX_II_PRO  arch/powerpc/platforms/4xx/Kconfig   
223                          DRAM_BASE              arch/arm/Kconf

In [326]:
# Distinct architectures present in the kconfig results.
df_kconfig["architecture"].unique()

array(['alpha', 'arm', 'cris', 'i386', 'ia64', 'm68k', 'mips', 'mips64',
       'parisc', 'ppc', 'ppc64', 's390', 's390x', 'sh', 'sparc',
       'sparc64', 'x86_64', 'm68knommu', 'v850', 'h8300', 'arm26', 'm32r',
       'sh64', 'frv', 'xtensa', 'powerpc', 'avr32', 'blackfin', 'x86',
       'mn10300', 'microblaze', 'score', 'tile', 'unicore32', 'openrisc',
       'arc', 'arm64', 'c6x', 'hexagon', 'metag', 'nios2', 'riscv',
       'nds32', 'csky', 'loongarch'], dtype=object)

In [16]:

# Cache-or-recompute step for the per-architecture feature statistics.
# NOTE: pickle.load can execute arbitrary code; only load a cache file written by this notebook.
if os.path.isfile(f'{output_directory}/linux-features.dat'):
    with open(f'{output_directory}/linux-features.dat', 'rb') as f:
        [features_by_kind_per_architecture, df_extractor_comparison, potential_misses_grep, potential_misses_kmax, df_configs_configurable] = pickle.load(f)
else:
    features_by_kind_per_architecture = inspect_architecture_features('kconfigreader')
    features_by_kind_per_architecture += inspect_architecture_features('kmax')
    features_by_kind_per_architecture = pd.DataFrame(features_by_kind_per_architecture)
    df_extractor_comparison = []
    for key, value in extractor_comparison.items():
        [revision, architecture] = key.split('###')
        # a value that is still a set was only seen for one extractor, so there is nothing to compare
        if type(value) is set:
            value = pd.NA
        df_extractor_comparison.append({'revision': revision, 'architecture': architecture, 'extractor_jaccard': value})
    df_extractor_comparison = pd.DataFrame(df_extractor_comparison)

# Rename the extractors before merging, as the cell that defines
# report_potential_misses does. The cache holds the raw names
# ('kconfigreader', 'kmax'); without this call df_features would silently
# revert to them and filters such as extractor == 'KClause' would match nothing.
replace_values(features_by_kind_per_architecture)
df_features = pd.merge(df_architectures, features_by_kind_per_architecture, how='outer').sort_values(by='committer_date')
df_features = pd.merge(df_kconfig, df_features, how='outer').sort_values(by='committer_date')


In [32]:
def linux_total_features():
    """Return the ``#total_features`` rows of the five most recent commits.

    ``df_features`` is reduced to the minimum ``#total_features`` per extractor
    and revision, joined with the commit dates from ``df_kconfig`` and sorted
    by ``committer_date``; the last five rows are returned.
    """
    per_revision = (
        df_features
        .groupby(['extractor', 'revision'])
        .agg({'#total_features': 'min'})
        .reset_index()
    )
    dated_revisions = df_kconfig[['committer_date', 'revision']].drop_duplicates()
    dated_totals = dated_revisions.merge(per_revision)
    return dated_totals.sort_values(by=["committer_date"]).tail(5)

In [33]:
# Total feature counts of the five most recent rows.
linux_total_features()["#total_features"]

287    19812.0
269    19898.0
268    19740.0
270    19869.0
271    20024.0
Name: #total_features, dtype: float64

In [170]:
def latest_for(df, column, committer_date):
    """Return ``column`` of the row that sorts last by ``committer_date``.

    ``committer_date`` is the name of the column to sort by. The result is a
    one-element selection (empty if ``df`` is empty), not a scalar.
    """
    ordered = df.sort_values(by=[committer_date])
    return ordered.iloc[-1:][column]

In [295]:
def by_revision(df):
    """Select the rows of ``df`` whose revision ends in ``.0``.

    A revision matches when it ends with a word character, one or more digits
    and ``.0`` (for example ``v5.0``). If no revision matches, all rows are
    returned instead, sorted by the ``revision`` string.
    """
    # Raw string: '\w', '\d' and '\.' are invalid escape sequences in an
    # ordinary string literal and trigger a warning on recent Python versions.
    x = df[df['revision'].str.contains(r"\w\d+\.0$", regex=True)]
    if len(x) == 0:
        x = df.sort_values(by=["revision"])
    return x

In [302]:
def find_revision(df, revision):
    """Return the rows of ``df`` whose revision contains the text ``revision``.

    The text is matched literally, not as a regular expression.
    """
    matches = df['revision'].str.contains(revision, regex=False)
    return df[matches]

In [84]:
def for_arch(df, arch):
    """Return the rows of ``df`` whose ``architecture`` equals ``arch``."""
    is_arch = df['architecture'] == arch
    return df[is_arch]

In [334]:
# Exploration: revisions that by_revision selects for the x86_64 architecture.
x = for_arch(df_kconfig, "x86_64")
x = by_revision(x)
x["revision"]

32      v2.5.45
33      v2.5.45
70      v2.5.46
71      v2.5.46
108     v2.5.47
         ...   
2841     v2.6.7
2880     v2.6.8
2881     v2.6.8
2922     v2.6.9
2923     v2.6.9
Name: revision, Length: 110, dtype: object

In [276]:
import json
def write_object_to_file(obj, name):
    """Serialise ``obj`` as JSON into the file ``name``, replacing its content."""
    with open(name, 'w') as out:
        json.dump(obj, fp=out)
def read_json(path):
    """Return the object parsed from the JSON file at ``path``."""
    with open(path) as source:
        text = source.read()
    return json.loads(text)

In [386]:
def get_metrics_sloc_linux():
    """Compute the current source-lines-of-code metric per Linux architecture.

    For every architecture in the Linux kconfig results, the rows selected by
    ``by_revision`` are used. The current value is the ``source_lines_of_code``
    of the most recent commit. It is compared with the first selected row
    whose revision ends in ``<previous major>.<digit>``, where the major
    version is the first number in the most recent revision name.

    Returns:
        A dict ``{"linux/<arch>": {"source_lines_of_code": {"currentValue":
        "<n> loc", "cmpLastRevision": "<signed percentage>%"}}}``. When there
        is no revision to compare with, ``cmpLastRevision`` is
        ``"+100% (No Prior Revision)"``.
    """
    output_directory = "output-linux"
    df_kconfig = read_dataframe("kconfig", output_dir=output_directory)
    vals = dict()
    for arch in df_kconfig["architecture"].unique():
        df_arch = by_revision(for_arch(df_kconfig, arch))
        sloc = int(
            latest_for(df_arch, "source_lines_of_code", "committer_date_unix").iloc[0]
        )
        last_rev = latest_for(df_arch, "revision", "committer_date_unix").iloc[0]

        # Same "<n> loc" format in both outcomes; the no-prior-revision case
        # used to store a bare int.
        metric = {
            "currentValue": f"{sloc} loc",
            "cmpLastRevision": "+100% (No Prior Revision)",
        }
        vals[f"linux/{arch}"] = {"source_lines_of_code": metric}

        # First number in the revision name, so that major versions with more
        # than one digit are handled as well (previously a single character).
        major_match = re.match(r"\D*(\d+)", last_rev)
        if major_match is None:
            continue
        major = int(major_match.group(1))
        before_last = df_arch[df_arch['revision'].str.contains(rf"\w{major-1}\.\d$", regex=True)]
        if len(before_last) == 0:
            continue
        previous_sloc = int(before_last["source_lines_of_code"].iloc[0])
        # Formatted directly: rounding to two decimals first and to one
        # afterwards can round twice (e.g. 1.2451 -> 1.25 -> 1.2 or 1.3).
        change = 100 * (sloc - previous_sloc) / previous_sloc
        metric["cmpLastRevision"] = f"{change:+.1f}%"

    return vals

In [350]:
def merge_metrics(new, path="src/public/init.json"):
    """Merge freshly computed metric values into the JSON file at ``path``.

    Args:
        new: dict ``project -> metric -> {name: value}``, as returned by the
            ``get_metrics_*`` functions.
        path: JSON file that is read, updated and written back.

    Projects that are missing from the file's ``"projectData"`` are reported
    once and skipped. A metric that a known project does not have yet is
    created instead of raising KeyError. Existing names that ``new`` does not
    mention are kept.
    """
    old = read_json(path)
    project_data = old["projectData"]
    for proj, metrics in new.items():
        if proj not in project_data:
            # reported once per project rather than once per value
            print(f"{proj} not in old")
            continue
        for metric, values in metrics.items():
            project_data[proj].setdefault(metric, {}).update(values)
    write_object_to_file(old, path)

In [388]:
# Source-lines-of-code metrics for all Linux architectures.
new = get_metrics_sloc_linux()

In [389]:
# Write the Linux metrics into the JSON file maintained by merge_metrics.
merge_metrics(new)

In [387]:
# Switch back to the busybox results and keep only the rows of the busybox system.
output_directory = "output-busybox"
df_kconfig = read_dataframe('kconfig')
df_kconfig = df_kconfig[df_kconfig["system"] =="busybox"]
def get_metrics_sloc_nonLinux(project):
    """Compute the source-lines-of-code metric of a non-Linux project.

    The rows of the global ``df_kconfig`` whose ``system`` equals ``project``
    are used. The two most recent revisions (by ``committer_date_unix``) among
    the rows selected by ``by_revision`` are compared.

    Args:
        project: system name; also the key of the returned dict.

    Returns:
        ``{project: {"source_lines_of_code": {"currentValue": "<n> loc",
        "cmpLastRevision": "<signed percentage>%"}}}``.

    Raises:
        ValueError: if fewer than two revisions are available for ``project``.
    """
    # Select the requested project explicitly. Previously `project` was only
    # used as the key, so the numbers were always those of whatever the global
    # frame happened to contain.
    df_project = df_kconfig[df_kconfig["system"] == project]
    df_arch = by_revision(df_project)
    lastTwo = df_arch.sort_values(by="committer_date_unix").tail(2)["revision"]
    if len(lastTwo) < 2:
        raise ValueError(f"need at least two revisions of {project!r} to compare, found {len(lastTwo)}")
    print(lastTwo)
    before_last_rev, last_rev = lastTwo.iloc[0], lastTwo.iloc[1]
    sloc = int(df_arch[df_arch["revision"]==last_rev]["source_lines_of_code"].iloc[0])
    before_last = int(df_arch[df_arch["revision"]==before_last_rev]["source_lines_of_code"].iloc[0])
    print(sloc, before_last)
    # Formatted directly: rounding to two decimals first and to one afterwards
    # can round twice.
    value = 100 * (sloc - before_last) / before_last
    vals = dict()
    vals[project] = {
        "source_lines_of_code": {
            "currentValue": f"{sloc} loc",
            "cmpLastRevision": f"{value:+.1f}%",
        }
    }

    return vals

In [390]:
# Source-lines-of-code metric for busybox.
x = get_metrics_sloc_nonLinux("busybox")


220    1_35_0
221    1_36_0
Name: revision, dtype: object
209492 205741


In [391]:

# Write the busybox metric into the JSON file maintained by merge_metrics.
merge_metrics(x)