In [1]:
import sys
sys.path.insert(0, '../archaeology')
%matplotlib inline
import csv
from collections import defaultdict, Counter


import seaborn as sns
import pandas as pd
import numpy as np
import dask.dataframe as dd
import matplotlib 
import numba
#matplotlib.rc('xtick', labelsize='medium') 
#matplotlib.rc('ytick', labelsize='medium') 
from sqlalchemy import func
from matplotlib import pyplot as plt

from db import connect

import analysis_helpers, importlib
importlib.reload(analysis_helpers)
from analysis_helpers import display_counts, violinplot, describe_processed
from analysis_helpers import numpy_distribution, boxplot_distribution
from analysis_helpers import distribution_with_boxplot, savefig, fig
from analysis_helpers import relative_var, var, cell_distribution
from analysis_helpers import calculate_auto, close_fig, print_result
from analysis_helpers import dbmt, DBMT, print_relative_var_group
from analysis_helpers import load_vars, group_run, dbmt_relative_var

from b8_notebook_aggregate import generate_execution_meta, generate_execution_base

Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.


In [2]:
# Load the stored analysis variables and read the star+fork cut-off from them.
# `threshold` is compared against `row.starforks` in `process_selector`.
vs = load_vars()
threshold = float(vs['s_a0_starforks'])

## Query

Select notebooks that are:
- valid
  
  ```NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
  AND n.processed & 15 = 0 AND n.skip & (1024 + 512) = 0```


- non-duplicated
  
  ```n.skip & (1024 + 512 + 128) = 0```
  
- executed

  ```n.max_execution_count > -0```
  
- unambiguous

  ```n.processing_cells = 0 AND n.unambiguous = 1```
  
- python notebooks

  ```n.language = 'python'
  AND LEFT(n.language_version, 1) IN ('2', '3')```

In [3]:
%%time
with connect() as session:
    print("Query Bool Aggregates")
    # One comma-separated list of double-quoted `b.` columns per execution
    # mode, in the same order as the placeholders of the SELECT clause.
    bool_column_groups = [
        ", ".join('b."{}"'.format(key) for key in generate_execution_base(mode))
        for mode in ("original_exe", "topdown", "execorder")
    ]
    query = """
        SELECT n.*, {}, {}, {}
        FROM notebooks n, notebooks_bool_aggregates b
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND b.notebook_id = n.id
    """.format(*bool_column_groups)
    # Every notebook column plus the selected boolean aggregate columns.
    notebooks = pd.read_sql(query, session.connection())

Query Bool Aggregates
CPU times: user 36.2 s, sys: 1.78 s, total: 38 s
Wall time: 52.2 s


In [4]:
%%time
with connect() as session:
    print("Query Full Bool Aggregates")
    # One comma-separated list of double-quoted `b.` columns per execution
    # mode, in the same order as the placeholders of the SELECT clause.
    full_column_groups = [
        ", ".join('b."{}"'.format(key) for key in generate_execution_base(mode))
        for mode in ("fulltopdown", "fullexecorder")
    ]
    query = """
        SELECT n.*, {}, {}
        FROM notebooks n, notebooks_fullexecbool_aggregates b
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND b.notebook_id = n.id
    """.format(*full_column_groups)
    # Every notebook column plus the selected full-execution boolean columns.
    fullexec = pd.read_sql(query, session.connection())

Query Full Bool Aggregates
CPU times: user 28.5 s, sys: 698 ms, total: 29.2 s
Wall time: 33.7 s


In [5]:
%%time
# Fetch, for the same notebook selection as the previous queries, the exception
# column of each execution mode stored in `notebooks_meta_aggregates`.
# The result is keyed by notebook id so it can be joined onto `notebooks`.
with connect() as session:
    print('Query Meta Aggregates')
    query = ("""
        SELECT n.id, b.original_exe_exception, b.execorder_exception, b.topdown_exception
        FROM notebooks n, notebooks_meta_aggregates b
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND b.notebook_id = n.id
    """)
    notebooks_meta = pd.read_sql(query, session.connection())

Query Meta Aggregates
CPU times: user 1.85 s, sys: 171 ms, total: 2.02 s
Wall time: 1min 7s


In [6]:
%%time
# Three steps share one session: the exception columns of the two
# full-execution modes, the complete `repositories` table, and a size report.
with connect() as session:
    print("Query Full Meta Aggregates")
    query = ("""
        SELECT n.id, b.fullexecorder_exception, b.fulltopdown_exception
        FROM notebooks n, notebooks_fullexecmeta_aggregates b
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND b.notebook_id = n.id
    """)
    fullexec_meta = pd.read_sql(query, session.connection())
    
    print("Query Repositories")
    # Unfiltered: every row and column of `repositories` is loaded.
    t_repositories = pd.read_sql("""
        SELECT *
        FROM repositories
    """, session.connection())
    
    # `notebooks` comes from the bool-aggregates query cell, which must have run first.
    print("Unambiguous Python Notebooks:", len(notebooks))
    

Query Full Meta Aggregates
Query Repositories
Unambiguous Python Notebooks: 753405
CPU times: user 4.34 s, sys: 234 ms, total: 4.57 s
Wall time: 47.9 s


In [7]:
# Attach the exception columns to the notebook frames by notebook id.
# NOTE(review): `notebooks` and `fullexec` are rebound from themselves here, so
# this cell is meant to run exactly once after the query cells.
notebooks = notebooks.set_index("id").join(notebooks_meta.set_index("id")).reset_index()
fullexec = fullexec.set_index("id").join(fullexec_meta.set_index("id")).reset_index()
# Popularity columns per repository, taken from the first notebook row of each
# `repository_id` group.
notebooks_repositories = notebooks.groupby("repository_id").first()[["stargazers", "forks", "starforks"]]
# Inner join: only repositories that own at least one selected notebook remain.
repositories = t_repositories.set_index('id').join(notebooks_repositories, how="inner")
repositories['id'] = repositories.index
# Ids of repositories with a positive count in each dependency-file column.
repositories_with_setup_py = set(repositories[
    repositories["setups_count"] > 0
]["id"].tolist())
repositories_with_requirements_txt = set(repositories[
    repositories["requirements_count"] > 0
]["id"].tolist())
repositories_with_pipfile = set(repositories[
    (repositories["pipfiles_count"] > 0)
    | (repositories["pipfile_locks_count"] > 0)
]["id"].tolist())
# Union of the three id sets above.
repositories_any_dependency = (
    repositories_with_setup_py
    | repositories_with_requirements_txt
    | repositories_with_pipfile
)
# Replace every missing value with False, in place.
# NOTE(review): this touches all columns, not only the boolean flag columns
# used for the masks below — confirm no later analysis needs the NaNs.
notebooks.fillna(False, inplace=True)
fullexec.fillna(False, inplace=True)
# One subset per execution mode, keeping rows whose
# `<mode>_did_not_skip_execution` flag is truthy.
original_executions = notebooks[
    notebooks["original_exe_did_not_skip_execution"]
]
execorder_executions = notebooks[
    notebooks["execorder_did_not_skip_execution"]
]
topdown_executions = notebooks[
    notebooks["topdown_did_not_skip_execution"]
]
fullexecorder_executions = fullexec[
    fullexec["fullexecorder_did_not_skip_execution"]
]
fulltopdown_executions = fullexec[
    fullexec["fulltopdown_did_not_skip_execution"]
]
# Keep only executions whose repository is NOT in `repositories_any_dependency`.
filter_execorder = execorder_executions[
    ~execorder_executions["repository_id"].isin(repositories_any_dependency)
]
filter_topdown = topdown_executions[
    ~topdown_executions["repository_id"].isin(repositories_any_dependency)
]

# Displayed as the cell result: executed full top-down rows vs. all full-exec rows.
len(fulltopdown_executions), len(fullexec)

(672235, 753404)

## Reproducibility distribution

In [8]:
%%time
def prepare(mode, norms=("_original", "_execution_count", "_image"), prefixes=("",), value=lambda: 0):
    """Build a dict with one freshly created value per ``prefix + mode + norm`` key.

    Args:
        mode: Base name placed between each prefix and each norm suffix.
        norms: Iterable of suffixes appended after ``mode``.
        prefixes: Iterable of strings prepended before ``mode``.
        value: Zero-argument factory called once per key, so mutable values
            (lists, arrays) are never shared between keys.

    Returns:
        dict: Keys ordered by norm first, then by prefix within each norm.
    """
    # Defaults are tuples rather than lists so that no mutable object is shared
    # between calls; the arguments are only iterated.
    return {
        prefix + mode + norm: value()
        for norm in norms
        for prefix in prefixes
    }

BINS = 1261

# Selector keys: every DBMT key followed by the `None` key for '<deleted>'.
_SELECTOR_KEYS = [key for key, _ in DBMT + [(None, '<deleted>')]]
# Re-execution modes that are tracked under all three default norms of `prepare`.
_REPLAY_MODES = ("execorder", "topdown", "fullexecorder", "fulltopdown")


def _accumulators(value=lambda: 0, prefixes=("",)):
    """Return ``{selector key: {mode key: value()}}`` with fresh values everywhere.

    "original" is registered without a norm suffix; each mode in
    ``_REPLAY_MODES`` is registered with the default norms of ``prepare``.
    """
    table = {}
    for key in _SELECTOR_KEYS:
        per_mode = prepare("original", [""], prefixes=prefixes, value=value)
        for mode in _REPLAY_MODES:
            per_mode.update(prepare(mode, prefixes=prefixes, value=value))
        table[key] = per_mode
    return table


# Per-bin sums, kept separately for "diff_" and "same_" results.
bins_arrays = _accumulators(value=lambda: np.zeros(BINS), prefixes=["diff_", "same_"])
# Running integer counters.
total_counts = _accumulators()
diff_counts = _accumulators()
# One list of mismatch values per selector and mode.
mismatch = _accumulators(value=lambda: [])

CPU times: user 1.02 ms, sys: 43 µs, total: 1.07 ms
Wall time: 590 µs


In [9]:
def process_row_mode(mode, selectors, row, mismatch_attr, same_index_attr, diff_index_attr, same_attr, diff_attr):
    """Fold one query row into the module-level accumulators for ``mode``.

    Updates ``mismatch``, ``total_counts``, ``diff_counts`` and ``bins_arrays``
    for every selector in ``selectors``.

    Args:
        mode: Accumulator key, e.g. ``"original"`` or ``"topdown_image"``.
        selectors: Selector keys (first-level keys of the accumulators) that
            this row contributes to.
        row: Result row; the values are read with ``getattr``.
        mismatch_attr: Attribute holding the value appended to ``mismatch``.
        same_index_attr: Attribute holding the sized collection counted as "same".
        diff_index_attr: Attribute holding the sized collection counted as "diff".
        same_attr: Attribute holding the sequence added to the ``same_`` bins.
        diff_attr: Attribute holding the sequence added to the ``diff_`` bins.
    """
    mismatch_value = getattr(row, mismatch_attr)
    same_index_value = getattr(row, same_index_attr)
    diff_index_value = getattr(row, diff_index_attr)
    same_value = getattr(row, same_attr)
    diff_value = getattr(row, diff_attr)

    # Everything derived from the row is the same for all selectors, so it is
    # computed once. A malformed row now fails before any accumulator changes.
    diff_total = len(diff_index_value)
    cell_total = len(same_index_value) + diff_total
    # Sequences of length 0 or 1 are not added to the bins.
    # NOTE(review): presumably those cannot be aligned with the BINS-sized
    # arrays — confirm against the producer of the *_list columns.
    diff_bins = np.array(diff_value) if len(diff_value) > 1 else None
    same_bins = np.array(same_value) if len(same_value) > 1 else None

    for selector in selectors:
        mismatch[selector][mode].append(mismatch_value)
        total_counts[selector][mode] += cell_total
        diff_counts[selector][mode] += diff_total
        if diff_bins is not None:
            bins_arrays[selector]['diff_' + mode] += diff_bins
        if same_bins is not None:
            bins_arrays[selector]['same_' + mode] += same_bins

def process_selector(row):
    """Return the selector keys a result row contributes to.

    Every row belongs to ``"d_"``. A row whose ``notebook_id`` appears in
    ``samples["notebook_id"]`` also gets ``"sd_"``; otherwise a row whose
    ``starforks`` is not None and reaches ``threshold`` also gets ``"td_"``.

    Relies on the module-level names ``samples`` and ``threshold``.
    NOTE(review): ``samples`` is not defined in the cells visible here, and the
    recorded run of the first cell calling this function ended with
    ``NameError: name 'samples' is not defined``.
    """
    if row.notebook_id in samples["notebook_id"].to_list():
        return ["d_", "sd_"]
    if row.starforks is not None and row.starforks >= threshold:
        return ["d_", 'td_']
    return ["d_"]
    

In [10]:
with connect() as session:
    print("Query Original")
    # Mismatch rate plus the same/diff lists (values and indexes) stored for
    # the original execution of every selected notebook.
    query = session.execute("""
        SELECT 
            n.id, n.id as notebook_id,
            m.original_exe_original_mismatch_rate as mismatch,
            m.original_exe_original_same_list as same,
            m.original_exe_original_diff_list as diff,
            m.original_exe_index_original_same_list as same_index,
            m.original_exe_index_original_diff_list as diff_index,
            n.stargazers, n.forks, n.starforks
        FROM notebooks n, notebooks_meta_aggregates m
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND m.notebook_id = n.id
    """)
    # A set makes the per-row membership test constant time; with a list every
    # result row scanned all executed notebook ids.
    executed_ids = set(original_executions["id"].to_list())

    for row in query:
        # Only notebooks whose original execution was not skipped are counted.
        if row.id not in executed_ids:
            continue
        selectors = process_selector(row)
        process_row_mode("original", selectors, row, "mismatch", "same_index", "diff_index", "same", "diff")

Query Original


NameError: name 'samples' is not defined

In [None]:
%%time
with connect() as session:
    print("Query Exec Order")
    # For the execution-order replay: mismatch rate plus same/diff lists
    # (values and indexes) under the original, execution-count (`_ec`) and
    # image (`_i`) comparisons.
    query = session.execute("""
        SELECT 
            n.id, n.id as notebook_id,
            m.execorder_original_mismatch_rate as mismatch,
            m.execorder_original_same_list as same, 
            m.execorder_original_diff_list as diff,
            m.execorder_index_original_same_list as same_index,
            m.execorder_index_original_diff_list as diff_index,
            m.execorder_execution_count_mismatch_rate as mismatch_ec,
            m.execorder_execution_count_same_list as same_ec, 
            m.execorder_execution_count_diff_list as diff_ec,
            m.execorder_index_execution_count_same_list as same_index_ec,
            m.execorder_index_execution_count_diff_list as diff_index_ec,
            m.execorder_image_mismatch_rate as mismatch_i,
            m.execorder_image_same_list as same_i, 
            m.execorder_image_diff_list as diff_i,
            m.execorder_index_image_same_list as same_index_i,
            m.execorder_index_image_diff_list as diff_index_i,
            n.stargazers, n.forks, n.starforks
        FROM notebooks n, notebooks_meta_aggregates m
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND m.notebook_id = n.id
    """)
    # A set makes the per-row membership test constant time; with a list every
    # result row scanned all ids in `filter_execorder`.
    executed_ids = set(filter_execorder["id"].to_list())

    for row in query:
        # Only notebooks kept in `filter_execorder` are counted.
        if row.id not in executed_ids:
            continue
        selectors = process_selector(row)
        process_row_mode("execorder_original", selectors, row, "mismatch", "same_index", "diff_index", "same", "diff")
        process_row_mode("execorder_execution_count", selectors, row, "mismatch_ec", "same_index_ec", "diff_index_ec", "same_ec", "diff_ec")
        process_row_mode("execorder_image", selectors, row, "mismatch_i", "same_index_i", "diff_index_i", "same_i", "diff_i")
        


In [None]:
%%time
with connect() as session:
    print("Query Topdown")
    # For the top-down replay: mismatch rate plus same/diff lists (values and
    # indexes) under the original, execution-count (`_ec`) and image (`_i`)
    # comparisons.
    query = session.execute("""
        SELECT 
            n.id, n.id as notebook_id,
            m.topdown_original_mismatch_rate as mismatch,
            m.topdown_original_same_list as same, 
            m.topdown_original_diff_list as diff,
            m.topdown_index_original_same_list as same_index,
            m.topdown_index_original_diff_list as diff_index,
            m.topdown_execution_count_mismatch_rate as mismatch_ec,
            m.topdown_execution_count_same_list as same_ec, 
            m.topdown_execution_count_diff_list as diff_ec,
            m.topdown_index_execution_count_same_list as same_index_ec,
            m.topdown_index_execution_count_diff_list as diff_index_ec,
            m.topdown_image_mismatch_rate as mismatch_i,
            m.topdown_image_same_list as same_i, 
            m.topdown_image_diff_list as diff_i,
            m.topdown_index_image_same_list as same_index_i,
            m.topdown_index_image_diff_list as diff_index_i,
            n.stargazers, n.forks, n.starforks
        FROM notebooks n, notebooks_meta_aggregates m
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND m.notebook_id = n.id
    """)
    # A set makes the per-row membership test constant time; with a list every
    # result row scanned all ids in `filter_topdown`.
    executed_ids = set(filter_topdown["id"].to_list())

    for row in query:
        # Only notebooks kept in `filter_topdown` are counted.
        if row.id not in executed_ids:
            continue
        selectors = process_selector(row)
        process_row_mode("topdown_original", selectors, row, "mismatch", "same_index", "diff_index", "same", "diff")
        process_row_mode("topdown_execution_count", selectors, row, "mismatch_ec", "same_index_ec", "diff_index_ec", "same_ec", "diff_ec")
        process_row_mode("topdown_image", selectors, row, "mismatch_i", "same_index_i", "diff_index_i", "same_i", "diff_i")

In [None]:
%%time
with connect() as session:
    print("Query Full Exec Order")
    # For the full execution-order replay: mismatch rate plus same/diff lists
    # (values and indexes) under the original, execution-count (`_ec`) and
    # image (`_i`) comparisons.
    query = session.execute("""
        SELECT 
            n.id, n.id as notebook_id,
            m.fullexecorder_original_mismatch_rate as mismatch,
            m.fullexecorder_original_same_list as same, 
            m.fullexecorder_original_diff_list as diff,
            m.fullexecorder_index_original_same_list as same_index,
            m.fullexecorder_index_original_diff_list as diff_index,
            m.fullexecorder_execution_count_mismatch_rate as mismatch_ec,
            m.fullexecorder_execution_count_same_list as same_ec, 
            m.fullexecorder_execution_count_diff_list as diff_ec,
            m.fullexecorder_index_execution_count_same_list as same_index_ec,
            m.fullexecorder_index_execution_count_diff_list as diff_index_ec,
            m.fullexecorder_image_mismatch_rate as mismatch_i,
            m.fullexecorder_image_same_list as same_i, 
            m.fullexecorder_image_diff_list as diff_i,
            m.fullexecorder_index_image_same_list as same_index_i,
            m.fullexecorder_index_image_diff_list as diff_index_i,
            n.stargazers, n.forks, n.starforks
        FROM notebooks n, notebooks_fullexecmeta_aggregates m
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND m.notebook_id = n.id
    """)
    # A set makes the per-row membership test constant time; with a list every
    # result row scanned all ids in `fullexecorder_executions`.
    executed_ids = set(fullexecorder_executions["id"].to_list())

    for row in query:
        # Only notebooks present in `fullexecorder_executions` are counted.
        if row.id not in executed_ids:
            continue
        selectors = process_selector(row)
        process_row_mode("fullexecorder_original", selectors, row, "mismatch", "same_index", "diff_index", "same", "diff")
        process_row_mode("fullexecorder_execution_count", selectors, row, "mismatch_ec", "same_index_ec", "diff_index_ec", "same_ec", "diff_ec")
        process_row_mode("fullexecorder_image", selectors, row, "mismatch_i", "same_index_i", "diff_index_i", "same_i", "diff_i")

In [None]:
%%time
with connect() as session:
    print("Query Full Topdown")
    # For the full top-down replay: mismatch rate plus same/diff lists (values
    # and indexes) under the original, execution-count (`_ec`) and image
    # (`_i`) comparisons.
    query = session.execute("""
        SELECT 
            n.id, n.id as notebook_id,
            m.fulltopdown_original_mismatch_rate as mismatch,
            m.fulltopdown_original_same_list as same, 
            m.fulltopdown_original_diff_list as diff,
            m.fulltopdown_index_original_same_list as same_index,
            m.fulltopdown_index_original_diff_list as diff_index,
            m.fulltopdown_execution_count_mismatch_rate as mismatch_ec,
            m.fulltopdown_execution_count_same_list as same_ec, 
            m.fulltopdown_execution_count_diff_list as diff_ec,
            m.fulltopdown_index_execution_count_same_list as same_index_ec,
            m.fulltopdown_index_execution_count_diff_list as diff_index_ec,
            m.fulltopdown_image_mismatch_rate as mismatch_i,
            m.fulltopdown_image_same_list as same_i, 
            m.fulltopdown_image_diff_list as diff_i,
            m.fulltopdown_index_image_same_list as same_index_i,
            m.fulltopdown_index_image_diff_list as diff_index_i,
            n.stargazers, n.forks, n.starforks
        FROM notebooks n, notebooks_fullexecmeta_aggregates m
        WHERE NOT (n.kernel = 'no-kernel' AND n.nbformat = '0')
        AND n.processed & 15 = 0
        AND n.skip & 2048 = 0
        AND n.skip & (1024 + 512) = 0
        AND n.skip & (1024 + 512 + 128) = 0
        AND n.max_execution_count > -0
        AND n.processing_cells = 0
        AND n.unambiguous = 1
        AND n.language = 'python'
        AND LEFT(n.language_version, 1) IN ('2', '3')
        AND m.notebook_id = n.id
    """)
    # A set makes the per-row membership test constant time; with a list every
    # result row scanned all ids in `fulltopdown_executions`.
    executed_ids = set(fulltopdown_executions["id"].to_list())

    for row in query:
        # Only notebooks present in `fulltopdown_executions` are counted.
        if row.id not in executed_ids:
            continue
        selectors = process_selector(row)
        process_row_mode("fulltopdown_original", selectors, row, "mismatch", "same_index", "diff_index", "same", "diff")
        process_row_mode("fulltopdown_execution_count", selectors, row, "mismatch_ec", "same_index_ec", "diff_index_ec", "same_ec", "diff_ec")
        process_row_mode("fulltopdown_image", selectors, row, "mismatch_i", "same_index_i", "diff_index_i", "same_i", "diff_i")
            

## Repro diff comparison

### Original

In [None]:
select = ["same_original", "diff_original"]


def original_exe_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the original execution for one group.

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.
    """
    figure_name = prefix + "a6_original_exe_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, original_exe_repro_diff_comparison);

### Exec. Order - Original

In [None]:
select = ["same_execorder_original", "diff_execorder_original"]


def execorder_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the exec-order replay (original comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): despite "execution_count" in its name, this function draws
    the ``_original`` series; the execution-counter cell further down defines
    a function with the same name.
    """
    figure_name = prefix + "a6_execorder_original_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, execorder_execution_count_repro_diff_comparison);

### Exec. Order - Execution Counter Normalization

In [None]:
select = ["same_execorder_execution_count", "diff_execorder_execution_count"]


def execorder_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the exec-order replay (execution-count comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): this rebinds the function name already used by the
    "Exec. Order - Original" cell.
    """
    figure_name = prefix + "a6_execorder_execution_count_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, execorder_execution_count_repro_diff_comparison);

### Exec. Order - Image Normalization

In [None]:
select = ["same_execorder_image", "diff_execorder_image"]


def execorder_image_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the exec-order replay (image comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.
    """
    figure_name = prefix + "a6_execorder_image_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, execorder_image_repro_diff_comparison);

### Top Down - Original

In [None]:
select = ["same_topdown_original", "diff_topdown_original"]


def topdown_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the top-down replay (original comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): despite "execution_count" in its name, this function draws
    the ``_original`` series; the execution-counter cell further down defines
    a function with the same name.
    """
    figure_name = prefix + "a6_topdown_original_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, topdown_execution_count_repro_diff_comparison);

### Top Down - Execution Counter Normalization

In [None]:
select = ["same_topdown_execution_count", "diff_topdown_execution_count"]


def topdown_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the top-down replay (execution-count comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): this rebinds the function name already used by the
    "Top Down - Original" cell.
    """
    figure_name = prefix + "a6_topdown_execution_count_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, topdown_execution_count_repro_diff_comparison);

### Top Down - Image Normalization

In [None]:
select = ["same_topdown_image", "diff_topdown_image"]


def topdown_image_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the top-down replay (image comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.
    """
    figure_name = prefix + "a6_topdown_image_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, topdown_image_repro_diff_comparison);

### Bloated Exec Order - Original

In [None]:
select = ["same_fullexecorder_original", "diff_fullexecorder_original"]


def fullexecorder_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the full exec-order replay (original comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): despite "execution_count" in its name, this function draws
    the ``_original`` series; the execution-counter cell further down defines
    a function with the same name.
    """
    figure_name = prefix + "a6_fullexecorder_original_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, fullexecorder_execution_count_repro_diff_comparison);

### Bloated Exec Order - Execution Counter Normalization

In [None]:
select = ["same_fullexecorder_execution_count", "diff_fullexecorder_execution_count"]


def fullexecorder_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the full exec-order replay (execution-count comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): this rebinds the function name already used by the
    "Bloated Exec Order - Original" cell.
    """
    figure_name = prefix + "a6_fullexecorder_execution_count_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, fullexecorder_execution_count_repro_diff_comparison);

### Bloated Exec Order - Image Normalization

In [None]:
select = ["same_fullexecorder_image", "diff_fullexecorder_image"]


def fullexecorder_image_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the full exec-order replay (image comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.
    """
    figure_name = prefix + "a6_fullexecorder_image_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, fullexecorder_image_repro_diff_comparison);

### Bloated Top Down - Original

In [None]:
select = ["same_fulltopdown_original", "diff_fulltopdown_original"]


def fulltopdown_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the full top-down replay (original comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): despite "execution_count" in its name, this function draws
    the ``_original`` series; the two following "Bloated Top Down" cells
    define functions with the same name.
    """
    figure_name = prefix + "a6_fulltopdown_original_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, fulltopdown_execution_count_repro_diff_comparison);

### Bloated Top Down - Execution Counter Normalization

In [None]:
select = ["same_fulltopdown_execution_count", "diff_fulltopdown_execution_count"]


def fulltopdown_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the full top-down replay (execution-count comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): the same function name is also defined by the
    "Bloated Top Down - Original" and "Bloated Top Down - Image
    Normalization" cells.
    """
    figure_name = prefix + "a6_fulltopdown_execution_count_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, fulltopdown_execution_count_repro_diff_comparison);

### Bloated Top Down - Image Normalization

In [None]:
select = ["same_fulltopdown_image", "diff_fulltopdown_image"]


def fulltopdown_execution_count_repro_diff_comparison(bins_arrays, prefix):
    """Plot the same/diff series of the full top-down replay (image comparison).

    Passes the module-level ``select`` and the group's ``bins_arrays`` to
    ``cell_distribution`` and writes two fixed-position labels on the axes it
    yields. ``prefix`` is prepended to the figure name.

    NOTE(review): despite "execution_count" in its name, this function draws
    the ``_image`` series; it rebinds the name used by the two preceding
    "Bloated Top Down" cells.
    """
    figure_name = prefix + "a6_fulltopdown_image_repro_diff_comparison"
    labels = (("different result", (610, 85)), ("same result", (30, 20)))
    with cell_distribution(figure_name, 6, 2, select, BINS - 1, bins_arrays, figf=fig) as ax:
        for text, position in labels:
            ax.annotate(text, xy=position, color="white", weight="bold")


group_run([bins_arrays[key] for key, _ in DBMT], DBMT, fulltopdown_execution_count_repro_diff_comparison);

Other execution analyses are available in notebook A6.RQ7.