In [1]:
import pandas
import sys
import os
import json
sys.path.insert(0, os.getcwd())
from helpers import recursive_find, read_json, create_fedora_results_table

In [2]:
# Collect the experiment JSON files. One experiment means one package at one version
# across splices (versions of a single dependency) and predictors.
experiments = [*recursive_find("artifacts/results/extracted/fedora", "*.json")]
print('Found %s experiment library files.' % len(experiments))

Found 10146 experiment library files.


In [3]:
# Build a single results table from all experiment files using the project helper,
# then render it. The rendered table has the columns a, b, original, changed,
# analysis, seconds, predictor and prediction.
df = create_fedora_results_table(experiments)
display(df)

Unnamed: 0,a,b,original,changed,analysis,seconds,predictor,prediction
0,fedora-libs-35,fedora-libs-37,first/usr/lib64/libQt5Quick.so.5.15.5,second/usr/lib64/libQt5Quick.so.5.15.5,missing-previously-found-symbols,0.002819,symbols,True
1,fedora-libs-35,fedora-libs-37,first/usr/lib64/libQt5Quick.so.5.15.5,second/usr/lib64/libQt5Quick.so.5.15.5,missing-previously-found-exports,0.254755,symbols,True
2,fedora-libs-35,fedora-libs-37,first/usr/lib64/libQt5Quick.so.5.15.5,second/usr/lib64/libQt5Quick.so.5.15.5,abidiff,49.429475,libabigail,True
3,fedora-libs-35,fedora-libs-37,first/usr/lib64/libQt5Quick.so.5.15.5,second/usr/lib64/libQt5Quick.so.5.15.5,abi-compliance-tester,625.529167,abi-laboratory,True
4,fedora-libs-34,fedora-libs-37,first/usr/lib64/libx11globalcomm.so.1.0.0,second/usr/lib64/libx11globalcomm.so.1.0.0,missing-previously-found-symbols,0.000072,symbols,True
...,...,...,...,...,...,...,...,...
40579,fedora-libs-34,fedora-libs-36,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,abi-compliance-tester,31.900113,abi-laboratory,True
40580,fedora-libs-36,fedora-libs-37,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,missing-previously-found-symbols,0.000897,symbols,True
40581,fedora-libs-36,fedora-libs-37,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,missing-previously-found-exports,0.106127,symbols,True
40582,fedora-libs-36,fedora-libs-37,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,abidiff,1.399777,libabigail,True


In [4]:
# How many libraries? Report the total number of entries and the number of unique ones.
library_count = list(df['original'].values) + list(df['changed'].values)
print('Found %s total libraries' % len(library_count))

# The total above counts every row twice (once per side) and repeats a library for each
# comparison it takes part in, so on its own it does not answer "how many unique libraries".
# NOTE(review): the displayed rows prefix paths with "first/" or "second/"; this assumes
# the leading path component only marks the side of the comparison, so it is dropped
# before de-duplicating -- confirm.
unique_library_count = (
    pandas.Series(library_count, dtype="object").str.split("/", n=1).str[-1].nunique()
)
print('Found %s unique libraries' % unique_library_count)

Found 81168 total libraries


In [5]:
# Symbols has bug that needs fixing, so these runs are for libabigail / abilab only.
symbols_analyses = ["missing-previously-found-exports", "missing-previously-found-symbols"]
df = df[~df["analysis"].isin(symbols_analyses)]

# Remove unknown predictions
df = df[df['prediction'] != "Unknown"]

# Look to see if we have True/False
for predictor in df['predictor'].unique():
    print(predictor)
    print(df[df['predictor'] == predictor]['prediction'].unique())

# Where all predictors agree.
# A comparison (a, b, original, changed) only counts as agreement when every remaining
# predictor produced a result for it AND those results are identical. Checking
# nunique() == 1 alone would also flag comparisons that have a result from a single
# predictor, which "agree" trivially.
comparison_keys = ['a', 'b', 'original', 'changed']
predictor_total = df['predictor'].nunique()
by_comparison = df.groupby(comparison_keys)
same_prediction = by_comparison['prediction'].nunique() == 1
every_predictor_present = by_comparison['predictor'].nunique() == predictor_total

# Keep the flag column named 'prediction' so the merge suffix yields 'prediction_all_agree'.
all_predictors_agree = (
    (same_prediction & every_predictor_present).rename('prediction').reset_index()
)
df_all_agree = df.merge(all_predictors_agree, on=comparison_keys, suffixes=("", "_all_agree"))
df_all_agree = df_all_agree[df_all_agree['prediction_all_agree']]
df_all_agree

libabigail
[True False]
abi-laboratory
[True False]


Unnamed: 0,a,b,original,changed,analysis,seconds,predictor,prediction,prediction_all_agree
0,fedora-libs-35,fedora-libs-37,first/usr/lib64/libQt5Quick.so.5.15.5,second/usr/lib64/libQt5Quick.so.5.15.5,abidiff,49.429475,libabigail,True,True
1,fedora-libs-35,fedora-libs-37,first/usr/lib64/libQt5Quick.so.5.15.5,second/usr/lib64/libQt5Quick.so.5.15.5,abi-compliance-tester,625.529167,abi-laboratory,True,True
2,fedora-libs-34,fedora-libs-37,first/usr/lib64/libx11globalcomm.so.1.0.0,second/usr/lib64/libx11globalcomm.so.1.0.0,abidiff,0.379183,libabigail,True,True
3,fedora-libs-34,fedora-libs-37,first/usr/lib64/libx11globalcomm.so.1.0.0,second/usr/lib64/libx11globalcomm.so.1.0.0,abi-compliance-tester,4.16806,abi-laboratory,True,True
4,fedora-libs-36,fedora-libs-37,first/usr/lib64/librpmbuild.so.9.2.0,second/usr/lib64/librpmbuild.so.9.2.0,abidiff,0.070948,libabigail,True,True
...,...,...,...,...,...,...,...,...,...
20287,fedora-libs-35,fedora-libs-36,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,abi-compliance-tester,31.600611,abi-laboratory,True,True
20288,fedora-libs-34,fedora-libs-36,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,abidiff,1.420139,libabigail,True,True
20289,fedora-libs-34,fedora-libs-36,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,abi-compliance-tester,31.900113,abi-laboratory,True,True
20290,fedora-libs-36,fedora-libs-37,first/usr/lib/systemd/libsystemd-shared-250.so,second/usr/lib/systemd/libsystemd-shared-250.so,abidiff,1.399777,libabigail,True,True


In [6]:
# Reshape to wide format: one row per (a, b, original, changed) comparison and one
# column per (analysis, predictor) pair, holding that pair's prediction.
df_wide = df.pivot(
    index=['a', 'b', 'original', 'changed'],
    columns=['analysis', 'predictor'],
    values='prediction',
)
df_wide

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,analysis,abidiff,abi-compliance-tester
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,predictor,libabigail,abi-laboratory
a,b,original,changed,Unnamed: 4_level_2,Unnamed: 5_level_2
fedora-libs-34,fedora-libs-35,first/usr/lib/gcc/x86_64-redhat-linux/12/32/libasan.so,second/usr/lib/gcc/x86_64-redhat-linux/12/32/libasan.so,False,False
fedora-libs-34,fedora-libs-35,first/usr/lib/gcc/x86_64-redhat-linux/12/32/libatomic.so,second/usr/lib/gcc/x86_64-redhat-linux/12/32/libatomic.so,False,False
fedora-libs-34,fedora-libs-35,first/usr/lib/gcc/x86_64-redhat-linux/12/32/libgcc_s.so,second/usr/lib/gcc/x86_64-redhat-linux/12/32/libgcc_s.so,False,False
fedora-libs-34,fedora-libs-35,first/usr/lib/gcc/x86_64-redhat-linux/12/32/libitm.so,second/usr/lib/gcc/x86_64-redhat-linux/12/32/libitm.so,False,False
fedora-libs-34,fedora-libs-35,first/usr/lib/gcc/x86_64-redhat-linux/12/32/libquadmath.so,second/usr/lib/gcc/x86_64-redhat-linux/12/32/libquadmath.so,False,False
...,...,...,...,...,...
fedora-libs-36,fedora-libs-37,first/usr/lib64/security/pam_wheel.so,second/usr/lib64/security/pam_wheel.so,True,True
fedora-libs-36,fedora-libs-37,first/usr/lib64/security/pam_xauth.so,second/usr/lib64/security/pam_xauth.so,True,True
fedora-libs-36,fedora-libs-37,first/usr/lib64/speech-dispatcher/spd_alsa.so,second/usr/lib64/speech-dispatcher/spd_alsa.so,True,True
fedora-libs-36,fedora-libs-37,first/usr/lib64/speech-dispatcher/spd_libao.so,second/usr/lib64/speech-dispatcher/spd_libao.so,True,True


In [7]:
# Move the comparison keys back into regular columns, then flatten the two-level
# (analysis, predictor) column labels into single "analysis_predictor" strings.
df_wide = df_wide.reset_index()
df_wide.columns = list(map("_".join, df_wide.columns))
df_wide.columns

Index(['a_', 'b_', 'original_', 'changed_', 'abidiff_libabigail',
       'abi-compliance-tester_abi-laboratory'],
      dtype='object')

In [8]:
# Keep only the two prediction columns (one per predictor); drop the comparison keys.
df_predictions = df_wide.loc[:, ['abidiff_libabigail', 'abi-compliance-tester_abi-laboratory']]
df_predictions

Unnamed: 0,abidiff_libabigail,abi-compliance-tester_abi-laboratory
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
10141,True,True
10142,True,True
10143,True,True
10144,True,True


In [9]:
# Filter to rows where every predictor's prediction is either True or False
# (rows with a missing or otherwise non-boolean prediction are dropped).
# The predictor count is derived from the columns rather than hard-coded, so the
# filter stays correct if a predictor column is added or removed.
num_predictors = len(df_predictions.columns)
df_predictions = df_predictions[
    ((df_predictions == True) | (df_predictions == False)).sum(axis=1) == num_predictors
]

# Look to see if we have True/False
for column in df_predictions.columns:
    print(column)
    print(df_predictions[column].unique())

df_predictions

abidiff_libabigail
[False True]
abi-compliance-tester_abi-laboratory
[False True]


Unnamed: 0,abidiff_libabigail,abi-compliance-tester_abi-laboratory
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
10141,True,True
10142,True,True
10143,True,True
10144,True,True


In [10]:
# Correlations between predictors, computed on the predictions encoded as booleans
# (True where the prediction is True, False otherwise).
# A NaN entry would mean that a predictor's values don't vary. None is expected here:
# the symbols analyses (which the original note describes as always True) were filtered
# out in an earlier cell, leaving only libabigail and abi-laboratory.
(df_predictions == True).corr()

Unnamed: 0,abidiff_libabigail,abi-compliance-tester_abi-laboratory
abidiff_libabigail,1.0,0.620286
abi-compliance-tester_abi-laboratory,0.620286,1.0
