In [15]:
%load_ext autoreload
%autoreload 2

import git
from git import Repo

from unidiff import PatchSet

import subprocess
import json
import pandas as pd
import numpy as np
import matplotlib
import sys
import matplotlib.pyplot as plt
import sklearn
import ipywidgets as widgets


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import tree
from sklearn.preprocessing import OrdinalEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

from src.edit_distance_feature import edit_distance_feature

from src.reorderer import *
from src.reordering_analyzer import ReorderingAnalyzer

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load the table of mutants and test results.
# NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source.
mutants_and_tests = pd.read_pickle('data/jinja_full_with_context.pkl')

# Sparsify data for faster analysis:
sparsify = True
if sparsify:
    # One combined mask instead of two chained .loc calls; .copy() makes the result an
    # independent frame so the column assignment below does not write into a slice.
    keep_row = (mutants_and_tests['test_id'] < 100) & (mutants_and_tests['mutant_id'] < 1200)
    mutants_and_tests = mutants_and_tests.loc[keep_row].copy()

# The former `mutants_and_tests.reset_index()` call discarded its result and was a no-op,
# so it was removed; the original row labels are kept on purpose.
# NOTE(review): a float NaN becomes True under astype('bool') - confirm that 'outcome'
# has no missing values before this cast.
mutants_and_tests["outcome"] = mutants_and_tests["outcome"].astype('bool')
display(mutants_and_tests)
print(mutants_and_tests.columns)

Unnamed: 0,mutant_id,contains_branch_mutant,contains_equality_comparison_mutant,contains_loop_mutant,contains_math_operands_mutant,current_line,line_number_changed,modified_file_path,modified_method,previous_line,...,setup_outcome,setup_duration,call_outcome,call_duration,teardown_outcome,teardown_duration,contains_branch_execution,contains_loop_execution,contains_math_operands_execution,contains_equality_comparison_execution
323100,985.0,1.0,0.0,0.0,1.0,proxy.__name__ = None,313.0,src/jinja2/runtime.py,_all,,...,True,0.027371,True,0.006700,True,0.000230,,,,
323101,985.0,1.0,0.0,0.0,1.0,proxy.__name__ = None,313.0,src/jinja2/runtime.py,_all,,...,True,0.000240,True,0.001041,True,0.000163,,,,
323102,985.0,1.0,0.0,0.0,1.0,proxy.__name__ = None,313.0,src/jinja2/runtime.py,_all,,...,True,0.000212,True,0.000556,True,0.000142,,,,
323103,985.0,1.0,0.0,0.0,1.0,proxy.__name__ = None,313.0,src/jinja2/runtime.py,_all,,...,True,0.000180,True,0.000651,True,0.000149,,,,
323104,985.0,1.0,0.0,0.0,1.0,proxy.__name__ = None,313.0,src/jinja2/runtime.py,_all,,...,True,0.000192,True,0.000613,True,0.000138,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3531545,738.0,1.0,1.0,0.0,1.0,elif token != TOKEN_OPERATOR:,616.0,src/jinja2/lexer.py,wrap,,...,True,0.000339,False,0.000235,,,,,,
3531546,738.0,1.0,1.0,0.0,1.0,elif token != TOKEN_OPERATOR:,616.0,src/jinja2/lexer.py,wrap,,...,True,0.000331,False,0.000244,,,,,,
3531547,738.0,1.0,1.0,0.0,1.0,elif token != TOKEN_OPERATOR:,616.0,src/jinja2/lexer.py,wrap,,...,True,0.000331,False,0.000221,,,,,,
3531548,738.0,1.0,1.0,0.0,1.0,elif token != TOKEN_OPERATOR:,616.0,src/jinja2/lexer.py,wrap,,...,True,0.000342,False,0.000233,,,,,,


Index(['mutant_id', 'contains_branch_mutant',
       'contains_equality_comparison_mutant', 'contains_loop_mutant',
       'contains_math_operands_mutant', 'current_line', 'line_number_changed',
       'modified_file_path', 'modified_method', 'previous_line', 'repo_path',
       'outcome', 'test_id', 'full_name', 'name', 'filepath', 'duration',
       'setup_outcome', 'setup_duration', 'call_outcome', 'call_duration',
       'teardown_outcome', 'teardown_duration', 'contains_branch_execution',
       'contains_loop_execution', 'contains_math_operands_execution',
       'contains_equality_comparison_execution'],
      dtype='object')


# Data Preparation

### Cleansing

In [3]:
# Context flags and outcome columns: a missing value is encoded as False.
flag_columns = [
    "contains_branch_mutant", "contains_loop_mutant",
    "contains_math_operands_mutant", "contains_equality_comparison_mutant",
    "contains_branch_execution", "contains_loop_execution",
    "contains_math_operands_execution", "contains_equality_comparison_execution",
    "teardown_outcome", "setup_outcome", "call_outcome", "outcome",
]
for flag_column in flag_columns:
    is_missing = mutants_and_tests[flag_column].isnull()
    mutants_and_tests.loc[is_missing, flag_column] = False

# Duration columns: a missing value is encoded as 0.
duration_columns = ["teardown_duration", "setup_duration", "call_duration"]
for duration_column in duration_columns:
    is_missing = mutants_and_tests[duration_column].isnull()
    mutants_and_tests.loc[is_missing, duration_column] = 0

In [4]:
# Some test_ids occur only very rarely and cause problems in the reordering.
# List them here to remove all of their rows from the dataset.
test_ids_to_kill = [] # e.g.: [82, 83]

# Keep every row whose test_id is not listed; .copy() yields an independent frame.
is_killed = mutants_and_tests['test_id'].isin(test_ids_to_kill)
mutants_and_tests = mutants_and_tests.loc[~is_killed].copy()

### Feature Generation and Encoding

In [5]:
# Add edit distance feature

# Cast both text columns to the pandas string dtype and replace missing values by "".
# fillna() is used instead of `frame[col].loc[mask] = ""`: that chained assignment writes
# through an intermediate Series and is not guaranteed to update the frame itself.
for text_column in ("name", "modified_method"):
    mutants_and_tests[text_column] = mutants_and_tests[text_column].astype('string').fillna("")

# Edit distance between the modified method and the test name, computed by the
# project helper from src.edit_distance_feature.
mutants_and_tests['edit_distance'] = edit_distance_feature(
    mutants_and_tests['modified_method'],
    mutants_and_tests['name']
)
mutants_and_tests['edit_distance'].value_counts()

30    7426
28    7084
35    5909
27    5822
32    5618
33    5462
31    5416
26    4929
29    4855
34    4832
25    4763
38    4743
39    4403
36    3630
37    3594
24    3224
42    3016
40    2910
23    2510
41    2415
43    2360
44    2079
21    2035
22    1842
19    1592
46    1458
47    1424
18    1271
16    1232
17    1219
45    1208
20    1158
15    1007
48     980
14     709
49     577
50     430
52     294
13     286
53     244
54     243
51     121
12      36
11      15
10      15
9        2
8        2
Name: edit_distance, dtype: int64

In [6]:
# Separate the features (X) from the label (y). Columns of X are encoded later on,
# while mutants_and_tests keeps the original values.
X = mutants_and_tests.drop(columns=['outcome'])
y = mutants_and_tests["outcome"]

# Drop every row that has a missing value in any column of mutants_and_tests.
previous_length = len(X)
row_is_complete = ~mutants_and_tests.isnull().any(axis=1)
X = X.loc[row_is_complete]
y = y.loc[row_is_complete]

kicked_out = previous_length - len(X)
kicked_out_percent = (1 - len(X) / previous_length) * 100
print("Kicked out " + str(kicked_out) + " values from " + str(previous_length) + " total values. (" + str(kicked_out_percent) + "%)")

Kicked out 0 values from 116400 total values. (0.0%)


In [7]:
# Columns that are replaced by their ordinal encoding:
encoded_columns = ["modified_method", "modified_file_path", "name", "filepath", "current_line", "previous_line"]

# Fit one encoder on all of these columns, then write each encoded column back into X.
enc = OrdinalEncoder()
X_enc = enc.fit_transform(X[encoded_columns])
for column_name, encoded_values in zip(encoded_columns, X_enc.T):
    X[column_name] = encoded_values


### Feature Selection
(you can reload if you want all features selected)

In [8]:
# Dangerous features: you probably don't want them in the dataset because they can spoil y.
dangerous_features = ['setup_outcome', 'setup_duration', 'call_outcome', 'call_duration', 'teardown_outcome', 'teardown_duration']

# Features that were not encoded above.
unencoded_features = ['repo_path', 'full_name']

# Remove both groups from X in a single drop.
X = X.drop(columns=dangerous_features + unencoded_features)


In [9]:

# Multi-select widget listing every column of X; all columns are preselected.
w = widgets.SelectMultiple(
    description='Select the desired features',
    options=X.columns,
    value=tuple(X.columns),
    rows=len(X.columns),
    disabled=False,
)
w

SelectMultiple(description='Select the desired features', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…

In [10]:
# Restrict X to the columns currently selected in the widget.
feature_columns = [*w.value]
X = X.loc[:, feature_columns]

In [11]:
# Split test and train by mutant id: rows below 70% of the largest mutant_id are used
# for training, all rows at or above that threshold for testing.
split_mutant = X['mutant_id'].max() * 0.7
print("split_mutant is " + str(split_mutant))
# TODO: this has to split of whole
in_train = X['mutant_id'] < split_mutant
in_test = X['mutant_id'] >= split_mutant

train, test = X.loc[in_train], X.loc[in_test]

X_train, y_train = X.loc[in_train], y.loc[in_train]
X_test, y_test = X.loc[in_test], y.loc[in_test]

# shuffle alternative: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=420, shuffle=True)


split_mutant is 839.3


# Reordering

To help you reorder the tests, there are some useful functionalities in ```src.reorderer```.
The most useful is probably the ```BinaryPredictionReorderer```. You can hand in a binary predictor for failing tests, and it will automatically
generate an order of the tests that puts the predicted failing tests in front. As an easy example, you can put in a DecisionTree classifier and see how it performs. (Hint: Use the sparsify switch in the cell loading the dataset (the second cell in the notebook) for fast tryout analysis.)

In [20]:
class BaselineBinaryPredictor:
    """Baseline predictor that copies the label of the nearest training mutant.

    For each row to predict, only training rows with the same ``test_id`` are
    considered; the label of the one whose ``mutant_id`` is numerically closest
    is returned as the prediction.
    """

    def name(self):
        """Return the display name of this predictor."""
        return "BaselineBinaryPredictor"

    def fit(self, X_train, y_train):
        """Remember the training data.

        Args:
            X_train: DataFrame with at least the columns 'test_id' and 'mutant_id'.
            y_train: Series of labels sharing the index of ``X_train``.

        Returns:
            self
        """
        self.X_train = X_train
        # Store the labels on the instance: predict() previously read a global
        # named y_train, which silently tied the predictor to notebook state.
        self.y_train = y_train
        return self

    def predict(self, X_test):
        """Predict one label per row of ``X_test``.

        Args:
            X_test: DataFrame with at least the columns 'test_id' and 'mutant_id'.

        Returns:
            A list with one label per row of ``X_test``, in row order.

        Raises:
            ValueError: if a row's test_id has no rows in the training data.
        """
        # Loop-invariant work hoisted out of the row loop: the training
        # mutant ids, grouped by test_id (row order within a group is kept).
        mutant_ids_by_test = {
            test_id: mutant_ids
            for test_id, mutant_ids in self.X_train.groupby('test_id')['mutant_id']
        }

        predictions = []
        for _, row in X_test.iterrows():
            candidates = mutant_ids_by_test.get(row['test_id'])
            if candidates is None:
                raise ValueError(
                    "No training rows for test_id " + str(row['test_id'])
                    + "; cannot determine the nearest mutant."
                )
            # Index label of the training row with the closest mutant_id.
            nearest_mutant_id_index = (candidates - row['mutant_id']).abs().idxmin()
            predictions.append(self.y_train.loc[nearest_mutant_id_index])
        return predictions
        
# Wrap the baseline predictor in a reorderer, fit it on the training split and
# compute the resulting order for the test split.
baseline_predictor = BaselineBinaryPredictor()
bpr_baseline = BinaryPredictionReorderer(baseline_predictor)
bpr_baseline.fit(X_train, y_train)
bpr_baseline_order = bpr_baseline.predict(X_test)



## Evaluation

In [16]:

# All reorderers that are compared against each other.
reorderers = [
    NaiveReorderer(),
    AverageReorderer(),
    BinaryPredictionReorderer(tree.DecisionTreeClassifier(ccp_alpha=0.001)),
    BinaryPredictionReorderer(KNeighborsClassifier()),
    BinaryPredictionReorderer(ExtraTreesClassifier(n_estimators=250, random_state=420)),
    BinaryPredictionReorderer(make_pipeline(StandardScaler(), SVC(gamma='auto'))),
    BinaryPredictionReorderer(GradientBoostingClassifier(random_state=420)),
]

# The analyzer takes all reorderers, fits them, lets them predict and evaluates them.
evaluation = ReorderingAnalyzer(reorderers)
evaluation.fit(X_train, y_train)
evaluation.predict(X_test)
evaluation_data = evaluation.evaluate(mutants_and_tests)
evaluation_data

....... finished.
....... finished.
....... finished.
....... finished.
....... finished.
....... finished.
....... finished.


Unnamed: 0,apfd_avg,apfd_max,apfd_min,first_failing_duration_avg,last_failing_duration_avg
NaiveReorderer,0.498664,0.955,0.02,0.118097,0.210503
AverageReorderer,0.589625,0.99,0.035,0.074543,0.201799
BinaryPredictionReorderer(DecisionTreeClassifier),0.511334,0.995,0.02,0.096198,0.207704
BinaryPredictionReorderer(KNeighborsClassifier),0.515266,0.955,0.04,0.105108,0.209734
BinaryPredictionReorderer(ExtraTreesClassifier),0.510267,0.955,0.02,0.11657,0.210742
BinaryPredictionReorderer(Pipeline),0.435885,0.875,0.02,0.104435,0.225624
BinaryPredictionReorderer(GradientBoostingClassifier),0.497081,0.985,0.02,0.113088,0.212284
