In [1]:
%load_ext autoreload
%autoreload 2

import git
from git import Repo

from unidiff import PatchSet

import subprocess
import json
import pandas as pd
import numpy as np
import matplotlib
import sys
import matplotlib.pyplot as plt
import sklearn
import ipywidgets as widgets


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import tree
from sklearn.preprocessing import OrdinalEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

from src.edit_distance_feature import edit_distance_feature

from src.reorderer import *
from src.reordering_analyzer import ReorderingAnalyzer

%load_ext line_profiler

In [5]:
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
mutants_and_tests = pd.read_pickle('data/httpie_full_with_context.pkl')

# Sparsify data for faster analysis:
sparsify = False
if sparsify:
    # Evaluate both conditions against the same frame and filter in a single step.
    is_small_subset = (mutants_and_tests['test_id'] < 100) & (mutants_and_tests['mutant_id'] < 1200)
    mutants_and_tests = mutants_and_tests.loc[is_small_subset]

# reset_index() returns a new frame, so the result has to be assigned to take effect.
# drop=True discards the old index instead of adding it as an extra 'index' column.
mutants_and_tests = mutants_and_tests.reset_index(drop=True)

# Casting NaN straight to bool yields True, so a missing outcome is explicitly
# mapped to False here: a row is True only if its outcome is present and truthy.
outcome = mutants_and_tests["outcome"]
mutants_and_tests["outcome"] = outcome.notna() & outcome.astype('bool')

display(mutants_and_tests)
print(mutants_and_tests.columns)

Unnamed: 0,mutant_id,contains_branch_mutant,contains_equality_comparison_mutant,contains_loop_mutant,contains_math_operands_mutant,current_line,line_number_changed,modified_file_path,modified_method,previous_line,...,setup_outcome,setup_duration,call_outcome,call_duration,teardown_outcome,teardown_duration,contains_branch_execution,contains_loop_execution,contains_math_operands_execution,contains_equality_comparison_execution
0,1351.0,1.0,1.0,1.0,1.0,sys.stderr.write('XX\nXX'),144.0,httpie/cli/argtypes.py,prompt_password,,...,True,0.037148,True,0.009795,True,0.000361,,,,
1,1351.0,1.0,1.0,1.0,1.0,sys.stderr.write('XX\nXX'),144.0,httpie/cli/argtypes.py,prompt_password,,...,True,0.000785,True,0.013675,True,0.000518,,,,
2,1351.0,1.0,1.0,1.0,1.0,sys.stderr.write('XX\nXX'),144.0,httpie/cli/argtypes.py,prompt_password,,...,True,0.001102,True,0.012399,True,0.000281,,,,
3,1351.0,1.0,1.0,1.0,1.0,sys.stderr.write('XX\nXX'),144.0,httpie/cli/argtypes.py,prompt_password,,...,True,0.000470,True,0.005241,True,0.000274,,,,
4,1351.0,1.0,1.0,1.0,1.0,sys.stderr.write('XX\nXX'),144.0,httpie/cli/argtypes.py,prompt_password,,...,True,0.000430,True,0.005326,True,0.000222,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322364,1526.0,1.0,1.0,1.0,1.0,message_class = None,113.0,httpie/output/writer.py,build_output_stream_for_message,,...,,,,,,,,,,
322365,1527.0,1.0,1.0,1.0,1.0,if env.stdout_isatty or with_body:,123.0,httpie/output/writer.py,build_output_stream_for_message,,...,,,,,,,,,,
322366,1528.0,1.0,1.0,1.0,1.0,yield b'XX\n\nXX',126.0,httpie/output/writer.py,build_output_stream_for_message,,...,,,,,,,,,,
322367,1529.0,1.0,1.0,1.0,1.0,if env.stdout_isatty and not args.prettify:,136.0,httpie/output/writer.py,get_stream_type_and_kwargs,,...,,,,,,,,,,


Index(['mutant_id', 'contains_branch_mutant',
       'contains_equality_comparison_mutant', 'contains_loop_mutant',
       'contains_math_operands_mutant', 'current_line', 'line_number_changed',
       'modified_file_path', 'modified_method', 'previous_line', 'repo_path',
       'outcome', 'test_id', 'full_name', 'name', 'filepath', 'duration',
       'setup_outcome', 'setup_duration', 'call_outcome', 'call_duration',
       'teardown_outcome', 'teardown_duration', 'contains_branch_execution',
       'contains_loop_execution', 'contains_math_operands_execution',
       'contains_equality_comparison_execution'],
      dtype='object')


# Data Preparation

### Cleansing

In [6]:
# Missing values are replaced column by column:
#   * context flags and outcome columns -> False
#   * duration columns                  -> 0
flag_and_outcome_columns = [
    "contains_branch_mutant", "contains_loop_mutant",
    "contains_math_operands_mutant", "contains_equality_comparison_mutant",
    "contains_branch_execution", "contains_loop_execution",
    "contains_math_operands_execution", "contains_equality_comparison_execution",
    "teardown_outcome", "setup_outcome", "call_outcome", "outcome",
]
duration_columns = ["teardown_duration", "setup_duration", "call_duration"]

for fill_value, columns in ((False, flag_and_outcome_columns), (0, duration_columns)):
    for column in columns:
        # Only the rows where this column is null are overwritten.
        is_missing = mutants_and_tests[column].isnull()
        mutants_and_tests.loc[is_missing, column] = fill_value

In [7]:
# Some test_ids occur only very rarely and cause problems in the reordering.
# List them here to remove all of their rows from the dataset.
test_ids_to_kill = [] # e.g.: [82, 83]

# Start by keeping every row, then clear the flag for each unwanted test_id.
keep_row = pd.Series(True, index=mutants_and_tests.index)
for unwanted_test_id in test_ids_to_kill:
    keep_row &= mutants_and_tests['test_id'] != unwanted_test_id

# .copy() makes the filtered frame independent of the one it was taken from.
mutants_and_tests = mutants_and_tests.loc[keep_row].copy()

### Feature Generation and Encoding

In [8]:
# Add edit distance feature

# Cast both name columns to the pandas string dtype and replace missing values with "".
# The whole column is assigned back to the frame: writing through
# `frame[col].loc[mask] = ...` is chained assignment, which may only modify a
# temporary copy and leave the frame unchanged.
for text_column in ("name", "modified_method"):
    mutants_and_tests[text_column] = mutants_and_tests[text_column].astype('string').fillna("")

# edit_distance_feature is a project helper (src.edit_distance_feature);
# presumably it returns one distance per (modified_method, name) pair - TODO confirm.
mutants_and_tests['edit_distance'] = edit_distance_feature(
    mutants_and_tests['modified_method'],
    mutants_and_tests['name']
)
mutants_and_tests['edit_distance'].value_counts()

21     8519
20     8494
22     8482
24     8361
19     7816
       ... 
133       9
4         9
5         8
121       5
3         2
Name: edit_distance, Length: 154, dtype: int64

In [9]:
# Now create X and y, since we will encode columns in X and want to keep them in mutants_and_tests
previous_length = len(mutants_and_tests)

# Throw out nan values:
# The row mask is computed once and shared, so X and y are filtered identically.
has_missing_value = mutants_and_tests.isnull().any(axis=1)

# .copy() gives X its own data, so assigning encoded columns to X later does not
# operate on a filtered slice of another frame.
X = mutants_and_tests.drop(['outcome'], axis=1).loc[~has_missing_value].copy()
y = mutants_and_tests["outcome"].loc[~has_missing_value]

kicked_out = previous_length - len(X)
# Guard against an empty dataset so the percentage does not divide by zero.
kicked_out_percent = (1 - len(X) / previous_length) * 100 if previous_length else 0.0
print(f"Kicked out {kicked_out} values from {previous_length} total values. ({kicked_out_percent}%)")

Kicked out 605 values from 322369 total values. (0.18767313234212546%)


In [10]:
# Columns whose values are replaced by ordinal codes:
encoded_columns = ["modified_method", "modified_file_path", "name", "filepath", "current_line", "previous_line"]

# Fit the encoder on exactly these columns and transform them in one go.
enc = OrdinalEncoder()
X_enc = enc.fit_transform(X[encoded_columns])

# Column k of X_enc holds the codes for encoded_columns[k]; iterating over the
# transposed array pairs each column name with its encoded values.
for column_name, encoded_values in zip(encoded_columns, X_enc.T):
    X[column_name] = encoded_values


### Feature Selection
(Re-run the preceding cells to reload the data if you want all features selected again.)

In [11]:
# "Dangerous" features are ones you probably don't want in the dataset, since they can spoil y
dangerous_features = [
    'setup_outcome', 'setup_duration',
    'call_outcome', 'call_duration',
    'teardown_outcome', 'teardown_duration',
]

# Columns that were not encoded
unencoded_features = ['repo_path', 'full_name']

# Remove both groups in a single drop.
X = X.drop(columns=dangerous_features + unencoded_features)


In [12]:

# Multi-select widget listing every remaining column of X; all of them start selected.
# The next cell reads the selection back from `w.value`.
selectable_features = X.columns
w = widgets.SelectMultiple(
    description='Select the desired features',
    options=selectable_features,
    value=tuple(selectable_features),
    rows=len(selectable_features),
    disabled=False,
)
w

SelectMultiple(description='Select the desired features', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…

In [13]:
# Restrict X to the columns currently selected in the widget `w`.
feature_columns = [*w.value]
X = X.loc[:, feature_columns]

In [14]:
# Split test and train
# The split is made on mutant_id instead of at random: rows with a mutant_id below
# the threshold go to the training set, all others to the test set.
train_fraction = 0.7  # share of the mutant_id range used for training
split_mutant = X['mutant_id'].max() * train_fraction
print("split_mutant is " + str(split_mutant))

# Compute each row mask once and reuse it for X and y so both stay aligned.
in_train = X['mutant_id'] < split_mutant
in_test = X['mutant_id'] >= split_mutant

# TODO: this has to split of whole
train = X.loc[in_train]
test = X.loc[in_test]

X_train = X.loc[in_train]
y_train = y.loc[in_train]

X_test = X.loc[in_test]
y_test = y.loc[in_test]

# shuffle alternative: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=420, shuffle=True)


split_mutant is 1207.5


# Reordering

To help you reorder the tests, there are some useful functionalities in ```src.reorderer```.
The most useful is probably the ```BinaryPredictionReorderer```. You can hand in a binary predictor for failing tests, and it will automatically
generate an order of the tests that puts the predicted failing tests in front. As an easy example, you can put in a DecisionTree classifier and see how it performs. (Hint: use the `sparsify` switch in the cell that loads the dataset (the second cell in the notebook) for fast try-out analysis.)

In [20]:
class BaselineBinaryPredictor:
    """Baseline predictor that copies the label of the nearest known mutant.

    For every row to predict, the training rows with the same ``test_id`` are
    considered, and the training label of the row whose ``mutant_id`` is
    numerically closest is returned as the prediction.
    """

    def name(self):
        """Return the display name of this predictor."""
        return "BaselineBinaryPredictor"

    def fit(self, X_train, y_train):
        """Store the training data used for the nearest-mutant lookup.

        Args:
            X_train: DataFrame with at least the columns 'test_id' and 'mutant_id'.
            y_train: Labels for X_train. A Series is looked up by the index
                labels of X_train; any other sequence is assumed to be in the
                same row order as X_train.

        Returns:
            The fitted predictor itself.
        """
        if not isinstance(y_train, pd.Series):
            y_train = pd.Series(y_train, index=X_train.index)
        self.X_train = X_train
        # Keep the labels on the instance so predict() does not depend on a
        # notebook-global variable that happens to be called y_train.
        self.y_train = y_train
        # Group the training mutant_ids by test_id once, so predict() does not
        # have to rescan all of X_train for every predicted row.
        self._mutant_ids_by_test = {
            test_id: mutant_ids
            for test_id, mutant_ids in X_train.groupby('test_id')['mutant_id']
        }
        return self

    def predict(self, X_test):
        """Predict one label per row of X_test.

        Args:
            X_test: DataFrame with at least the columns 'test_id' and 'mutant_id'.

        Returns:
            A list with one prediction per row, in row order. Rows whose
            test_id never occurred in the training data are predicted as False.
        """
        predictions = []
        for test_id, mutant_id in zip(X_test['test_id'], X_test['mutant_id']):
            known_mutant_ids = self._mutant_ids_by_test.get(test_id)
            if known_mutant_ids is None:
                # No training rows for this test: there is no nearest mutant
                # to copy from, so fall back to False instead of failing.
                predictions.append(False)
                continue
            # Index label of the training row with the closest mutant_id
            # (the first such row if several are equally close).
            nearest_mutant_id_index = (known_mutant_ids - mutant_id).abs().idxmin()
            predictions.append(self.y_train.loc[nearest_mutant_id_index])
        return predictions
        
# Wrap the baseline predictor in a BinaryPredictionReorderer, fit it on the
# training split and compute a test order for the test split.
baseline_predictor = BaselineBinaryPredictor()
bpr_baseline = BinaryPredictionReorderer(baseline_predictor)
bpr_baseline.fit(X_train, y_train)
bpr_baseline_order = bpr_baseline.predict(X_test)



## Evaluation

In [15]:

# All reorderers to compare. Classifiers are wrapped in a BinaryPredictionReorderer.
reorderers_to_compare = [
    NaiveReorderer(),
    AverageReorderer(),
    BinaryPredictionReorderer(tree.DecisionTreeClassifier(ccp_alpha=0.001)),
    BinaryPredictionReorderer(KNeighborsClassifier()),
    BinaryPredictionReorderer(
        ExtraTreesClassifier(n_estimators=250, random_state=420)
    ),
    #BinaryPredictionReorderer((make_pipeline(StandardScaler(), SVC(gamma='auto')))),
    BinaryPredictionReorderer(GradientBoostingClassifier(random_state=420)),
]

# The analyzer takes all the reorderers, fits them on the training split,
# lets them predict on the test split and evaluates them.
evaluation = ReorderingAnalyzer(reorderers_to_compare)
evaluation.fit(X_train, y_train)
evaluation.predict(X_test)
evaluation_data = evaluation.evaluate(mutants_and_tests)
evaluation_data

..... finished.
..... finished.
..... finished.
..... finished.
..... finished.
..... finished.


Unnamed: 0,apfd_avg,apfd_max,apfd_min,first_failing_duration_avg,last_failing_duration_avg
NaiveReorderer,0.50683,0.739084,0.283128,3.14765,12.504589
AverageReorderer,0.868186,0.99361,0.485415,0.000755,2.009228
BinaryPredictionReorderer(DecisionTreeClassifier),0.75822,0.99361,0.402023,0.001116,7.703432
BinaryPredictionReorderer(KNeighborsClassifier),0.603726,0.99361,0.28716,0.505221,10.914174
BinaryPredictionReorderer(ExtraTreesClassifier),0.719978,0.99361,0.313099,0.000899,7.755413
BinaryPredictionReorderer(GradientBoostingClassifier),0.765312,0.99361,0.359957,0.016333,7.268652


In [16]:
# With no target path, to_csv returns the CSV text, which the cell then displays.
evaluation_data.to_csv(path_or_buf=None)

',apfd_avg,apfd_max,apfd_min,first_failing_duration_avg,last_failing_duration_avg\nNaiveReorderer,0.5068299136682174,0.739084132055378,0.28312794766468885,3.147650254926374,12.50458931000002\nAverageReorderer,0.8681855454327921,0.9936102236421724,0.4854146409223503,0.0007545048190701392,2.00922835411564\nBinaryPredictionReorderer(DecisionTreeClassifier),0.7582195012474157,0.9936102236421724,0.4020234291799787,0.0011157858756280714,7.703432331546661\nBinaryPredictionReorderer(KNeighborsClassifier),0.6037256391985439,0.9936102236421724,0.2871595922714133,0.5052207500703874,10.914173759183576\nBinaryPredictionReorderer(ExtraTreesClassifier),0.7199784975682442,0.9936102236421724,0.3130990415335463,0.0008987003757107643,7.755413214622005\nBinaryPredictionReorderer(GradientBoostingClassifier),0.7653118279508113,0.9936102236421724,0.3599574014909478,0.016332727862942605,7.268652379128241\n'