In [412]:
import git
from git import Repo

from unidiff import PatchSet

import subprocess
import json
import pandas as pd
import numpy as np
import matplotlib
import sys
import matplotlib.pyplot as plt
import sklearn

from mutester.reordering_evaluation import ReorderingEvaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import ExtraTreesClassifier

In [413]:
# Load the pickled mutant/test execution table.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
mutants_and_tests = pd.read_pickle('flask_full_with_edit_distances_and_context.pkl')

# Sparsify data for faster analysis:
# mutants_and_tests = mutants_and_tests.loc[mutants_and_tests['test_id'] < 100].loc[mutants_and_tests['mutant_id'] < 1000]

# The outcome is used as a boolean label by everything below.
# (The previous `reset_index()` call discarded its result and the bare
# `mutants_and_tests["outcome"]` expression displayed nothing mid-cell, so both
# were no-ops and have been dropped.)
mutants_and_tests["outcome"] = mutants_and_tests["outcome"].astype('bool')
display(mutants_and_tests)

Unnamed: 0,mutant_id,contains_branch_mutant,contains_equality_comparison_mutant,contains_loop_mutant,contains_math_operands_mutant,current_line,line_number_changed,modified_file_path,modified_method,previous_line,...,setup_duration,call_outcome,call_duration,teardown_outcome,teardown_duration,contains_branch_execution,contains_loop_execution,contains_math_operands_execution,contains_equality_comparison_execution,edit_distance_method_name_test_name
0,1977.0,1.0,0.0,0.0,1.0,"key = ""XX mXX""",175.0,src/flask/json/tag.py,TagMarkup,,...,0.031885,True,0.000806,True,0.000296,False,False,True,False,9
1,1977.0,1.0,0.0,0.0,1.0,"key = ""XX mXX""",175.0,src/flask/json/tag.py,TagMarkup,,...,0.001572,True,0.000364,True,0.000240,False,True,False,False,9
2,1977.0,1.0,0.0,0.0,1.0,"key = ""XX mXX""",175.0,src/flask/json/tag.py,TagMarkup,,...,0.000355,True,0.000237,True,0.000219,False,True,False,False,9
3,1977.0,1.0,0.0,0.0,1.0,"key = ""XX mXX""",175.0,src/flask/json/tag.py,TagMarkup,,...,0.001527,True,0.000934,True,0.000243,False,False,False,True,9
4,1977.0,1.0,0.0,0.0,1.0,"key = ""XX mXX""",175.0,src/flask/json/tag.py,TagMarkup,,...,0.001606,True,0.000288,True,0.000238,False,False,False,True,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
958036,2092.0,1.0,1.0,0.0,1.0,"mimetype=current_app.config[""XXJSONIFY_...",336.0,src/flask/json/__init__.py,jsonify,,...,0.001344,True,0.001591,True,0.000247,False,False,False,False,7
958037,2092.0,1.0,1.0,0.0,1.0,"mimetype=current_app.config[""XXJSONIFY_...",336.0,src/flask/json/__init__.py,jsonify,,...,0.001295,True,0.002982,True,0.000226,False,False,False,False,7
958038,2092.0,1.0,1.0,0.0,1.0,"mimetype=current_app.config[""XXJSONIFY_...",336.0,src/flask/json/__init__.py,jsonify,,...,0.001311,True,0.001594,True,0.000247,False,False,False,False,7
958039,2092.0,1.0,1.0,0.0,1.0,"mimetype=current_app.config[""XXJSONIFY_...",336.0,src/flask/json/__init__.py,jsonify,,...,0.001978,True,0.001632,True,0.000247,False,False,False,False,7


# Data Preparation

In [453]:
from sklearn.preprocessing import OrdinalEncoder

# Boolean-like context flags describing the mutant and the executed test.
context_columns = ["contains_branch_mutant", "contains_loop_mutant", "contains_math_operands_mutant", "contains_equality_comparison_mutant",
    "contains_branch_execution", "contains_loop_execution", "contains_math_operands_execution", "contains_equality_comparison_execution"]
feature_columns = context_columns + ["edit_distance_method_name_test_name", "current_line", "line_number_changed", "modified_file_path", "previous_line", "test_id", "name", "filepath", "modified_method", "mutant_id"]
# feature_columns = ["current_line", "line_number_changed", "modified_file_path", "previous_line", "test_id", "name", "filepath", "mutant_id"]

# Throw out test_ids 82 and 83, since they make life really hard
mutants_and_tests = mutants_and_tests.loc[~mutants_and_tests['test_id'].isin([82.0, 83.0])].copy()

# Encode the string 'None' as False in the context columns.
# This has to happen BEFORE the feature frame is selected: X is a copy of the
# selected columns, so a replacement done afterwards never reaches X on a
# fresh kernel (it only appeared to work when the cell was run a second time).
mutants_and_tests[context_columns] = mutants_and_tests[context_columns].replace('None', False)

X = mutants_and_tests[feature_columns]

# Throw out nan values (compute the row mask once and use it for X and y):
complete_rows = ~X.isnull().any(axis=1)
# Explicit copy so the encoded columns below are written to an independent frame.
X = X.loc[complete_rows].copy()
y = mutants_and_tests.loc[complete_rows, "outcome"].astype('bool')

# Encode stuff we want to encode:
encoded_columns = ["modified_method", "modified_file_path", "name", "filepath", "current_line", "previous_line"]
# encoded_columns = ["name", "filepath", "modified_file_path", "current_line", "previous_line"]
enc = OrdinalEncoder()
X_enc = enc.fit_transform(X[encoded_columns])
for position, column_name in enumerate(encoded_columns):
    X[column_name] = X_enc[:, position]

# Split by mutant id so that all executions of one mutant land on the same side.
split_mutant = X['mutant_id'].max() * 0.7
print("split_mutant is " + str(split_mutant))
# TODO: the split should keep whole mutants together
is_train = X['mutant_id'] < split_mutant
is_test = X['mutant_id'] >= split_mutant

X_train = X.loc[is_train]
y_train = y.loc[is_train]

X_test = X.loc[is_test]
y_test = y.loc[is_test]

# Old names kept for any cell that still refers to them.
train = X_train
test = X_test

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=420, shuffle=False)
# Sanity check: the removed test_id must no longer be present (expected: empty frame).
mutants_and_tests.loc[mutants_and_tests['test_id'] == 82.0]

split_mutant is 1464.3999999999999


Unnamed: 0,mutant_id,contains_branch_mutant,contains_equality_comparison_mutant,contains_loop_mutant,contains_math_operands_mutant,current_line,line_number_changed,modified_file_path,modified_method,previous_line,...,setup_duration,call_outcome,call_duration,teardown_outcome,teardown_duration,contains_branch_execution,contains_loop_execution,contains_math_operands_execution,contains_equality_comparison_execution,edit_distance_method_name_test_name


In [399]:
class Reorderer:
    """Protocol for test reorderers: `fit` on training data, then `predict` orderings.

    Both methods are placeholders that raise NotImplementedError.
    """

    def fit(self, X_train, y_train):
        """Learn from the training features and outcomes (not implemented here)."""
        raise NotImplementedError()

    def predict(self, X_test):
        """Make an ordering for every mutant in X_test (not implemented here)."""
        raise NotImplementedError()

In [400]:
# Just uses a linear ordering
class NaiveReorderer:
    """Reorderer that gives every mutant the same ascending list of test ids."""

    def fit(self, X_train, y_train):
        """Nothing is learned; the method exists so the fit/predict protocol is complete."""
        return None

    def predict(self, X_test):
        """Return one row per mutant_id whose 'order' cell holds the sorted distinct test ids.

        The remaining columns are the per-mutant non-null counts produced by
        `groupby('mutant_id').count()`.
        """
        # Group keys come back sorted, which yields the ascending list of test ids.
        linear_order = X_test.groupby('test_id').count().index.tolist()
        per_mutant = X_test.groupby('mutant_id').count()
        # The column has to be object-typed so that a whole list fits into one cell.
        per_mutant = per_mutant.assign(order=None).astype({'order': 'object'})
        for mutant_id in per_mutant.index:
            per_mutant.at[mutant_id, 'order'] = linear_order
        return per_mutant

In [401]:
# Executes the most flaky tests first
class AverageReorderer:
    """Reorderer that runs the tests which failed most often in training first.

    The same global ordering is assigned to every mutant in `predict`.
    """

    def fit(self, X_train, y_train):
        """Rank all training test ids by their number of failing executions.

        Parameters:
            X_train: frame with at least a 'test_id' column.
            y_train: outcomes, where False marks a failing execution. A Series is
                aligned to X_train by index, a plain sequence by position.

        Sets `self.ordering` to a list of test ids, most failures first. Tests that
        never failed are kept at the end, so the ordering covers every test seen in
        training, and `fit` no longer raises a KeyError when no execution failed.
        Ties keep ascending test_id order thanks to the stable sort.
        """
        failures = pd.DataFrame({'test_id': X_train['test_id']})
        failures['outcome'] = y_train
        # A failure is an outcome that is exactly False (missing outcomes do not count).
        failures['failed'] = failures['outcome'] == False
        failure_counts = failures.groupby('test_id')['failed'].sum()
        ranked = failure_counts.sort_values(ascending=False, kind='mergesort')
        # This is directly how we want our permutation to be:
        self.ordering = list(ranked.index)

    def predict(self, X_test):
        """Return one row per mutant_id whose 'order' cell holds the fitted ordering.

        `fit` must have been called first; otherwise `self.ordering` does not exist.
        """
        orderings = X_test.groupby('mutant_id').count()
        # Object dtype is needed so that a list can be stored in a single cell.
        orderings['order'] = None
        orderings['order'] = orderings['order'].astype('object')
        for mutant_id in orderings.index:
            orderings.at[mutant_id, 'order'] = self.ordering
        return orderings
        

In [434]:
# Show the training split produced by the data-preparation cell (displayed as the cell's last expression).
X_train

Unnamed: 0,contains_branch_mutant,contains_loop_mutant,contains_math_operands_mutant,contains_equality_comparison_mutant,contains_branch_execution,contains_loop_execution,contains_math_operands_execution,contains_equality_comparison_execution,edit_distance_method_name_test_name,current_line,line_number_changed,modified_file_path,previous_line,test_id,name,filepath,modified_method,mutant_id
132940,1.0,1.0,1.0,1.0,False,False,True,False,33,1445.0,118.0,5.0,0.0,0.0,25.0,0.0,98.0,209.0
132941,1.0,1.0,1.0,1.0,False,True,False,False,33,1445.0,118.0,5.0,0.0,1.0,250.0,0.0,98.0,209.0
132942,1.0,1.0,1.0,1.0,False,True,False,False,33,1445.0,118.0,5.0,0.0,2.0,251.0,0.0,98.0,209.0
132943,1.0,1.0,1.0,1.0,False,False,False,True,33,1445.0,118.0,5.0,0.0,3.0,159.0,0.0,98.0,209.0
132944,1.0,1.0,1.0,1.0,False,False,False,True,33,1445.0,118.0,5.0,0.0,4.0,11.0,0.0,98.0,209.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
929952,1.0,1.0,1.0,1.0,False,False,False,False,8,1167.0,289.0,4.0,0.0,463.0,101.0,16.0,29.0,312.0
929953,1.0,1.0,1.0,1.0,False,False,False,False,8,1167.0,289.0,4.0,0.0,464.0,81.0,16.0,29.0,312.0
929954,1.0,1.0,1.0,1.0,False,False,False,False,8,1167.0,289.0,4.0,0.0,465.0,138.0,16.0,29.0,312.0
929955,1.0,1.0,1.0,1.0,False,False,False,False,8,1167.0,289.0,4.0,0.0,466.0,142.0,16.0,29.0,312.0


In [418]:
# This is magic: it first predicts the outcomes with a binary predictor, and then puts the tests that are predicted to fail at the front.
# TODO: It will even help you more: it will sort the predicted failures by duration, so that the first failing test is reached after a short duration
# TODO: It should probably also rank the tests predicted not to fail by their average
class BinaryPredictionReorderer:
    """Reorderer that puts the tests predicted to fail in front of the others.

    Wraps any object that offers `fit(X, y)` and `predict(X)`, where `predict`
    returns one outcome per row of X (False meaning "predicted to fail").
    """

    def __init__(self, predictor):
        # Binary outcome predictor; only its fit/predict methods are used.
        self.predictor = predictor

    def fit(self, X_train, y_train):
        """Fit the wrapped predictor on the training data."""
        self.predictor.fit(X_train, y_train)

    def predict(self, X_test):
        """Return one row per mutant_id whose 'order' cell lists the test ids to run.

        For each mutant, the tests predicted False come first, followed by the
        tests predicted True; within each group the row order of X_test is kept.
        Rows whose prediction is neither False nor True are left out.
        The raw predictions are also stored in `self.prediction`.
        """
        X_test = X_test.copy()
        orderings = X_test.groupby('mutant_id').count()
        # Object dtype is needed so that a list can be stored in a single cell.
        orderings['order'] = None
        orderings['order'] = orderings['order'].astype('object')
        self.prediction = self.predictor.predict(X_test)
        X_test['outcome_prediction'] = self.prediction
        # One pass over the groups instead of filtering the whole frame per mutant.
        for mutant_id, predictions_for_mutant in X_test.groupby('mutant_id'):
            predicted = predictions_for_mutant['outcome_prediction']
            test_ids = predictions_for_mutant['test_id']
            fail_predictions = test_ids[predicted == False]
            success_predictions = test_ids[predicted == True]
            # pd.concat replaces Series.append, which was removed in pandas 2.0.
            ordering = list(pd.concat([fail_predictions, success_predictions]))
            orderings.at[mutant_id, 'order'] = ordering
        return orderings
        

In [391]:
# Baseline orderings for the test split: the naive linear order needs no fitting ...
nr = NaiveReorderer()
naive = nr.predict(X_test)
# ... while the average reorderer is fitted on the training split first.
ar = AverageReorderer()
ar.fit(X_train, y_train)
aver = ar.predict(X_test)

In [387]:
# Decision Tree
from sklearn import tree

# Build the classifier once and hand that same instance to the reorderer, so the
# hyper-parameters live in a single place. The fixed random_state (same seed as
# the forest cell) makes the fitted tree reproducible across runs.
clf = tree.DecisionTreeClassifier(ccp_alpha=0.001, random_state=420)

bpr_tree = BinaryPredictionReorderer(clf)
bpr_tree.fit(X_train, y_train)
bpr_tree_order = bpr_tree.predict(X_test)

In [455]:
# Forest: extremely randomized trees as the binary outcome predictor.
forest_classifier = ExtraTreesClassifier(n_estimators=250, random_state=420)

bpr_forest = BinaryPredictionReorderer(forest_classifier)
bpr_forest.fit(X_train, y_train)
bpr_forest_order = bpr_forest.predict(X_test)

In [None]:

# Support vector classifier with standardised features.
# NOTE(review): the fitted pipeline is only stored in `clf`; this cell does not wrap it in a
# BinaryPredictionReorderer, so no ordering is produced from it here. The cell marker shows
# no execution count — confirm whether it is still meant to be run.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(X_train, y_train)
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma='auto'))])

In [None]:
from sklearn.neighbors import KNeighborsClassifier


# K-nearest-neighbours as the binary outcome predictor.
bpr_kn = BinaryPredictionReorderer(KNeighborsClassifier())
bpr_kn.fit(X_train, y_train)
# Predict with the KNN reorderer itself; the previous code called bpr_tree.predict
# here, so bpr_kn_order silently held the decision-tree ordering.
bpr_kn_order = bpr_kn.predict(X_test)


In [454]:
class BaselineBinaryPredictor:
    """Predicts an execution's outcome from the nearest training mutant of the same test.

    For each test row, the training rows with the same test_id are searched for
    the mutant_id closest to the row's mutant_id; that row's training outcome is
    returned as the prediction.
    """

    def fit(self, X_train, y_train):
        """Remember the training data.

        Parameters:
            X_train: frame with 'test_id' and 'mutant_id' columns.
            y_train: outcomes that can be looked up with X_train's index labels.
        """
        self.X_train = X_train
        # Keep the labels on the instance; predict() used to read the notebook
        # global `y_train`, which ignored whatever was passed to fit().
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list with one predicted outcome per row of X_test.

        Raises:
            ValueError: from `idxmin` when a test_id of X_test has no training rows.
        """
        # Training mutant ids per test_id, filled lazily so the training frame is
        # filtered once per test rather than once per test row.
        mutants_by_test = {}
        predictions = []
        for test_id, mutant_id in zip(X_test['test_id'], X_test['mutant_id']):
            if test_id not in mutants_by_test:
                # Select only rows from X_train with the same test_id
                same_test = self.X_train['test_id'] == test_id
                mutants_by_test[test_id] = self.X_train.loc[same_test, 'mutant_id']
            candidates = mutants_by_test[test_id]
            nearest_mutant_id_index = (candidates - mutant_id).abs().idxmin()
            predictions.append(self.y_train[nearest_mutant_id_index])
        return predictions
        
# Baseline: the nearest-mutant lookup predictor wrapped in the binary-prediction reorderer.
bpr_baseline = BinaryPredictionReorderer(BaselineBinaryPredictor())
bpr_baseline.fit(X_train, y_train)
bpr_baseline_order = bpr_baseline.predict(X_test)



## Evaluation

In [461]:
# Evaluation frame: the test-split features plus the true outcome and the duration.
# NOTE: ordering_evaluation() below reads `m` as a notebook global.
m = X_test.copy()
m['outcome'] = y_test
# Series assignment aligns on the index, so only rows present in X_test receive a duration.
m['duration'] = mutants_and_tests['duration']

def series_analysis(name, series):
    """Print the name of a metric followed by its average, minimum and maximum.

    Parameters:
        name: label printed on the first line.
        series: sized collection of numbers (list, array or Series).

    An empty collection is reported explicitly; np.min / np.max would otherwise
    raise a ValueError on it.
    """
    print(name + ': ')
    if len(series) == 0:
        print('No values to analyse')
        return
    print('Avg: {:.8f} Min: {:01.8f}, Max: {:01.8f}'.format(np.mean(series), np.min(series), np.max(series)))

def ordering_evaluation(ordering, mutants_and_tests, name='', executions=None):
    """Evaluate per-mutant test orderings and print summary statistics.

    Parameters:
        ordering: frame indexed by mutant_id with an 'order' column that holds
            the list of test ids for that mutant.
        mutants_and_tests: frame with a 'test_id' column; only used to print the
            number of distinct tests.
        name: label printed in the report header.
        executions: frame with 'mutant_id' and 'outcome' columns that is handed,
            per mutant, to ReorderingEvaluation. Defaults to the notebook-level
            frame `m`, which is what this function always used before.

    Mutants without any failing execution (or without any execution at all) are
    skipped. For the others APFD, first_failing_duration and
    last_test_failing_duration are collected from ReorderingEvaluation and
    summarised with series_analysis.
    """
    # TODO: Make this nicely from the class (dataclass?)
    if executions is None:
        # Backward-compatible default: the global evaluation frame.
        executions = m
    test_count = mutants_and_tests['test_id'].nunique()
    print(test_count)
    # Split the executions once instead of filtering the whole frame per mutant.
    executions_by_mutant = dict(tuple(executions.groupby('mutant_id')))
    apfd = []
    first_failing_duration = []
    last_test_failing_duration = []
    for mutant_id, order in ordering['order'].items():
        # Lightweight progress indicator.
        if mutant_id % 50 == 0:
            print('.', end='')
        mutant_executions = executions_by_mutant.get(mutant_id)
        # Only execute the metrics if we have at least one failure
        if mutant_executions is None or mutant_executions['outcome'].values.all():
            continue
        r = ReorderingEvaluation(order, mutant_executions)
        apfd.append(r.APFD())
        first_failing_duration.append(r.first_failing_duration())
        last_test_failing_duration.append(r.last_test_failing_duration())
    print('--------' + name + '---------')
    print('----- Analysis -------')
    series_analysis('apfd', apfd)
    series_analysis('first_failing_duration', first_failing_duration)
    series_analysis('last_test_failing_duration', last_test_failing_duration)
    

# Evaluate one ordering at a time; uncomment the line for the reorderer to report on.
#ordering_evaluation(aver, mutants_and_tests, 'aver')
#ordering_evaluation(naive, mutants_and_tests, 'naive')
#ordering_evaluation(bpr_tree_order, mutants_and_tests, 'Decision Tree')
ordering_evaluation(bpr_forest_order, mutants_and_tests, 'Forest')
#ordering_evaluation(bpr_baseline_order, mutants_and_tests, 'Baseline Binary')
# TODO: Check if all have same length

466
..........--------Forest---------
----- Analysis -------
apfd: 
Avg: 0.91683044 Min: 0.38209220, Max: 0.98049645
first_failing_duration: 
Avg: 0.00445580 Min: 0.00037622, Max: 0.92044806
last_test_failing_duration: 
Avg: 0.52945881 Min: 0.00417948, Max: 2.06705403


In [360]:
# Scratch check: evaluate the ordering of the single mutant with mutant_id == 2.
mutant_executions = mutants_and_tests.loc[mutants_and_tests['mutant_id'] == 2]
mutant_executions
# NOTE(review): `o` is not defined in any cell visible here, so this fails on a fresh
# kernel — presumably it was an orderings frame from a since-removed cell; confirm.
r = ReorderingEvaluation(o.loc[2].order, mutant_executions)

In [361]:
# Print the three metrics of the ReorderingEvaluation `r` built in the previous cell.
print(r.APFD())
print(r.first_failing_duration())
print(r.last_test_failing_duration())

0.9027777777777778
4.251628637313843


AssertionError: The provided dataframe contains more than one entry for the given test_id

In [138]:
# NOTE(review): `ordering` is not assigned at notebook level in any cell visible here
# (it only appears as a function parameter), so this relies on leftover kernel state.
ordering

0      383.0
1      225.0
2      388.0
3      387.0
4      386.0
       ...  
463    273.0
464    358.0
465    357.0
466    274.0
467    275.0
Name: test_id, Length: 468, dtype: float64