In [80]:
import git
from git import Repo

from unidiff import PatchSet

import subprocess
import json
import pandas as pd
import numpy as np
import matplotlib
import sys
import matplotlib.pyplot as plt
import sklearn

from mutester.reordering_evaluation import ReorderingEvaluation

In [111]:
# Covariance Matrix 
mutants_and_tests = pd.read_pickle('20200621-235502_flask_full_threaded_with_method_names.pkl')
mutants_and_tests.reset_index()
mutants_and_tests["outcome"] = mutants_and_tests["outcome"].astype('bool')
mutants_and_tests["outcome"]
display(mutants_and_tests)

Unnamed: 0,mutant_id,current_line,line_number_changed,modified_file_path,modified_method,previous_line,repo_path,outcome,test_id,full_name,name,filepath,duration,setup_outcome,setup_duration,call_outcome,call_duration,teardown_outcome,teardown_duration
0,1873.0,debug = None,841.0,src/flask/cli.py,run_command,,/tmp/tmpszy15usp,True,0.0,tests/test_appctx.py::test_basic_url_generation,test_basic_url_generation,tests/test_appctx.py,0.059266,True,0.029150,True,0.000706,True,0.000260
1,1873.0,debug = None,841.0,src/flask/cli.py,run_command,,/tmp/tmpszy15usp,True,1.0,tests/test_appctx.py::test_url_generation_requ...,test_url_generation_requires_server_name,tests/test_appctx.py,0.003147,True,0.001320,True,0.000298,True,0.000208
2,1873.0,debug = None,841.0,src/flask/cli.py,run_command,,/tmp/tmpszy15usp,True,2.0,tests/test_appctx.py::test_url_generation_with...,test_url_generation_without_context_fails,tests/test_appctx.py,0.000988,True,0.000293,True,0.000176,True,0.000225
3,1873.0,debug = None,841.0,src/flask/cli.py,run_command,,/tmp/tmpszy15usp,True,3.0,tests/test_appctx.py::test_request_context_mea...,test_request_context_means_app_context,tests/test_appctx.py,0.003678,True,0.001334,True,0.000801,True,0.000208
4,1873.0,debug = None,841.0,src/flask/cli.py,run_command,,/tmp/tmpszy15usp,True,4.0,tests/test_appctx.py::test_app_context_provide...,test_app_context_provides_current_app,tests/test_appctx.py,0.003005,True,0.001300,True,0.000202,True,0.000202
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
958036,1040.0,if self.session_interface.is_null_sess...,2174.0,src/flask/app.py,process_response,,/tmp/tmpsuxyyqwi,True,463.0,tests/test_views.py::test_explicit_head,test_explicit_head,tests/test_views.py,0.005060,True,0.001630,True,0.001569,True,0.000230
958037,1040.0,if self.session_interface.is_null_sess...,2174.0,src/flask/app.py,process_response,,/tmp/tmpsuxyyqwi,True,464.0,tests/test_views.py::test_endpoint_override,test_endpoint_override,tests/test_views.py,0.005697,True,0.001254,True,0.002970,True,0.000220
958038,1040.0,if self.session_interface.is_null_sess...,2174.0,src/flask/app.py,process_response,,/tmp/tmpsuxyyqwi,True,465.0,tests/test_views.py::test_methods_var_inheritance,test_methods_var_inheritance,tests/test_views.py,0.004606,True,0.001403,True,0.001572,True,0.000228
958039,1040.0,if self.session_interface.is_null_sess...,2174.0,src/flask/app.py,process_response,,/tmp/tmpsuxyyqwi,True,466.0,tests/test_views.py::test_multiple_inheritance,test_multiple_inheritance,tests/test_views.py,0.004516,True,0.001343,True,0.001601,True,0.000228


In [146]:
class Reorderer:
    
    
    def fit(self, X_train, y_train):
        raise NotImplementedError
    
    # Task: make an ordering for every mutant in X_test
    def predict(self, X_test):
        raise NotImplementedError

In [147]:
# Just uses a linear ordering
class NaiveReorderer:
    def fit(self, X_train, y_train):
        pass
    
    def predict(self, X_test):
        test_ids = list(X_test.groupby('test_id').count().index)
        orderings = X_test.groupby('mutant_id').count()
        orderings['order'] = None
        orderings['order'] = orderings['order'].astype('object')
        for row in orderings.itertuples():
            orderings.at[row.Index, 'order'] = test_ids
        return orderings

In [148]:
# Executes the most flaky tests first
class AverageReorderer:
    def fit(self, X_train, y_train):
        # Count how often the test failed in the 'mutant_id' column
        sorted_test_ids = mutants_and_tests.groupby(['test_id', 'outcome']).count().loc[(slice(None), False), :].sort_values('mutant_id', ascending=False).reset_index()
        # This is directly how we want our permutation to be:
        self.ordering = list(sorted_test_ids['test_id'])
        
    def predict(self, X_test):
        orderings = X_test.groupby('mutant_id').count()
        orderings['order'] = None
        orderings['order'] = orderings['order'].astype('object')
        for row in orderings.itertuples():
            orderings.at[row.Index, 'order'] = self.ordering
        return orderings
        

In [149]:
nr = NaiveReorderer()
naive = nr.predict(mutants_and_tests)
ar = AverageReorderer()
ar.fit(mutants_and_tests, mutants_and_tests['outcome'])
aver = ar.predict(mutants_and_tests)

In [151]:
print(aver)

           current_line  line_number_changed  modified_file_path  \
mutant_id                                                          
1.0                 468                  468                 468   
2.0                 468                  468                 468   
3.0                 468                  468                 468   
4.0                 468                  468                 468   
5.0                 468                  468                 468   
...                 ...                  ...                 ...   
2088.0              468                  468                 468   
2089.0              468                  468                 468   
2090.0              468                  468                 468   
2091.0              468                  468                 468   
2092.0              468                  468                 468   

           modified_method  previous_line  repo_path  outcome  test_id  \
mutant_id                                

## Evaluation

In [162]:
for row in aver.itertuples():
    mutant_executions = mutants_and_tests.loc[mutants_and_tests['mutant_id'] == row.Index]
    
    # Only execute the metrics if we have at least one failure
    if mutant_executions['outcome'].values.all() == False:
        r = ReorderingEvaluation(aver.loc[row.Index].order, mutant_executions)
        print(r.APFD())

True
0.8853121516164995
True
0.9671855921855922
True
0.9861111111111112
True
0.9861111111111112
True
0.7766462464738326
True
0.9351851851851851
True
0.9351851851851851
True
0.9554093567251462
True
0.9554093567251462
True
0.9560185185185185
True
0.9560185185185185
True
0.9560185185185185
True
0.7103468113083498
True
0.9560185185185185
True
0.9807692307692307
True
0.9560185185185185
True
0.9732905982905983
True
0.9861111111111112
True
0.9732905982905983
True
0.9861111111111112
True
0.9311490978157645
True
0.8825414781297134
True
0.8825414781297134
True
0.9083710407239819
True
0.9083710407239819
True
0.9465811965811965
True
0.8071979844367905
True
0.7941440604484082
True
0.7941440604484082
True
0.803281832127986
True
0.9861111111111112
True
0.9861111111111112
True
0.972985347985348
True
0.972985347985348
True
0.972985347985348
True
0.962369420702754
True
0.962369420702754
True
0.9131562881562881
True
0.9158769973987365
True
0.9131562881562881
True
0.7941440604484082
True
0.973748473748473

In [103]:
mutant_executions = mutants_and_tests.loc[mutants_and_tests['mutant_id'] == 2]
mutant_executions
r = ReorderingEvaluation(o.loc[2].order, mutant_executions)

In [104]:
print(r.APFD())
print(r.first_failing_duration())
print(r.last_test_failing_duration())

0.2634310134310134
1.503943920135498
2.801299810409546


In [138]:
ordering

0      383.0
1      225.0
2      388.0
3      387.0
4      386.0
       ...  
463    273.0
464    358.0
465    357.0
466    274.0
467    275.0
Name: test_id, Length: 468, dtype: float64