# Imports

In [None]:
import git
from git import Repo

from unidiff import PatchSet

import subprocess
import json
import pandas as pd
import numpy as np
import matplotlib
import sys
import matplotlib.pyplot as plt

In [None]:
class ChangedFile:
    """Record of the single source-line change introduced by one mutant.

    Attributes are plain instance attributes, so ``instance.__dict__`` can be
    used directly as a table row.
    """

    def __init__(self, mutant_id: int = -1, modified_file_path: str = '',
                 line_number_changed: int = -1, previous_line: str = '',
                 current_line: str = ''):
        """Store the description of a change.

        Args:
            mutant_id: Identifier of the mutant that produced the change.
            modified_file_path: Path of the file that was modified.
            line_number_changed: Line number associated with the change.
            previous_line: Text of the line before the change.
            current_line: Text of the line after the change.
        """
        # Assignment order is kept as-is: it determines the key order of
        # ``__dict__``.
        self.modified_file_path = modified_file_path
        self.line_number_changed = line_number_changed
        self.previous_line = previous_line
        self.current_line = current_line
        self.mutant_id = mutant_id

    def __str__(self) -> str:
        """Return a multi-line, human-readable summary of the change."""
        # The f-string reads the id from ``self`` and converts every field to
        # text, so integer ids and line numbers format without errors.
        return (
            f"Mutant ID:{self.mutant_id}"
            f"\nModified file: {self.modified_file_path}"
            f"\n  Line number: {self.line_number_changed}"
            f"\nPrevious Line: {self.previous_line}"
            f" Current line: {self.current_line}"
        )

In [None]:
def generate_repo_diff_data(repo: Repo, mutant_id=-1):
    """Describe the working-tree change of ``repo`` as a ``ChangedFile``.

    Only the first hunk of the first changed file in the diff against HEAD
    is inspected.

    Args:
        repo: Repository whose working tree currently contains the change.
        mutant_id: Identifier stored on the returned ``ChangedFile``.

    Returns:
        A ``ChangedFile`` holding the modified path, the start line of the
        first hunk, and the text of the removed and added lines (an empty
        string when the hunk has no removed or no added line).

    Raises:
        ValueError: If the diff against HEAD contains no changed hunk.
    """
    diff = repo.git.diff(repo.head, None)
    patchset = PatchSet(diff)
    if not patchset or not patchset[0]:
        raise ValueError(
            "working tree has no changes against HEAD for mutant "
            + str(mutant_id)
        )

    patched_file = patchset[0]
    first_hunk = patched_file[0]

    print(patched_file.target_file)
    modified_file_path = patched_file.target_file[2:]  # Remove b/ from the path
    # NOTE(review): source_start is where the hunk begins in the old file;
    # if the diff carries context lines this is not the changed line itself.
    # Confirm that consumers of line_number_changed expect the hunk start.
    changed_sourcecode_line = first_hunk.source_start

    # Both default to '' so a hunk that only adds or only removes lines
    # still produces a result instead of an unbound-variable error.
    previous_line = ''
    changed_to_line = ''
    for line in first_hunk:
        # str(line) starts with the diff marker; [2:] drops the first two
        # characters, as the original extraction did.
        if line.is_added:
            changed_to_line = str(line)[2:]
        if line.is_removed:
            previous_line = str(line)[2:]

    return ChangedFile(mutant_id, modified_file_path, changed_sourcecode_line,
                       previous_line, changed_to_line)

#print(analyze_change_in_working_directory(repo).__dict__)

In [None]:
def generate_test_data(mutant_id=-1, report_path='repos/flask/report.json'):
    """Load per-test outcomes from a JSON test report.

    The file must contain a top-level ``"report"`` object with a ``"tests"``
    list whose entries carry ``"run_index"`` and ``"outcome"`` keys.

    Args:
        mutant_id: Identifier attached to every returned row.
        report_path: Location of the JSON report; defaults to the report
            inside the Flask checkout.

    Returns:
        A list of dicts with keys ``test_id``, ``mutant_id`` and ``outcome``.
        ``outcome`` is ``False`` only for tests reported as ``"failed"``;
        every other reported outcome is recorded as ``True``.
    """
    with open(report_path, encoding='utf-8') as json_file:
        report = json.load(json_file)["report"]

    return [
        {
            'test_id': test["run_index"],
            'mutant_id': mutant_id,
            'outcome': test["outcome"] != "failed",
        }
        for test in report["tests"]
    ]
        

In [None]:
# Run pytest inside repos/flask and write its results to report.json there
# (the path generate_test_data reads).
!cd repos/flask/ && pytest --json=report.json

In [None]:
# Empty result tables: one row per mutant and one row per (test, mutant) run.
mutant_columns = [
    'mutant_id',
    'modified_file_path',
    'line_number_changed',
    'previous_line',
    'current_line',
]
test_run_columns = ['test_id', 'mutant_id', 'outcome']

mutants = pd.DataFrame(columns=mutant_columns)
test_runs = pd.DataFrame(columns=test_run_columns)
mutants

# Scratch check of the string comparison used for test outcomes.
"failed" == "failed"

In [None]:
def analyze_mutant(*,mutant_id):
    repo = Repo("repos/flask")
    assert not repo.bare
    !echo Switching to Mutant $mutant_id
    !. crm2020_python3.8/bin/activate && cd repos/flask/ && mutmut apply $mutant_id && pytest --json=report.json > /dev/null
    changed_file = generate_repo_diff_data(repo= repo, mutant_id=mutant_id)
    tests = generate_test_data(mutant_id=mutant_id)
    
    # Cleanup logic:
    !cd repos/flask/ && git checkout .
    !rm repos/flask/report.json
    
    return {"changed_file": changed_file, "tests": tests}
    
# analyze_mutant(mutant_id=1)

In [None]:
# Analyze every second mutant and collect the results as plain rows; the
# DataFrames are built once at the end (DataFrame.append was removed in
# pandas 2.0 and re-copied the whole table on every call).
test_rows = []
mutant_rows = []

for mutant_id in range(1, 2091, 2):
    try:
        result = analyze_mutant(mutant_id=mutant_id)
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C still stops the long loop.
        print("Unexpected error:", type(exc), exc)
        # Restore the checkout so a half-applied mutant cannot contaminate
        # the following mutants.
        subprocess.run(["git", "checkout", "."], cwd="repos/flask", check=False)
        continue

    test_rows.extend(result["tests"])
    mutant_rows.append(vars(result["changed_file"]))
    print("Tests length: " + str(len(test_rows)))

tests = pd.DataFrame(test_rows)
mutants = pd.DataFrame(mutant_rows)


In [None]:
# Show the collected mutants table as the cell output.
mutants

In [None]:
# Show the collected test-run table as the cell output.
tests


In [None]:
# Join each mutant's change description onto its test runs (keyed by
# mutant_id) and persist the combined table.
mutants_by_id = mutants.set_index('mutant_id')
tests_by_id = tests.set_index('mutant_id')
mutants_and_tests = mutants_by_id.join(tests_by_id)
mutants_and_tests.to_pickle('flask_dataset_every_2_mutant.pkl')

In [None]:
# Number of rows per (test_id, outcome) pair, read off the current_line column.
runs_by_test_and_outcome = mutants_and_tests.groupby(['test_id', 'outcome'])
runs_by_test_and_outcome.count()['current_line']


In [None]:
# Covariance Matrix
# Reload the saved dataset, spread the outcomes into one column per test_id
# and plot the covariance between those columns.
mutants_and_tests = pd.read_pickle('flask_dataset_every_2_mutant.pkl')
outcome_by_test = mutants_and_tests.pivot(columns='test_id', values='outcome')
outcome_covariance = outcome_by_test.cov()
plt.matshow(outcome_covariance)
plt.show()