In [6]:
import json, shutil, os, sys, shlex, subprocess
from pandas import DataFrame, read_excel
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import fbeta_score
from sklearn.metrics import recall_score
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import itertools
from sklearn.utils import shuffle
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from imblearn.pipeline import make_pipeline as imbalanced_make_pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold, learning_curve
from sklearn import svm
from sklearn.utils import shuffle
import xgboost as xgb

In [None]:
# Module-level placeholder for the MSV results directory. None of the code
# visible in this notebook reads this global: get_plausible_results and
# parse_msv receive ``msv_results_path`` as a parameter instead.
msv_results_path = ""

def mkdir(path):
    """Create directory ``path`` (including missing parents) if it is absent.

    The creation is done in a single ``os.makedirs(..., exist_ok=True)`` call
    rather than a separate "exists?" check followed by a create, so a directory
    that appears between the two steps can no longer make the call fail.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def get_plausible_results(msv_results_path):
    """Extract the plausible patches from every MSV run directory.

    Every entry of ``msv_results_path`` whose name ends with ``"50"`` is
    expected to contain ``msv-original-sim-data.json``. The entries of that
    JSON object whose ``"plausible"`` value is truthy are written, as a list of
    single-key ``{patch: data}`` dicts, to
    ``<msv_results_path>-parsed/<entry>.json``.

    Args:
        msv_results_path: Directory holding one sub-directory per MSV run.
    """
    parsed_root = f"{msv_results_path}-parsed"
    mkdir(parsed_root)

    for run_name in os.listdir(msv_results_path):
        # Only entries whose name ends in "50" are processed.
        if run_name.endswith("50"):
            with open(f"{msv_results_path}/{run_name}/msv-original-sim-data.json") as src:
                history = json.load(src)
                kept = [
                    {patch_id: record}
                    for patch_id, record in history.items()
                    if record["plausible"]
                ]

            with open(f"{parsed_root}/{run_name}.json", "w+") as dst:
                json.dump(kept, dst)
    
def copy_patches(msv_results_path_parsed, patch_results_path):
    """Copy every plausible patch into ``plausible_patches/<project>/``.

    Each file in ``msv_results_path_parsed`` is a JSON list of single-key
    dicts. The key is split on ``"/"`` and its second component names an entry
    inside ``<patch_results_path>/<project>/``. Entries that exist there are
    copied to ``plausible_patches/<project>/<entry>`` (relative to the current
    working directory), overwriting a previous copy.

    Args:
        msv_results_path_parsed: Directory of ``<project>-*.json`` files.
        patch_results_path: Directory with one sub-directory of generated
            patches per project.
    """
    # Loop-invariant: which projects have generated patches at all.
    available_projects = set(os.listdir(patch_results_path))

    for project in os.listdir(msv_results_path_parsed):
        project_name = project.split("-")[0]
        if project_name not in available_projects:
            continue

        # Context manager so the JSON file handle is closed deterministically.
        with open(f"{msv_results_path_parsed}/{project}") as f:
            candidates = json.load(f)

        mkdir(f"plausible_patches/{project_name}")

        # List the project's patches once instead of once per candidate.
        patch_entries = set(os.listdir(f"{patch_results_path}/{project_name}"))

        for candidate in candidates:
            patch = next(iter(candidate.keys())).split("/")[1]
            if patch not in patch_entries:
                continue

            source = f"{patch_results_path}/{project_name}/{patch}"
            target = f"plausible_patches/{project_name}/{patch}"

            if os.path.isdir(source):
                # parse_for_coming lists each copied entry as a directory, and
                # shutil.copyfile cannot copy directories, so copy the tree.
                # Remove a stale copy first to keep overwrite semantics.
                if os.path.isdir(target):
                    shutil.rmtree(target)
                shutil.copytree(source, target)
            else:
                shutil.copyfile(source, target)
                
def parse_for_coming():
    """Lay out the plausible patches under ``coming_rep/``.

    For every ``plausible_patches/<project>/<idx>/`` directory, the first file
    (in sorted order) is copied to
    ``coming_rep/<diff_folder>/<name>/<diff_folder>_<name>_t.java`` where

    * ``diff_folder`` is ``<project>-`` followed by ``idx`` cut at its last
      ``"_"`` with the remaining ``"_"`` replaced by ``"-"``;
    * ``name`` is the file name up to its first ``"."``.

    Entries that are not directories, or that are empty, are reported with a
    "Fail for" message and skipped instead of aborting the whole run.

    All paths are relative to the current working directory.
    """
    for project in os.listdir("plausible_patches/"):
        for idx in os.listdir(f"plausible_patches/{project}"):
            patch_dir = f"plausible_patches/{project}/{idx}"

            # List the directory once; sorting makes the chosen file
            # deterministic (os.listdir order is arbitrary).
            patched_files = sorted(os.listdir(patch_dir)) if os.path.isdir(patch_dir) else []
            if not patched_files:
                print("Fail for", f"{project}/{idx}")
                continue

            # NOTE: when idx has no "_", rfind returns -1 and the slice drops
            # the last character of idx; kept as in the original naming scheme.
            diff_folder = project + "-" + idx[:idx.rfind("_")].replace("_", "-")

            file = patched_files[0]
            modif_file = file.split(".")[0]

            mkdir(f"coming_rep/{diff_folder}/{modif_file}")

            shutil.copyfile(f"{patch_dir}/{file}",
                            f"coming_rep/{diff_folder}/{modif_file}/{diff_folder}_{modif_file}_t.java")
            
def get_src_path(project):
    """Return the source-root path fragment for a ``<Name>_<bug id>`` project.

    Args:
        project: Identifier of the form ``"Math_12"``; it must contain exactly
            one ``"_"`` and the part after it must parse as an integer.

    Returns:
        The path fragment (with leading and trailing ``/``) for the known
        project names, or ``None`` for any other name.

    Raises:
        ValueError: If ``project`` does not split into exactly two parts on
            ``"_"`` or the bug id is not an integer.
    """
    name, raw_id = project.split("_")
    number = int(raw_id)

    maven_layout = "/src/main/java/"
    flat_layout = "/src/java/"

    # Math and Lang switch layout at a bug-id threshold; the rest are fixed.
    layouts = {
        "Math": maven_layout if number < 85 else flat_layout,
        "Time": maven_layout,
        "Lang": maven_layout if number <= 35 else flat_layout,
        "Chart": "/source/",
        "Closure": "/src/",
        "Mockito": "/src/",
    }
    return layouts.get(name)

def fetch_buggy_files(buggy_projects_path):
    """Place the buggy version of each patched class next to its patch.

    For every ``coming_rep/<project>/<class>/`` directory, the Java ``package``
    declaration is read from a file already in that directory and the buggy
    source
    ``<buggy_projects_path>/<name>/<src root>/<package path>/<class>.java``
    is copied to ``coming_rep/<project>/<class>/<project>_<class>_s.java``.
    ``<name>`` is ``project`` up to its first ``"-"`` and ``<src root>`` comes
    from ``get_src_path``.

    Every class directory of a project is processed, not only the first one.
    Cases that cannot be resolved (unknown source root, empty class directory,
    no ``package`` line) are reported with "Fail for" and skipped.

    Args:
        buggy_projects_path: Directory with one checkout per ``<name>``.

    Raises:
        OSError: If the resolved buggy source file cannot be copied.
    """
    for project in os.listdir("coming_rep/"):
        project_name = project.split("-")[0]
        src_path = get_src_path(project_name)
        if src_path is None:
            # Unknown project: do not build a path containing the text "None".
            print("Fail for", project)
            continue

        for src_file in os.listdir(f"coming_rep/{project}"):
            pair_dir = f"coming_rep/{project}/{src_file}"
            existing = sorted(os.listdir(pair_dir)) if os.path.isdir(pair_dir) else []
            if not existing:
                print("Fail for", f"{project}/{src_file}")
                continue
            target_file = existing[0]

            mid_path = None
            # errors="replace": only the ASCII package line matters, so
            # undecodable bytes elsewhere in the file must not abort the read.
            with open(f"{pair_dir}/{target_file}", errors="replace") as f:
                for line in f:  # stream lines; stop at the package declaration
                    if line.startswith("package "):
                        # Keep the text before the first ";" so trailing
                        # whitespace or comments do not leak into the path.
                        mid_path = line[8:].split(";")[0].strip().replace(".", "/")
                        break

            if mid_path is None:
                print("Fail for", f"{project}/{src_file}")
                continue

            buggy_file = f"{buggy_projects_path}/{project_name}/{src_path}/{mid_path}/{src_file}.java"

            shutil.copyfile(buggy_file, f"{pair_dir}/{project}_{src_file}_s.java")
        
def parse_msv(msv_results_path, patch_results_path, buggy_projects_path):
    """Run the four preparation stages in order.

    Args:
        msv_results_path: Passed to ``get_plausible_results``; with the suffix
            ``"-parsed"`` appended it is also the first argument of
            ``copy_patches``.
        patch_results_path: Second argument of ``copy_patches``.
        buggy_projects_path: Passed to ``fetch_buggy_files``.
    """
    # Stage 1: filter the plausible patches out of the raw MSV results.
    get_plausible_results(msv_results_path)

    # Stage 2: copy the matching generated patches.
    copy_patches(msv_results_path + "-parsed", patch_results_path)

    # Stages 3 and 4: build the coming_rep layout, then add the buggy sources.
    parse_for_coming()
    fetch_buggy_files(buggy_projects_path)
    
# Command-line driver: expects three positional arguments
#   1. directory with the MSV result logs
#   2. directory with the generated patches
#   3. directory with the buggy project checkouts
# Indexing sys.argv unguarded raises IndexError when fewer arguments are
# present. That is typically the case inside a Jupyter kernel, where sys.argv
# holds the kernel's own launch arguments rather than user-supplied paths.
# So the length is checked first and a usage hint is printed otherwise.
args = sys.argv
if len(args) >= 4:
    msv_results_logs = args[1]
    generated_patches_path = args[2]
    path_to_buggy_projects = args[3]
    parse_msv(msv_results_logs, generated_patches_path, path_to_buggy_projects)
else:
    print("usage: <script> MSV_RESULTS_LOGS GENERATED_PATCHES_PATH PATH_TO_BUGGY_PROJECTS",
          file=sys.stderr)

In [7]:
## Run Coming tool
def run_coming(coming_path, pairs_path):
    """Run Coming in ``features`` mode over a directory of file pairs.

    The argument vector is built as a list rather than formatted into one
    string and re-split, so paths containing spaces, quotes or backslashes are
    passed to ``java`` unchanged. Output is written by the tool to ``./out``.

    Args:
        coming_path: Root of the Coming checkout; the jar is expected at
            ``<coming_path>/target/coming-0-SNAPSHOT-jar-with-dependencies.jar``.
        pairs_path: Directory passed to Coming's ``-location`` option.

    Raises:
        Exception: If the process exits with a non-zero return code; the
            captured standard error is printed first.
    """
    command = [
        "java",
        "-classpath", f"{coming_path}/target/coming-0-SNAPSHOT-jar-with-dependencies.jar",
        "fr.inria.coming.main.ComingMain",
        "-input", "files",
        "-mode", "features",
        "-location", f"{pairs_path}",
        "-output", "./out",
    ]
    process = subprocess.run(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)

    if process.returncode != 0:
        # Decode so the log is readable text instead of a bytes repr.
        print(process.stderr.decode(errors="replace"))
        raise Exception("Something wrong with Coming")

In [8]:
def run_ods(training_list="./train.csv", testing_list="./test.csv",
            output_path="./prediction.csv"):
    """Train an XGBoost classifier and write per-patch prediction scores.

    The file locations are parameters whose defaults are the paths that were
    previously hard-coded, so ``run_ods()`` behaves as before.

    Column layout, as read positionally by the code:

    * training CSV: column 1 is the label, columns 2 onward are the features;
    * testing CSV: column 0 is the patch identifier, columns 1 onward are the
      features.

    Args:
        training_list: Path of the training CSV (read as latin1).
        testing_list: Path of the testing CSV (read as latin1).
        output_path: Destination CSV with columns ``patch`` and
            ``prediction_label`` (the second column of ``predict_proba``).
    """
    training = pd.read_csv(training_list, encoding='latin1', index_col=False)
    testing = pd.read_csv(testing_list, encoding='latin1', index_col=False)

    X_train = training.iloc[:, 2:]
    Y_train = training.iloc[:, 1]
    X_test = testing.iloc[:, 1:]
    id_test = testing.iloc[:, 0]

    # Fixed seeds keep the shuffle and the model reproducible.
    X_train, Y_train = shuffle(X_train, Y_train, random_state=0)
    X_train, Y_train = X_train.values, Y_train.values
    model = xgb.XGBClassifier(random_state=42, max_depth=6, gamma=0.5)
    # The evaluation set used for early stopping is the training data itself.
    eval_set = [(X_train, Y_train)]
    # NOTE(review): passing early_stopping_rounds / eval_metric to fit() was
    # deprecated in XGBoost 1.6 and appears to be removed in later releases;
    # confirm against the installed version before upgrading.
    model.fit(X_train, Y_train, early_stopping_rounds=30, eval_metric="mae", eval_set=eval_set)
    Y_pred = model.predict_proba(X_test)[:, 1]
    result = {'patch': id_test, 'prediction_label': Y_pred}
    resultDF = pd.DataFrame(result)
    resultDF.to_csv(output_path)