# Read Data

In [None]:
from scipy.optimize import minimize
import numpy as np
import pandas as pd
import json
from collections import OrderedDict, defaultdict
from pathlib import Path
import os
# Directory holding the Devign dataset files; created if it is missing so the
# path always exists before anything is read from it.
ROOT = Path('devign')
ROOT.mkdir(exist_ok=True)
# Parse the dataset inside a context manager so the file handle is closed as
# soon as parsing finishes. OrderedDict keeps each JSON object's key order.
with open(ROOT/'Devign.json') as datasetFile:
    data = json.load(datasetFile, object_pairs_hook=OrderedDict)

In [None]:
def _readIDs(filename):
    """Return the integer IDs stored one per line in the file ROOT/<filename>.

    The file is opened in a context manager so its handle is closed once all
    lines have been converted. A line that is not a valid integer raises
    ValueError (same as the previous `map(int, ...)` behaviour).
    """
    with open(os.path.join(ROOT, filename)) as idFile:
        return [int(line) for line in idFile]


# Sample IDs belonging to each split of the dataset.
testIDs = _readIDs("test.txt")
trainIDs = _readIDs("train.txt")
validIDs = _readIDs("valid.txt")
# Notebook display: size of the test split and the smallest ID of each split.
len(testIDs),min(testIDs), min(trainIDs), min(validIDs)

In [None]:
# One entry per line of the transformation-details file; each entry is the
# list of whitespace-separated fields found on that line.
details = []
with open('TransformationDetails-TEST.txt', 'r') as detailsFile:
    details.extend(line.split() for line in detailsFile.readlines())
# Notebook display: number of rows read.
len(details)

# Read predictions

In [None]:
# Maps a sample ID to an 18-slot score vector that starts as all zeros.
# Later cells fill it: the slot at transforms.index(<transform>) accumulates
# scores for transformed variants, slot [-2] holds the PLBART score and slot
# [-1] the VulBERTa score for the original sample.
predictionVector = defaultdict(lambda: [0] * 18)

In [None]:
# Names of the code transformations that are scored. The position of a name in
# this list is the slot it occupies in each predictionVector entry, so the
# order must not change.
transforms = [
    'LimitsOfML4Vuln-transformationstf_9',
    'LimitsOfML4Vuln-transformationstf_7',
    'Code-Imitator-transformationsCompound_statement_transformations_1',
    'LimitsOfML4Vuln-transformationstf_12',
    'RoPGen-transformationsDefinition_and_initialization_of_multiple_variables_with_same_types',
    'LimitsOfML4Vuln-transformationstf_5',
    'RoPGen-transformationsCompound_if_statements',
    'RoPGen-transformationsLocation_and_initializing_local_variables',
    'RoPGen-transformationsLoop_structures',
    'Code-Imitator-transformationsInclude-typedef_transformations',
    'LimitsOfML4Vuln-transformationstf_4',
    'Code-Imitator-transformationsOutput_interface_transformations_3',
    'RoPGen-transformationsAccess_of_array_pointer_elements',
    'RoPGen-transformationsVariable_assignment',
    'Code-Imitator-transformationsFloating-point_type_transformations',
    'Code-Imitator-transformationsInput_interface_transformations_2',
]

In [None]:
import torch
def getPercentages(l):
    """Return the softmax of the scores in ``l`` as a torch tensor.

    Args:
        l: Sequence (or array-like) of raw scores accepted by ``torch.tensor``.

    Returns:
        A tensor with the same shape as ``l`` whose entries along dimension 0
        are non-negative and sum to 1.
    """
    scores = torch.tensor(l)
    # Softmax is not implemented for integer tensors, so all-integer input
    # such as [0, 1] is promoted to the default floating-point dtype first.
    if not scores.is_floating_point():
        scores = scores.to(torch.get_default_dtype())
    return torch.nn.functional.softmax(scores, dim=0)

In [None]:
# VulBERTa results on the original (untransformed) test samples.
# Line i of the file belongs to testIDs[i]. The lines are read inside a context
# manager so the handle is closed right away; `f` stays a list of lines.
with open("Results/vulberta-TEST.txt") as resultsFile: #predictionsVulberta
    f = resultsFile.readlines()
origLabel_VULBERTA = dict()
origPrediction_VULBERTA = dict()
origProbability_VULBERTA = dict()
for i,l in enumerate(f):
    content = l.split()
    sampleID = testIDs[i]
    # Field 0 is stored as the prediction and field 1 as the label (both kept
    # as strings).
    origLabel_VULBERTA[sampleID] = content[1]
    origPrediction_VULBERTA[sampleID] = content[0]

    # Fields 2 and 3 are glued back together and evaluated as one Python
    # expression holding the two class scores.
    # NOTE(review): eval() executes whatever text the results file contains;
    # acceptable only for trusted, locally produced files. Consider
    # ast.literal_eval if the file can come from elsewhere.
    probabilities = eval(content[2]+content[3])
    # Softmax share of class index 1.
    predictionProbability = getPercentages(probabilities)[1]
    origProbability_VULBERTA[sampleID] = predictionProbability

    # Last slot of the score vector: VulBERTa's score centred on 0, so its
    # sign says which side of 0.5 the probability falls on.
    predictionVector[sampleID][-1] = predictionProbability-0.5


In [None]:
# PLBART results on the original (untransformed) test samples.
# Line i of the file belongs to testIDs[i]. The lines are read inside a context
# manager so the handle is closed right away; `f` stays a list of lines.
with open("Results/plbart-TEST.txt") as resultsFile: #result-ORIGINAL
    f = resultsFile.readlines()
origLabel_PLBART = dict()
origPrediction_PLBART = dict()
origProbability_PLBART = dict()
for i,l in enumerate(f):
    content = l.split()
    sampleID = testIDs[i]
    # Field 0 is stored as the prediction and field 1 as the label (both kept
    # as strings).
    origLabel_PLBART[sampleID] = content[1]
    origPrediction_PLBART[sampleID] = content[0]

    # Drop the first 7 characters of field 2 and the last character of field 3
    # (presumably a `tensor(` prefix and its closing `)` -- confirm against the
    # results file), then evaluate the remainder as the two class scores.
    # NOTE(review): eval() executes whatever text the results file contains;
    # acceptable only for trusted, locally produced files. Consider
    # ast.literal_eval if the file can come from elsewhere.
    probabilities = eval(content[2][7:]+content[3][:-1])
    # Softmax share of class index 1.
    predictionProbability = getPercentages(probabilities)[1]
    origProbability_PLBART[sampleID] = predictionProbability

    # Second-to-last slot of the score vector: PLBART's score centred on 0.
    predictionVector[sampleID][-2] = predictionProbability-0.5

# Read predictions on transformations

In [None]:
# VulBERTa results on the transformed test samples.
# Line i of the file is described by details[i]. The lines are read inside a
# context manager so the handle is closed right away; `f` stays a list of lines.
with open("Results/vulberta-transformed-TEST.txt") as resultsFile:
    f = resultsFile.readlines()
ensemblePredictions_VULBERTA = defaultdict(list)
ensembleProbability_VULBERTA = defaultdict(list)
for i,l in enumerate(f):
    content = l.split()
    # Field 2 of the details row is the ID of the sample that was transformed.
    getID = int(details[i][2])
    # The hard prediction is recorded for every row, including rows whose
    # transformation is skipped below.
    ensemblePredictions_VULBERTA[getID].append(int(content[0]))
    # Fields 0 and 1 of the details row concatenate to the transformation name.
    transform = details[i][0] + details[i][1]
    # 1 transformation is in the validation set but not in the test set
    if transform not in transforms:
        continue

    # NOTE(review): eval() executes whatever text the results file contains;
    # acceptable only for trusted, locally produced files. Consider
    # ast.literal_eval if the file can come from elsewhere.
    probabilities = eval(content[2]+content[3])
    # Softmax share of class index 1.
    predictionProbability = getPercentages(probabilities)[1]
    ensembleProbability_VULBERTA[getID].append(predictionProbability)

    # Add the centred score to the slot that belongs to this transformation.
    predictionVector[getID][transforms.index(transform)]+=predictionProbability-0.5

In [None]:
# PLBART results on the transformed test samples.
# Line i of the file is described by details[i]. The lines are read inside a
# context manager so the handle is closed right away; `f_PLBART` stays a list
# of lines.
with open("Results/plbart-transformed-TEST.txt") as resultsFile:
    f_PLBART = resultsFile.readlines()
ensemblePredictions_PLBART = defaultdict(list)
ensembleProbability_PLBART = defaultdict(list)
for i,l in enumerate(f_PLBART):
    content = l.split()
    # Field 2 of the details row is the ID of the sample that was transformed.
    getID = int(details[i][2])
    # The hard prediction is recorded for every row, including rows whose
    # transformation is skipped below.
    ensemblePredictions_PLBART[getID].append(int(content[0]))
    # Fields 0 and 1 of the details row concatenate to the transformation name.
    transform = details[i][0] + details[i][1]
    # 1 transformation is in the validation set but not in the test set
    if transform not in transforms:
        continue

    # Drop the first 7 characters of field 2 and the last character of field 3
    # (presumably a `tensor(` prefix and its closing `)` -- confirm against the
    # results file), then evaluate the remainder as the two class scores.
    # NOTE(review): eval() executes whatever text the results file contains;
    # acceptable only for trusted, locally produced files. Consider
    # ast.literal_eval if the file can come from elsewhere.
    probabilities = eval(content[2][7:]+content[3][:-1])
    # Softmax share of class index 1.
    predictionProbability = getPercentages(probabilities)[1]
    ensembleProbability_PLBART[getID].append(predictionProbability)

    # Add the centred score to the slot that belongs to this transformation.
    predictionVector[getID][transforms.index(transform)]+=predictionProbability-0.5

In [None]:
def fitness(x,avail=predictionVector,nSamples=2732):
    """Return the negated accuracy of the weighted ensemble defined by ``x``.

    For each sample, the entries of its score vector are multiplied by the
    weights ``x`` and summed; a sum greater than 0 counts as prediction 1,
    anything else as prediction 0. The prediction is compared with the label
    in ``origLabel_PLBART``.

    Args:
        x: Weight array with one weight per slot of the score vectors.
        avail: Mapping from sample ID to score vector. Defaults to the
            ``predictionVector`` object that exists when this function is
            defined.
        nSamples: Denominator used to turn the number of correct predictions
            into a fraction. Defaults to 2732, the value that was previously
            hard-coded; pass ``len(avail)`` when scoring a different set.

    Returns:
        ``-(correct / nSamples)``; negated so that a minimizer maximises the
        fraction of correct predictions.

    Raises:
        KeyError: If a key of ``avail`` has no entry in ``origLabel_PLBART``.
    """
    total = 0
    for k,v in avail.items():
        # v is a plain list and x an array, so v*x is the elementwise product.
        pred = 1 if sum(v*x) > 0 else 0
        total += int(int(origLabel_PLBART[k]) == pred)
    return - (total /nSamples)

In [None]:
# Initial guess: a weight of 1 for each of the 18 slots of the score vectors.
x0 = np.ones(18, dtype=int)
# Run each listed scipy solver from the same starting point and print the
# objective value and weights it ends on.
solverNames = ("Nelder-Mead", "Powell", "CG", "BFGS",
               "L-BFGS-B", "TNC", "COBYLA", "SLSQP")
for m in solverNames:
    res = minimize(fitness, x0, method=m)
    print(m, res.fun, res.x)