# Get the single task results

In [1]:
import json

def extract_json_structure(path):
    with open(path, "r") as file:
        content = file.read()

    lan = path.split("/")[-1].split("_")[2]
    start_index = content.find(f"{{'{lan}_accuracy':")
    # Find the start of the JSON-like structure
    # start_index = content.find(start_token)
    if start_index == -1:
        return None

    # Initialize counters for open and close curly braces
    open_braces = 0
    close_braces = 0

    # Iterate over the content to find the end of the JSON-like structure
    for i in range(start_index, len(content)):
        if content[i] == '{':
            open_braces += 1
        elif content[i] == '}':
            close_braces += 1
        if open_braces == close_braces and open_braces > 0:
            end_index = i
            json_str = content[start_index:end_index+1]
            # print(json_str)
            # Convert 'array(...)' to Python list format
            json_str = json_str.replace("array(", "")
            json_str = json_str.replace(")", "")
            parsed_data = eval(json_str)
            parsed_data['lan'] = lan
            parsed_data[f'{lan}_recall'] = parsed_data[f'{lan}_recall'][1]
            parsed_data[f'{lan}_precision'] = parsed_data[f'{lan}_precision'][1]
            parsed_data[f'{lan}_f1'] = parsed_data[f'{lan}_f1'][1]
            parsed_data.pop(f"{lan}_cm", None)
            return parsed_data

    print(f"Error: JSON-like structure not found in file {path}")
    return None

In [2]:
extract_json_structure("_logs_linevul/bigvul_results_Python_seed8432.out")

{'Python_accuracy': 0.9843962008141113,
 'Python_ap': 0.9024974554362696,
 'Python_recall': 0.81730769,
 'Python_precision': 0.95505618,
 'Python_f1': 0.88082902,
 'Python_threshold': 0.9976,
 'Python_mcc': 0.8755650561713234,
 'lan': 'Python'}

In [2]:

# list all files end with .out
import os

# List all files in the provided directory that end with .out
def list_files_with_numbers(directory, numbers, strings):
    # Filter files whose names contain one of the specified numbers
    # List to store the matching files
    matching_files = []

    # Traverse the directory
    for dirpath, dirnames, filenames in os.walk(directory):
        for filename in filenames:
            # Check if the file ends with .out
            if filename.endswith('.out'):
                # Check if any of the numbers is in the filename
                if any(str(num) in filename for num in numbers):
                    # Check if any of the strings is in the filename
                    if any(s in filename for s in strings):
                        matching_files.append(os.path.join(dirpath, filename))

    return matching_files

# Directory path (replace with your target directory)
directory = '_logs_linevul//'

# List of numbers
numbers = [43, 6916, 25569, 8408, 72844, 8432, 9406, 6088, 25888, 76954]

# Strings to be present in the filename
languages = ["Python", "TypeScript", "Java", "CS", "CCPP", "JavaScript"]

# Get the list of matching files
matching_files = list_files_with_numbers(directory, numbers, languages)


In [3]:
json_list = [extract_json_structure(path) for path in matching_files]

# Remove None values
filtered_lst = [x for x in json_list if x is not None]

In [4]:
grouped_data = {}
averaged_data = {}
for item in filtered_lst:
    lan = item['lan']
    if lan not in grouped_data:
        grouped_data[lan] = []
    grouped_data[lan].append(item)

for lan, items in grouped_data.items():
    avg_item = {'lan': lan}
    metric_count = len(items)
    for item in items:
        for key, value in item.items():
            if key != 'lan' and isinstance(value, (int, float)):
                prefix = f"{lan}_"
                if key.startswith(prefix):
                    metric_key = key[len(prefix):]
                    avg_item[metric_key] = avg_item.get(metric_key, 0) + value
    for key, value in avg_item.items():
        if key != 'lan':
            avg_item[key] = value / metric_count
            avg_item[key] = round(avg_item[key], 4)
    averaged_data[lan] = avg_item

In [5]:
averaged_data

{'CS': {'lan': 'CS',
  'accuracy': 0.8792,
  'ap': 0.7627,
  'recall': 0.5845,
  'precision': 0.7685,
  'f1': 0.6582,
  'threshold': 0.2969,
  'mcc': 0.599},
 'TypeScript': {'lan': 'TypeScript',
  'accuracy': 0.8963,
  'ap': 0.3226,
  'recall': 0.2141,
  'precision': 0.3477,
  'f1': 0.238,
  'threshold': 0.093,
  'mcc': 0.2071},
 'Python': {'lan': 'Python',
  'accuracy': 0.9858,
  'ap': 0.8824,
  'recall': 0.8101,
  'precision': 0.9669,
  'f1': 0.881,
  'threshold': 0.8964,
  'mcc': 0.8777},
 'Java': {'lan': 'Java',
  'accuracy': 0.908,
  'ap': 0.3624,
  'recall': 0.2688,
  'precision': 0.3112,
  'f1': 0.2869,
  'threshold': 0.0566,
  'mcc': 0.2398},
 'CCPP': {'lan': 'CCPP',
  'accuracy': 0.9519,
  'ap': 0.2515,
  'recall': 0.0773,
  'precision': 0.6988,
  'f1': 0.1366,
  'threshold': 0.1258,
  'mcc': 0.2165},
 'JavaScript': {'lan': 'JavaScript',
  'accuracy': 0.9139,
  'ap': 0.5944,
  'recall': 0.4752,
  'precision': 0.5676,
  'f1': 0.5152,
  'threshold': 0.2167,
  'mcc': 0.4719}}

## Read the MC results

In [7]:
def extract_mc_json_structure(path, languages):
    with open(path, "r") as file:
        content = file.read()
        content = content.strip()
    
    if "11" not in content[:8]:
        print("Obsolete file", path)
        return None
    
    phase = "_".join(path.split("/")[-1].split("_")[2:-1])

    parsed_list = []

    for lan in languages:
        start_index = content.find(f"{{'{lan}_accuracy':")
        # Find the start of the JSON-like structure
        # start_index = content.find(start_token)
        if start_index == -1:
            continue

        # Initialize counters for open and close curly braces
        open_braces = 0
        close_braces = 0

        # Iterate over the content to find the end of the JSON-like structure
        for i in range(start_index, len(content)):
            if content[i] == '{':
                open_braces += 1
            elif content[i] == '}':
                close_braces += 1
            if open_braces == close_braces and open_braces > 0:
                end_index = i
                json_str = content[start_index:end_index+1]
                # print(json_str)
                # Convert 'array(...)' to Python list format
                json_str = json_str.replace("array(", "")
                json_str = json_str.replace(")", "")
                try:
                    parsed_data = eval(json_str)
                    parsed_data['lan'] = lan
                    parsed_data['phase'] = phase
                    parsed_data[f'{lan}_recall'] = parsed_data[f'{lan}_recall'][1]
                    parsed_data[f'{lan}_precision'] = parsed_data[f'{lan}_precision'][1]
                    parsed_data[f'{lan}_f1'] = parsed_data[f'{lan}_f1'][1]
                    parsed_data.pop(f"{lan}_cm", None)
                    parsed_list.append(parsed_data)
                    break
                except SyntaxError:
                    print(json_str)
                    return None

    if len(parsed_list) == 0:
        print(f"Error: JSON-like structure not found in file {path}")
        return None
    
    assert len(parsed_list) == len(languages), f"file path: {path}"
    return parsed_list

In [8]:
# Get the list of matching files
strings = ["mc"]
mc_matching_files = list_files_with_numbers(directory, numbers, strings)

In [9]:
mc_json_list = [extract_mc_json_structure(path, languages) for path in mc_matching_files]

# Remove None values
mc_filtered_lst = [x for x in mc_json_list if x is not None]

In [9]:
{item[0]['phase'] for item in mc_filtered_lst}

{'mc', 'mc_adj', 'mc_bin', 'mc_focal', 'mc_focal_adj', 'mc_freeze'}

In [10]:
mc_filtered_lst

[[{'Python_accuracy': 0.9506944444444444,
   'Python_ap': 0.5134560405325834,
   'Python_recall': 0.1954023,
   'Python_precision': 0.94444444,
   'Python_f1': 0.32380952,
   'Python_threshold': 0.315,
   'Python_mcc': 0.4174517955620673,
   'lan': 'Python',
   'phase': 'mc_freeze'},
  {'TypeScript_accuracy': 0.9024390243902439,
   'TypeScript_ap': 0.14691499656413354,
   'TypeScript_recall': 0.0,
   'TypeScript_precision': 0.0,
   'TypeScript_f1': 0.0,
   'TypeScript_threshold': 0.0358,
   'TypeScript_mcc': 0.0,
   'lan': 'TypeScript',
   'phase': 'mc_freeze'},
  {'Java_accuracy': 0.9376299376299376,
   'Java_ap': 0.1383632465788265,
   'Java_recall': 0.02352941,
   'Java_precision': 0.22222222,
   'Java_f1': 0.04255319,
   'Java_threshold': 0.1001,
   'Java_mcc': 0.05495225141052885,
   'lan': 'Java',
   'phase': 'mc_freeze'},
  {'CS_accuracy': 0.8282828282828283,
   'CS_ap': 0.26965827780086327,
   'CS_recall': 0.0,
   'CS_precision': 0.0,
   'CS_f1': 0.0,
   'CS_threshold': 0.0234,

In [10]:
# unique_phases = {item["phase"] for item in mc_filtered_lst if "phase" in item.keys()}
# grouped_data = {}
# averaged_data = {}
# for item in filtered_lst:
#     lan = item['lan']
#     if lan not in grouped_data:
#         grouped_data[lan] = []
#     grouped_data[lan].append(item)

# for lan, items in grouped_data.items():
#     avg_item = {'lan': lan}
#     metric_count = len(items)
#     for item in items:
#         for key, value in item.items():
#             if key != 'lan' and isinstance(value, (int, float)):
#                 prefix = f"{lan}_"
#                 if key.startswith(prefix):
#                     metric_key = key[len(prefix):]
#                     avg_item[metric_key] = avg_item.get(metric_key, 0) + value
#     for key, value in avg_item.items():
#         if key != 'lan':
#             avg_item[key] = value / metric_count
#             avg_item[key] = round(avg_item[key], 4)
#     averaged_data[lan] = avg_item

# write a short email to prof. Ali that as my wife's flight arrival time changed from 6pm to 4pm, I need to leave earlier to pick her up. After that I will work at home to keep progress.


ablation_list = []

phases = {item[0]['phase'] for item in mc_filtered_lst}

for phase in phases:
    mc_phase_list = [item for item in mc_filtered_lst if item[0]["phase"] == phase]

    grouped_data = {}
    averaged_data = {}

    mc_phase_list = [item for sublist in mc_phase_list for item in sublist]

    for item in mc_phase_list:
        lan = item['lan']
        if lan not in grouped_data:
            grouped_data[lan] = []
        grouped_data[lan].append(item)

    for lan, items in grouped_data.items():
        avg_item = {'lan': lan}
        metric_count = len(items)
        for item in items:
            for key, value in item.items():
                if key != 'lan' and key != 'phase' and isinstance(value, (int, float)):
                    prefix = f"{lan}_"
                    if key.startswith(prefix):
                        metric_key = key[len(prefix):]
                        avg_item[metric_key] = avg_item.get(metric_key, 0) + value
        for key, value in avg_item.items():
            if key != 'lan':
                avg_item[key] = value / metric_count
                avg_item[key] = round(avg_item[key], 4)
        averaged_data[lan] = avg_item
    
    averaged_data['phase'] = phase
    ablation_list.append(averaged_data)

In [13]:
len(ablation_list)

6

In [14]:
ablation_list[0]

{'Python': {'lan': 'Python',
  'accuracy': 0.9864,
  'ap': 0.881,
  'recall': 0.8105,
  'precision': 0.9757,
  'f1': 0.8847,
  'threshold': 0.7639,
  'mcc': 0.8822},
 'TypeScript': {'lan': 'TypeScript',
  'accuracy': 0.9197,
  'ap': 0.315,
  'recall': 0.0801,
  'precision': 0.4231,
  'f1': 0.1294,
  'threshold': 0.1246,
  'mcc': 0.161},
 'Java': {'lan': 'Java',
  'accuracy': 0.9389,
  'ap': 0.4125,
  'recall': 0.1962,
  'precision': 0.783,
  'f1': 0.3066,
  'threshold': 0.0916,
  'mcc': 0.366},
 'CS': {'lan': 'CS',
  'accuracy': 0.8753,
  'ap': 0.7205,
  'recall': 0.4724,
  'precision': 0.854,
  'f1': 0.6008,
  'threshold': 0.2464,
  'mcc': 0.5721},
 'CCPP': {'lan': 'CCPP',
  'accuracy': 0.9536,
  'ap': 0.3295,
  'recall': 0.1312,
  'precision': 0.7442,
  'f1': 0.2204,
  'threshold': 0.0716,
  'mcc': 0.2966},
 'JavaScript': {'lan': 'JavaScript',
  'accuracy': 0.9164,
  'ap': 0.51,
  'recall': 0.296,
  'precision': 0.6576,
  'f1': 0.4029,
  'threshold': 0.1374,
  'mcc': 0.4015},
 'phase

In [11]:
from prettytable import PrettyTable

for result in ablation_list:
    print('***', result['phase'], '***')
    table = PrettyTable(['Language','Accuracy', 'AP','F1', 'Recall','Precision', 'mcc'])
    accuracy = []
    ap = []
    f1 = []
    recall = []
    precision = []
    mcc = []
    
    for lan in languages:
        table.add_row([lan, result[lan]['accuracy'], result[lan]['ap'], result[lan]['f1'], result[lan]['recall'],\
         result[lan]['precision'], result[lan]['mcc']])
        accuracy.append(result[lan]['accuracy'])
        ap.append(result[lan]['ap'])
        f1.append(result[lan]['f1'])
        recall.append(result[lan]['recall'])
        precision.append(result[lan]['precision'])
        mcc.append(result[lan]['mcc'])
    # add the average results
    table.add_row(['Average', sum(accuracy)/len(accuracy), sum(ap)/len(ap), sum(f1)/len(f1), sum(recall)/len(recall),\
         sum(precision)/len(precision), sum(mcc)/len(mcc)])
    print(table)

*** mc_freeze ***
+------------+--------------------+---------------------+--------+----------------------+---------------------+----------------------+
|  Language  |      Accuracy      |          AP         |   F1   |        Recall        |      Precision      |         mcc          |
+------------+--------------------+---------------------+--------+----------------------+---------------------+----------------------+
|   Python   |       0.9422       |        0.4249       | 0.2229 |        0.1326        |        0.7482       |        0.2988        |
| TypeScript |       0.9182       |        0.2129       |  0.0   |         0.0          |         0.0         |         0.0          |
|    Java    |       0.9268       |        0.1591       | 0.0625 |        0.0368        |        0.2214       |        0.0706        |
|     CS     |       0.7932       |        0.3064       |  0.0   |         0.0          |         0.0         |       -0.0094        |
|    CCPP    |       0.9496       |  

In [52]:
def test(a=1, b=2):
    return a+b

test(c=3)

TypeError: test() got an unexpected keyword argument 'c'