In [1]:
import pandas as pd
import ast
import numpy as np
import os

In [2]:
def recall_at_k(expected_files, actual_files, k):
    """Recall@k: share of expected files present in the first k retrieved files.

    Args:
        expected_files: Ground-truth files.
        actual_files: Retrieved files; only the first ``k`` entries are considered.
        k: Number of leading entries of ``actual_files`` to look at.

    Returns:
        Number of distinct expected files found in ``actual_files[:k]`` divided by
        ``len(expected_files)``; ``0.0`` when ``expected_files`` is empty.
    """
    if len(expected_files) == 0:
        # Without ground truth nothing can be recalled; avoids ZeroDivisionError.
        return 0.0
    hits = set(actual_files[:k]) & set(expected_files)
    return len(hits) / len(expected_files)

def precision_at_k(expected_files, actual_files, k):
    """Precision@k: share of the first k retrieved files that are expected.

    The number of distinct files shared by ``actual_files[:k]`` and
    ``expected_files`` is divided by ``k`` itself (not by the number of files
    actually retrieved).
    """
    top_k = set(actual_files[:k])
    relevant = set(expected_files)
    hit_count = len(top_k & relevant)
    return hit_count / k

def f1(expected_files, actual_files, k):
    """F1-score between the set of expected files and the set of retrieved files.

    Args:
        expected_files: Ground-truth files.
        actual_files: Retrieved files (all of them are used, not only the top k).
        k: Unused; kept so the signature matches the other metric functions.

    Returns:
        Harmonic mean of precision and recall computed over the two sets, or
        ``0.0`` when they share no element (including when either is empty).
    """
    expected = set(expected_files)
    actual = set(actual_files)

    true_positives = len(expected & actual)
    if true_positives == 0:
        # Precision and recall are both zero (or undefined for empty inputs);
        # returning 0.0 avoids a ZeroDivisionError.
        return 0.0

    # TP + FP == |actual| and TP + FN == |expected|.
    precision = true_positives / len(actual)
    recall = true_positives / len(expected)
    return 2 * precision * recall / (precision + recall)

def get_expected_files_indexes(expected_files, actual_files) -> np.ndarray:
    """Return the positions in ``actual_files`` that hold an expected file.

    The membership test runs over ``actual_files`` so that the returned indexes
    can be used as positions within ``actual_files`` (and any sequence aligned
    with it).

    Args:
        expected_files: Ground-truth files.
        actual_files: Retrieved files.

    Returns:
        Ascending integer array of indexes ``i`` such that ``actual_files[i]``
        is contained in ``expected_files``; empty when there is no match.
    """
    is_expected = np.isin(actual_files, expected_files)
    return np.flatnonzero(is_expected)

In [3]:
def calc_search_metrics(expected_files, actual_files):
    """Compute R@k, P@k and F1 for one sample.

    ``k`` is 1 when exactly one file is expected and 2 otherwise. When the
    retrieved and expected files do not overlap at all, every metric is
    reported as 0 without calling the metric functions.
    """
    k = 1 if len(expected_files) == 1 else 2

    overlap = set(actual_files) & set(expected_files)
    if not overlap:
        return {'R@k': 0, 'P@k': 0, 'f1': 0}

    return {
        'R@k': recall_at_k(expected_files, actual_files, k),
        'P@k': precision_at_k(expected_files, actual_files, k),
        'f1': f1(expected_files, actual_files, k),
    }

In [4]:
def calc_retrive_metrics(expected_files, actual_files, distances):
    """Describe where the expected files sit according to the computed indexes.

    Uses the indexes returned by ``get_expected_files_indexes``; the first and
    last of them are reported raw, normalised by ``len(actual_files)``, and as
    lookups into ``distances``.

    Args:
        expected_files: Ground-truth files.
        actual_files: Retrieved files.
        distances: Sequence indexed with the same indexes
            (assumes it is aligned with ``actual_files`` — TODO confirm).

    Returns:
        Dict with ``first/last_expected_pos``, ``first/last_expected_index`` and
        ``first/last_expected_distance``. All six values are ``None`` when no
        index was found, so both return paths expose the same keys.
    """
    expected_files_indexes = get_expected_files_indexes(expected_files, actual_files)
    if len(expected_files_indexes) == 0:
        # Same key set (and order) as the populated result below.
        return {
            "first_expected_pos": None,
            "last_expected_pos": None,
            "first_expected_index": None,
            "last_expected_index": None,
            "first_expected_distance": None,
            "last_expected_distance": None,
        }

    first_index = expected_files_indexes[0]
    last_index = expected_files_indexes[-1]
    total = len(actual_files)
    return {
        "first_expected_pos": first_index / total,
        "last_expected_pos": last_index / total,
        "first_expected_index": first_index,
        "last_expected_index": last_index,
        "first_expected_distance": distances[first_index],
        "last_expected_distance": distances[last_index],
    }

In [5]:
def get_chat_metrics(results_path) -> pd.DataFrame:
    """Load a results CSV and build one row of metrics per sample.

    The ``changed_files``, ``final_files`` and ``all_generated_files`` columns
    are parsed from their string form with ``ast.literal_eval``. Each output
    row holds the search metrics for ``changed_files`` vs ``final_files`` plus:

    * ``time_s`` - ``time_ms`` divided by 1000;
    * ``batches_count`` - copied from the input row;
    * ``empty_output`` - 1 when ``final_files`` is empty;
    * ``irrelevant_output`` - 1 when ``final_files`` shares nothing with
      ``changed_files``;
    * ``wrong_output`` - 1 when ``all_generated_files`` contains something that
      is not in ``final_files``.
    """
    results_df = pd.read_csv(results_path)
    for column in ('changed_files', 'final_files', 'all_generated_files'):
        results_df[column] = results_df[column].map(ast.literal_eval)

    records = []
    for _, sample in results_df.iterrows():
        expected = sample['changed_files']
        final = sample['final_files']

        record = calc_search_metrics(expected, final)
        record['time_s'] = sample['time_ms'] / 1000
        record['batches_count'] = sample['batches_count']
        record['empty_output'] = int(len(final) == 0)
        record['irrelevant_output'] = int(len(set(expected) & set(final)) == 0)
        dropped = set(sample['all_generated_files']) - set(final)
        record['wrong_output'] = int(len(dropped) > 0)
        records.append(record)

    return pd.DataFrame(records)

In [6]:
import re

def add_commas_after_second_tick(s):
    """Insert a comma right after every second single quote in ``s``.

    Single quotes are counted from the start of the string; a comma is placed
    immediately after the 2nd, 4th, 6th, ... one. A trailing unpaired quote
    gets no comma.
    """
    pieces = []
    quotes_seen = 0
    for char in s:
        pieces.append(char)
        if char == "'":
            quotes_seen += 1
            if quotes_seen % 2 == 0:
                pieces.append(',')
    return ''.join(pieces)

def get_emb_metrics(results_path) -> pd.DataFrame:
    """Load a results CSV and build one row of metrics per sample.

    The ``final_files``, ``rank_scores`` and ``changed_files`` columns are
    parsed from their string form with ``ast.literal_eval``. Each output row
    merges the search metrics and the retrieval metrics (the latter computed
    with ``rank_scores`` as distances) and adds ``time_s`` (``time_ms`` / 1000).
    """
    results_df = pd.read_csv(results_path)
    for column in ('final_files', 'rank_scores', 'changed_files'):
        results_df[column] = results_df[column].map(ast.literal_eval)

    records = []
    for _, sample in results_df.iterrows():
        expected = sample['changed_files']
        retrieved = sample['final_files']
        records.append({
            **calc_search_metrics(expected, retrieved),
            **calc_retrive_metrics(expected, retrieved, sample['rank_scores']),
            'time_s': sample['time_ms'] / 1000,
        })

    return pd.DataFrame(records)

In [7]:
# Registry of per-baseline metric frames, keyed by baseline name.
metrics = dict()

In [8]:
# Root directory holding one sub-directory of results per baseline.
# Set BUG_LOCALIZATION_RESULTS_DIR to run the notebook on another machine;
# the original hard-coded location stays as the default.
results = os.environ.get(
    'BUG_LOCALIZATION_RESULTS_DIR',
    '/home/tigina/lca-baselines/bug_localization/output',
)

## Chat-based baselines

In [9]:
# NOTE(review): this is a chat-based baseline but it is loaded with
# get_emb_metrics, which requires a 'rank_scores' column — confirm whether
# get_chat_metrics was intended here.
metrics['openai_chat_gpt-3.5-turbo-1106'] = get_emb_metrics(os.path.join(results, 'openai_chat_gpt-3.5-turbo-1106/results.csv'))
# Read back from `metrics`: `df_emb_metrics` is not defined anywhere in this
# notebook and would raise NameError on a fresh kernel.
metrics['openai_chat_gpt-3.5-turbo-1106'].dropna().mean()

FileNotFoundError: [Errno 2] No such file or directory: '/home/tigina/lca-baselines/bug_localization/output/openai_chat_gpt-3.5-turbo-1106/results.csv'