In [None]:
!pip install bert-score

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m180.3 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13


In [None]:
import zipfile
import pandas as pd
import os
from bert_score import score
from google.colab import files

def unzip_and_remove(zip_file_path):
    """Extract a ZIP archive into a sibling directory and flatten one nesting level.

    The target directory is the archive path with its trailing ``.zip``
    extension removed (case-insensitive). Only the final extension is
    stripped, so a ``.zip`` substring elsewhere in the path (for example in a
    parent directory name) is left untouched. If the path does not end in
    ``.zip``, ``_extracted`` is appended instead so the target never collides
    with the archive file itself.

    If the archive contains a single wrapper folder named like the target
    directory (``name/name/...``), its entries are moved one level up and the
    now-empty wrapper is removed.

    Note: despite the name, the archive file itself is not deleted.

    Args:
        zip_file_path: Path to the ZIP archive.

    Returns:
        Path of the directory the archive was extracted into.
    """
    root, extension = os.path.splitext(zip_file_path)
    if extension.lower() == ".zip":
        base_dir = root
    else:
        base_dir = f"{zip_file_path}_extracted"
    os.makedirs(base_dir, exist_ok=True)

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(base_dir)

    # Archives are often zipped together with their top-level folder; hoist its
    # contents. isdir (not exists) so a regular file of that name is left alone.
    nested_dir = os.path.join(base_dir, os.path.basename(base_dir))
    if os.path.isdir(nested_dir):
        for entry_name in os.listdir(nested_dir):
            os.rename(
                os.path.join(nested_dir, entry_name),
                os.path.join(base_dir, entry_name),
            )
        os.rmdir(nested_dir)

    return base_dir

def convert_ground_truth_to_feature(ground_truth_dir):
    """Rename every ``caso_*`` entry in a directory so it ends in ``.feature``.

    Entries whose name starts with ``caso_`` and does not already end in
    ``.feature`` get their extension replaced by ``.feature`` (or appended
    when they have none). A line is printed for each rename.

    Args:
        ground_truth_dir: Directory whose entries are renamed in place.
    """
    for entry in os.listdir(ground_truth_dir):
        if not entry.startswith("caso_") or entry.endswith(".feature"):
            continue

        # splitext returns the whole name as the stem when there is no
        # extension, so one expression covers both cases.
        stem = os.path.splitext(entry)[0]
        new_file_name = f"{stem}.feature"

        source = os.path.join(ground_truth_dir, entry)
        target = os.path.join(ground_truth_dir, new_file_name)
        os.rename(source, target)
        print(f"Convertido {entry} para {new_file_name}")

def read_feature_files(directory):
    """Load every ``.feature`` file found directly inside a directory.

    Files are decoded as UTF-8 explicitly so that accented text is read the
    same way regardless of the platform's default encoding.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        Dict mapping each ``.feature`` file name to its full text content.
    """
    feature_files = {}
    for file_name in os.listdir(directory):
        if file_name.endswith(".feature"):
            file_path = os.path.join(directory, file_name)
            with open(file_path, 'r', encoding='utf-8') as file:
                feature_files[file_name] = file.read()
    return feature_files

def calculate_bertscore_similarity(ground_truth, llm_output):
    """Return the BERTScore F1 between two texts as a percentage.

    ``score`` is called with ``llm_output`` as the single candidate and
    ``ground_truth`` as the single reference, using ``lang='pt'`` and
    baseline rescaling. Only the third returned value (F1) is used.

    Args:
        ground_truth: Reference text.
        llm_output: Generated text to evaluate.

    Returns:
        Mean F1 multiplied by 100 and rounded to two decimals.
    """
    _precision, _recall, f1 = score(
        [llm_output],
        [ground_truth],
        lang='pt',
        rescale_with_baseline=True,
    )
    f1_percentage = f1.mean().item() * 100
    return round(f1_percentage, 2)

def read_model_info(base_dir):
    """Read the model name and prompting technique from ``model.txt``.

    The file is expected to hold at least two ``label: value`` lines: the
    first line's value is the model, the second line's value is the
    technique. Each line is split on the first colon only, so values that
    themselves contain colons (e.g. ``org/model:version``) are kept whole.

    Args:
        base_dir: Directory containing ``model.txt``.

    Returns:
        Tuple ``(model, technique)`` with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If ``model.txt`` does not exist in ``base_dir``.
        ValueError: If the file has fewer than two lines or one of the first
            two lines has no ``:`` separator.
    """
    model_info_path = os.path.join(base_dir, "model.txt")
    with open(model_info_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    if len(lines) < 2:
        raise ValueError(
            f"{model_info_path} must contain at least two 'label: value' lines"
        )

    values = []
    for line in lines[:2]:
        # partition splits on the first colon only, preserving any later ones.
        _label, separator, value = line.partition(":")
        if not separator:
            raise ValueError(
                f"Missing ':' separator in {model_info_path}: {line.strip()!r}"
            )
        values.append(value.strip())

    model, technique = values
    return model, technique

def compare_with_ground_truth(ground_truth_dir, features_dir, num_cases=10, num_execs=5):
    """Score every execution's output against the ground truth and rank them.

    For each case ``caso_<i>.feature`` (``i`` in ``1..num_cases``) present in
    ``ground_truth_dir``, the file of the same name in each
    ``features_dir/exec_<n>`` directory (``n`` in ``1..num_execs``) is compared
    with ``calculate_bertscore_similarity``. Within each case the executions
    are ranked from highest to lowest score, starting at 1; ties keep
    execution order.

    Rows are emitted in ascending case number, so the result is deterministic
    across runs. Missing ground-truth files, missing execution directories and
    missing case files inside an execution are skipped.

    Args:
        ground_truth_dir: Directory holding the reference ``.feature`` files.
        features_dir: Directory holding the ``exec_<n>`` sub-directories.
        num_cases: Highest case number to look for (default 10).
        num_execs: Highest execution number to look for (default 5).

    Returns:
        ``pandas.DataFrame`` with the columns ``Caso Base``,
        ``Execução Comparada``, ``Pontuação BERTScore`` and ``Ranking``.
    """
    columns = ["Caso Base", "Execução Comparada", "Pontuação BERTScore", "Ranking"]

    ground_truth_files = read_feature_files(ground_truth_dir)

    # Each execution directory is read once up front rather than once per case.
    outputs_by_exec = {}
    for exec_num in range(1, num_execs + 1):
        exec_name = f"exec_{exec_num}"
        exec_dir = os.path.join(features_dir, exec_name)
        if os.path.isdir(exec_dir):
            outputs_by_exec[exec_name] = read_feature_files(exec_dir)

    ranked_results = []
    for case_num in range(1, num_cases + 1):
        case_file_name = f"caso_{case_num}.feature"
        if case_file_name not in ground_truth_files:
            continue
        reference_content = ground_truth_files[case_file_name]

        case_results = []
        for exec_name, exec_outputs in outputs_by_exec.items():
            if case_file_name not in exec_outputs:
                continue
            # Named `similarity` so the imported `score` function is not shadowed.
            similarity = calculate_bertscore_similarity(
                reference_content, exec_outputs[case_file_name]
            )
            case_results.append({
                "Caso Base": case_file_name,
                "Execução Comparada": exec_name,
                "Pontuação BERTScore": similarity,
            })

        # Stable sort: equal scores stay in execution order.
        case_results.sort(key=lambda row: -row["Pontuação BERTScore"])
        for rank, row in enumerate(case_results, start=1):
            row["Ranking"] = rank
            ranked_results.append(row)

    # Explicit columns keep the header even when nothing was compared.
    return pd.DataFrame(ranked_results, columns=columns)

# Prompt for the two archives through the Colab upload widget: first the
# ground truth, then the model output.
print("Por favor, faça o upload do arquivo ZIP para o ground truth:")
uploaded_ground_truth = files.upload()

print("Por favor, faça o upload do arquivo ZIP para o modelo:")
uploaded_model = files.upload()

# Use the first key of each upload result as the archive path
# (presumably the uploaded file name, as saved in the working directory).
ground_truth_zip = list(uploaded_ground_truth)[0]
features_zip = list(uploaded_model)[0]

def main(ground_truth_zip, features_zip):
    """Run the whole evaluation: extract, normalise, score, save and print.

    Steps, in order: extract both archives, rename the ground-truth files to
    ``.feature``, read the model/technique labels from the model archive,
    compute the ranking table, write it to a CSV in the working directory
    and print it.

    Args:
        ground_truth_zip: Path to the ground-truth ZIP archive.
        features_zip: Path to the model-output ZIP archive.
    """
    reference_dir = unzip_and_remove(ground_truth_zip)
    outputs_dir = unzip_and_remove(features_zip)

    convert_ground_truth_to_feature(reference_dir)

    model, technique = read_model_info(outputs_dir)

    ranking_table = compare_with_ground_truth(reference_dir, outputs_dir)

    # "/" would be read as a path separator in the output file name.
    model_safe, technique_safe = (
        label.replace("/", "-") for label in (model, technique)
    )
    output_csv = f"ranking_similaridade_bertscore_{model_safe}_{technique_safe}.csv"

    ranking_table.to_csv(output_csv, index=False)
    print(f"Resultados salvos em {output_csv}")

    print("\nVisualização do DataFrame com o Ranking de Similaridade:")
    print(ranking_table)

# Run the evaluation pipeline on the two uploaded archives.
main(ground_truth_zip, features_zip)

Por favor, faça o upload do arquivo ZIP para o ground truth:


Saving ground_truth.zip to ground_truth.zip
Por favor, faça o upload do arquivo ZIP para o modelo:


Saving features-lucataco-phi-3-mini-128k-instruct_45ba1bd0a3cf3d5254becd00d937c4ba0c01b13fa1830818f483a76aa844205e.zip to features-lucataco-phi-3-mini-128k-instruct_45ba1bd0a3cf3d5254becd00d937c4ba0c01b13fa1830818f483a76aa844205e.zip
Convertido caso_6.txt para caso_6.feature
Convertido caso_7.txt para caso_7.feature
Convertido caso_2.txt para caso_2.feature
Convertido caso_3.txt para caso_3.feature
Convertido caso_9.txt para caso_9.feature
Convertido caso_1.txt para caso_1.feature
Convertido caso_10.txt para caso_10.feature
Convertido caso_4.txt para caso_4.feature
Convertido caso_5.txt para caso_5.feature
Convertido caso_8.txt para caso_8.feature
Resultados salvos em ranking_similaridade_bertscore_lucataco-phi-3-mini-128k-instruct_one_shot.csv

Visualização do DataFrame com o Ranking de Similaridade:
          Caso Base Execução Comparada  Pontuação BERTScore  Ranking
0    caso_6.feature             exec_2                49.68        1
1    caso_6.feature             exec_5           

In [None]:
import os
import shutil
import glob

def delete_files_by_pattern(pattern):
    """Delete every regular file matching a glob pattern.

    Matches that are not regular files (e.g. directories) are left alone.
    A confirmation line is printed for each file removed.

    Args:
        pattern: Glob pattern passed to ``glob.glob``.
    """
    for candidate in glob.glob(pattern):
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
        print(f"Arquivo '{candidate}' apagado com sucesso.")

def delete_directories_by_pattern(pattern):
    """Recursively delete every directory matching a glob pattern.

    Matches that are not directories (e.g. regular files) are left alone.
    A confirmation line is printed for each directory removed.

    Args:
        pattern: Glob pattern passed to ``glob.glob``.
    """
    for candidate in glob.glob(pattern):
        if not os.path.isdir(candidate):
            continue
        shutil.rmtree(candidate)
        print(f"Pasta '{candidate}' apagada com sucesso.")

def main():
    """Remove the artifacts left in the working directory by an evaluation run.

    Deletes, in order: the ranking CSVs, the uploaded model and ground-truth
    ZIP archives, and then the extracted model and ground-truth directories.

    Note: this reuses the name ``main`` of the evaluation entry point defined
    in the earlier cell, so running this cell rebinds ``main``.
    """
    file_patterns = (
        "ranking_similaridade_bertscore_*.csv",
        "features-*.zip",
        "ground_truth.zip",
    )
    for file_pattern in file_patterns:
        delete_files_by_pattern(file_pattern)

    # "features-*" also matches the ZIP files, but only directories are removed.
    for dir_pattern in ("features-*", "ground_truth"):
        delete_directories_by_pattern(dir_pattern)

# Delete the CSVs, archives and extracted folders matched by the patterns above.
main()

Arquivo 'ranking_similaridade_bertscore_gpt-4o-mini_zero_shot.csv' apagado com sucesso.
Arquivo 'features-gpt-4o-mini.zip' apagado com sucesso.
Arquivo 'ground_truth.zip' apagado com sucesso.
Pasta 'features-gpt-4o-mini' apagada com sucesso.
Pasta 'ground_truth' apagada com sucesso.
