In [None]:
import os
import zipfile
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from scipy.spatial.distance import cityblock

def find_zip_file(prefix, directory=None):
    """Locate a ZIP archive whose file name starts with ``prefix``.

    Args:
        prefix: Leading part of the archive's file name (e.g. ``'features-'``).
        directory: Directory to search. When ``None`` (the default) the current
            working directory is searched and the bare file name is returned;
            otherwise the result is ``directory`` joined with the file name.

    Returns:
        The matching archive. When several entries match, the first one in
        lexicographic order is returned.

    Raises:
        FileNotFoundError: If no entry starts with ``prefix`` and ends with
            ``.zip``.
    """
    search_dir = "." if directory is None else directory
    # os.listdir() yields entries in arbitrary order; sorting makes the choice
    # reproducible when more than one archive shares the prefix.
    for file in sorted(os.listdir(search_dir)):
        if file.startswith(prefix) and file.endswith('.zip'):
            return file if directory is None else os.path.join(directory, file)
    raise FileNotFoundError(f"Nenhum arquivo ZIP encontrado com o prefixo '{prefix}'.")

def unzip_and_remove(zip_file_path):
    """Extract a ZIP archive into a sibling directory and delete the archive.

    The target directory is the archive path without its extension. If the
    archive contained a single wrapper folder with the same name as the target
    directory, the wrapper's entries are moved up one level and the wrapper is
    removed.

    Args:
        zip_file_path: Path to the archive to extract.

    Returns:
        Path of the directory holding the extracted content.

    Raises:
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
        OSError: If extraction, flattening or deletion fails.
    """
    # Strip only the trailing extension; str.replace(".zip", "") would also
    # remove ".zip" occurring anywhere else in the path.
    base_dir = os.path.splitext(zip_file_path)[0]
    os.makedirs(base_dir, exist_ok=True)

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(base_dir)

    # Flatten "<base>/<base>/..." into "<base>/...". Only a directory can be
    # flattened, so a nested *file* with the same name is left alone.
    nested_dir = os.path.join(base_dir, os.path.basename(base_dir))
    if os.path.isdir(nested_dir):
        for file_name in os.listdir(nested_dir):
            os.rename(os.path.join(nested_dir, file_name), os.path.join(base_dir, file_name))
        os.rmdir(nested_dir)

    # Delete the archive last, so a failure above does not lose the input.
    os.remove(zip_file_path)

    return base_dir

def convert_ground_truth_to_feature(ground_truth_dir):
    """Give every ``caso_*`` file in ``ground_truth_dir`` a ``.feature`` extension.

    Files whose name starts with ``caso_`` and does not already end in
    ``.feature`` are renamed in place: an existing extension is replaced, a
    missing one is appended. Sub-directories are skipped. One line is printed
    per renamed file, in lexicographic order of the original names.

    Args:
        ground_truth_dir: Directory containing the ground-truth case files.

    Raises:
        FileExistsError: If the ``.feature`` target already exists. Raised
            explicitly because ``os.rename`` would silently overwrite it on
            POSIX systems.
    """
    # sorted() gives a reproducible processing/log order; os.listdir() alone
    # returns entries in arbitrary order.
    for file_name in sorted(os.listdir(ground_truth_dir)):
        if not file_name.startswith("caso_") or file_name.endswith(".feature"):
            continue

        original_path = os.path.join(ground_truth_dir, file_name)
        if not os.path.isfile(original_path):
            # Only regular files are case files; never rename a directory.
            continue

        # splitext() returns the whole name when there is no extension, so a
        # single expression covers both "caso_1.txt" and "caso_1".
        new_file_name = f"{os.path.splitext(file_name)[0]}.feature"
        new_path = os.path.join(ground_truth_dir, new_file_name)

        if os.path.exists(new_path):
            raise FileExistsError(
                f"Não é possível converter {file_name}: {new_file_name} já existe."
            )

        os.rename(original_path, new_path)
        print(f"Convertido {file_name} para {new_file_name}")

def read_feature_files(directory):
    """Load the text of every ``.feature`` file found directly in ``directory``.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        Dict mapping each ``.feature`` file name to its full text content.
        Empty when the directory holds no such file.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    feature_files = {}
    for file_name in os.listdir(directory):
        if file_name.endswith(".feature"):
            # Decode explicitly as UTF-8 so accented text does not depend on
            # the platform's default encoding.
            with open(os.path.join(directory, file_name), 'r', encoding='utf-8') as file:
                feature_files[file_name] = file.read()
    return feature_files

def manhattan_distance(text1, text2):
    """Compute the Manhattan (city-block) distance between two texts.

    Both texts are turned into token-count vectors over a shared vocabulary
    by a default-configured ``CountVectorizer`` fitted on just these two
    documents; the distance is then taken between the two count vectors.

    Args:
        text1: First document.
        text2: Second document.

    Returns:
        The city-block distance between the two count vectors, as returned
        by ``scipy.spatial.distance.cityblock``.
    """
    # Row 0 holds the counts for text1, row 1 the counts for text2.
    count_matrix = CountVectorizer().fit_transform([text1, text2])
    first_counts, second_counts = count_matrix.toarray()
    return cityblock(first_counts, second_counts)

def read_model_info(base_dir):
    """Read the model and technique names from ``<base_dir>/model.txt``.

    The first line supplies the model and the second line the technique. On
    each line the value is everything after the first ``:``, with surrounding
    whitespace stripped. The text before the colon is not checked.

    Args:
        base_dir: Directory containing ``model.txt``.

    Returns:
        Tuple ``(model, technique)``.

    Raises:
        FileNotFoundError: If ``model.txt`` is missing.
        IndexError: If the file has fewer than two lines or one of the first
            two lines contains no ``:``.
    """
    model_info_path = os.path.join(base_dir, "model.txt")
    with open(model_info_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
    # maxsplit=1 keeps values that themselves contain ":" intact; a plain
    # split(":")[1] would cut them off at the second colon.
    model = lines[0].split(":", 1)[1].strip()
    technique = lines[1].split(":", 1)[1].strip()
    return model, technique

def compare_with_ground_truth(ground_truth_dir, features_dir, num_cases=10, num_execs=5):
    """Rank each execution's feature files by distance to the ground truth.

    For every ``caso_<i>.feature`` (``i`` in ``1..num_cases``) present in
    ``ground_truth_dir``, the same-named file in each
    ``<features_dir>/exec_<k>`` (``k`` in ``1..num_execs``) is compared with
    ``manhattan_distance``. Executions lacking that file are left out.
    Executions are ranked per case by ascending distance, starting at 1. Ties
    keep execution order and receive consecutive ranks.

    Args:
        ground_truth_dir: Directory with the reference ``.feature`` files.
        features_dir: Directory containing the ``exec_<k>`` sub-directories.
        num_cases: Highest case number to look for (default 10).
        num_execs: Highest execution number to look for (default 5).

    Returns:
        ``pandas.DataFrame`` with columns ``Caso Base``,
        ``Execução Comparada``, ``Distância de Manhattan`` and ``Ranking``.
        The columns are present even when there are no rows.

    Raises:
        FileNotFoundError: If ``ground_truth_dir`` is missing, or if at least
            one ground-truth case exists and an ``exec_<k>`` directory is
            missing.
    """
    columns = ["Caso Base", "Execução Comparada", "Distância de Manhattan", "Ranking"]
    results = []

    ground_truth_files = read_feature_files(ground_truth_dir)
    case_names = [
        name
        for name in (f"caso_{i}.feature" for i in range(1, num_cases + 1))
        if name in ground_truth_files
    ]

    # Read each execution directory once instead of once per case. Skipped
    # entirely when there is no ground-truth case to compare against, so no
    # execution directory is touched in that situation.
    exec_files = {}
    if case_names:
        for exec_num in range(1, num_execs + 1):
            exec_name = f"exec_{exec_num}"
            exec_files[exec_name] = read_feature_files(os.path.join(features_dir, exec_name))

    for ground_truth_file_name in case_names:
        ground_truth_content = ground_truth_files[ground_truth_file_name]
        distances = [
            (exec_name, manhattan_distance(ground_truth_content, files[ground_truth_file_name]))
            for exec_name, files in exec_files.items()
            if ground_truth_file_name in files
        ]

        # list.sort is stable, so equal distances stay in execution order.
        distances.sort(key=lambda x: x[1])
        for rank, (exec_name, distance) in enumerate(distances, 1):
            results.append({"Caso Base": ground_truth_file_name, "Execução Comparada": exec_name, "Distância de Manhattan": distance, "Ranking": rank})

    # Passing columns keeps the schema stable even when results is empty.
    return pd.DataFrame(results, columns=columns)

def main():
    """Run the whole comparison pipeline from the current working directory.

    Steps: locate the ground-truth and features archives, extract them (the
    archives are deleted by ``unzip_and_remove``), rename the ground-truth
    cases to ``.feature``, read the model/technique names, compute the
    ranking, write it to a CSV named after the model and technique, and
    print the resulting table.
    """
    # Resolve both archives before extracting either, so a missing archive
    # is reported before anything on disk is modified.
    archives = [find_zip_file(prefix) for prefix in ('ground_truth', 'features-')]
    ground_truth_dir, features_dir = [unzip_and_remove(archive) for archive in archives]

    convert_ground_truth_to_feature(ground_truth_dir)

    model, technique = read_model_info(features_dir)
    # "/" would be read as a path separator in the output file name.
    model_safe, technique_safe = (name.replace("/", "-") for name in (model, technique))
    output_csv = f"ranking_similaridade_manhattan_{model_safe}_{technique_safe}.csv"

    df_results = compare_with_ground_truth(ground_truth_dir, features_dir)
    df_results.to_csv(output_csv, index=False)
    print(f"Resultados salvos em {output_csv}")

    print("\nVisualização do DataFrame com o Ranking de Similaridade:")
    print(df_results)

# Run the pipeline as soon as this cell executes. main() deletes the input
# ZIP archives after extracting them, and find_zip_file raises
# FileNotFoundError when no archive is present, so the archives must be
# provided again before this cell can be re-run.
main()

Convertido caso_6.txt para caso_6.feature
Convertido caso_7.txt para caso_7.feature
Convertido caso_2.txt para caso_2.feature
Convertido caso_3.txt para caso_3.feature
Convertido caso_9.txt para caso_9.feature
Convertido caso_1.txt para caso_1.feature
Convertido caso_10.txt para caso_10.feature
Convertido caso_4.txt para caso_4.feature
Convertido caso_5.txt para caso_5.feature
Convertido caso_8.txt para caso_8.feature
Resultados salvos em ranking_similaridade_manhattan_gpt-3.5-turbo-0125_few_shot.csv

Visualização do DataFrame com o Ranking de Similaridade:
          Caso Base Execução Comparada  Distância de Manhattan  Ranking
0    caso_1.feature             exec_4                      44        1
1    caso_1.feature             exec_1                      45        2
2    caso_1.feature             exec_2                      50        3
3    caso_1.feature             exec_3                      50        4
4    caso_1.feature             exec_5                      52        5
5   

In [None]:
import os
import shutil

def remove_unzipped_directories_and_csv():
    """
    Clean the current working directory.

    Deletes every directory whose name starts with 'features-', the directory
    named exactly 'ground_truth', and then every entry whose name ends in
    '.csv'. Each removal is attempted independently: a failure is printed and
    the cleanup carries on with the next entry.
    """
    current_dir = os.getcwd()

    # Snapshot the matching directories before deleting anything.
    directories = [
        name
        for name in os.listdir(current_dir)
        if (name == 'ground_truth' or name.startswith('features-'))
        and os.path.isdir(os.path.join(current_dir, name))
    ]
    for directory in directories:
        dir_path = os.path.join(current_dir, directory)
        try:
            shutil.rmtree(dir_path)
        except Exception as e:
            print(f"Erro ao remover {dir_path}: {e}")
        else:
            print(f"Diretório removido: {dir_path}")

    # The listing is taken again after the directories are gone.
    for file_name in os.listdir(current_dir):
        if not file_name.endswith('.csv'):
            continue
        file_path = os.path.join(current_dir, file_name)
        try:
            os.remove(file_path)
        except Exception as e:
            print(f"Erro ao remover {file_path}: {e}")
        else:
            print(f"Arquivo CSV removido: {file_path}")

# Run the cleanup as soon as this cell executes. It deletes every *.csv entry
# in the current working directory, not only the ranking files written by the
# comparison cell.
remove_unzipped_directories_and_csv()

Diretório removido: /content/ground_truth
Diretório removido: /content/features-meta-meta-llama-3-70b-instruct
Arquivo CSV removido: /content/ranking_similaridade_manhattan_meta-meta-llama-3-70b-instruct_few_shot.csv
