In [698]:
import pandas as pd
output_set = pd.read_json("../eval-data/output_recente.json")
test_set = pd.read_json("../eval-data/test_recente.json")

In [699]:
import re
import json
import os
def pre_process_data(df, is_test=False):
    df = df.fillna("")
    df = df.astype(str)
    df = df.applymap(lambda x: x.lower())
    # Retira pontuações. Útil para ganhar acurácia, O LLM as vezes retorna a frase corretamente, mas sem ponto final. 
    # Logo, pontuação no nosso caso é irrelevante.
    df = df.applymap(lambda x: re.sub(r'[^\w\s]', '', x))
    # Embaralha o conjunto de teste
    if is_test: 
        df = df.sample(frac=1).reset_index(drop=True)
    # Se não for o conjunto de teste, é o output. Remove as colunas start_time e end_time (não é necessário fazer isso no test_set, no arquivo já não há as colunas)
    else: 
        df =  df.drop(columns=['start_time', 'end_time'])
    return df


def save_test_shuffled(test_set):
    test_json = test_set.to_json(orient="records", indent=4)
    with open("eval-data/test_recente.json", "w") as f:
        f.write(test_json)

def save_results(file, results):
    directory = os.path.dirname(file)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(file, 'w') as f:
        json.dump(results, f, indent=4)


def get_test_set(test_path):
    test_set =  pd.read_json(test_path)
    test_set = pre_process_data(test_set, True)
    return test_set



def calcularResultados(tp, fp, fn, tn):
    precision = round(tp / (tp + fp) if (tp + fp) > 0 else 0, 4)
    recall = round(tp / (tp + fn) if (tp + fn) > 0 else 0, 4)
    f1 = round(2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0, 4)
    # acuracia para essa classe em questão. Total de acertos / total de exemplos
    # Se ele preveu que não era e realmente não era, também é acerto!
    accuracy = round((tp + fn) / (tp + fp + fn + tn) if (tp + fp + fn + tn) > 0 else 0, 4)

    return precision, recall, f1, accuracy


def calculateMetrics(pred_label, test_label, target_label):
    """
    Calcula métricas precisao, revocação, f1 e acurácia para o label 'category' ou 'action'
    """
    unique_labels = set(pred_label).union(set(test_label))
    unique_labels.discard("")
    results = []
    for label in unique_labels:
        # Condições para cada métrica
        # True Positive: previu a categoria e era a categoria
        tp = float(((pred_label == label) & (test_label == label)).sum())
        # False Positive: previu a categoria e não era
        fp = float(((pred_label == label) & (test_label != label)).sum())
        # False Negative: não previu a categoria, mas de fato era
        fn = float(((pred_label != label) & (test_label == label)).sum())
        # True Negatives: não previu a categoria e de fato não era
        tn = float(((pred_label != label) & (test_label != label)).sum())

        precision, recall, f1, accuracy = calcularResultados(tp, fp, fn, tn)
        new_json = {
            target_label: label,
            'precision': precision,
            'recall': recall,
            'f1-score': f1,
            'accuracy': accuracy,
        }
        results.append(new_json)
    return results

def media_por_label(teste, output, target_label):
    """
    Essa função faz o mesmo que a função "media_por_categoria" usada no repo 'llm_evaluation'
    Porém mais flexível, podendo ser usada para calcular o quanto que, para cada action ou category, o LLM acerta os outros paramêtros
    """
    unique_labels = set(output[target_label]).union(set(teste[target_label]))
    unique_labels.discard("")
    media = {label: 0 for label in unique_labels}
    media['evaluation'] = "avg accuracy of parameters"
    media['label'] = target_label
    total_labels = len(teste.columns)
    for label in unique_labels:
        indices = output[output[target_label] == label].index
        linhas_label_teste = teste.loc[indices]
        linhas_label_output = output.loc[indices]
        for (_, row_test), (_, row_out) in zip(linhas_label_teste.iterrows(), linhas_label_output.iterrows()):
            acertos = sum(val_test == val_out for val_test, val_out in zip(row_test.values, row_out.values))/total_labels
            media[label] += acertos
        media[label] = round(media[label] / len(linhas_label_output) if len(linhas_label_output) > 0 else 0, 4)
    media['label'] = target_label
    return [media]

In [700]:
output_set = output_set.fillna("")
output_set = output_set.astype(str)
output_set = output_set.applymap(lambda x: x.lower())
output_set = output_set.applymap(lambda x: re.sub(r'[^\w\s]', '', x))
output_set = output_set.drop(columns=['start_time', 'end_time'])
test_set = test_set.fillna("")
test_set = test_set.astype(str)
test_set = test_set.applymap(lambda x: x.lower())
test_set = test_set.applymap(lambda x: re.sub(r'[^\w\s]', '', x))

  output_set = output_set.applymap(lambda x: x.lower())
  output_set = output_set.applymap(lambda x: re.sub(r'[^\w\s]', '', x))
  test_set = test_set.applymap(lambda x: x.lower())
  test_set = test_set.applymap(lambda x: re.sub(r'[^\w\s]', '', x))


In [701]:
output_set

Unnamed: 0,intent,category,action,requirement,targets,magnitude
0,for devices connected via the staffonly networ...,regulate,block,video streaming services,devices connected via the staffonly network,
1,iot devices have the secure peertopeer file sh...,construct,advertise,critical firmware updates,iot devices,
2,identify iot devices supporting vpn compatibility,construct,discover,vpn compatibility,,
3,policy enforcement scripts is to be pushed to ...,transfer,push,policy enforcement scripts,devices in the quality assurance lab,
4,for workstations in the design team push criti...,transfer,push,critical firmware updates,workstations in the design team,
...,...,...,...,...,...,...
495,ensure that streaming services have higher net...,regulate,prioritize,,,10mbps
496,notify mobile devices in the logistics team th...,transfer,push,policy enforcement script,mobile devices in the logistics team,
497,vpns or proxy tools should be denied for compu...,regulate,block,,computers operating within the dormitory wifi,
498,for enduser devices in the campus guest networ...,regulate,block,community forums and discussion boards,enduser devices in the campus guest network,


In [702]:
test_set

Unnamed: 0,intent,category,action,requirement,targets,magnitude
0,for devices connected via the staffonly networ...,regulate,block,platforms hosting video streaming services,devices connected via the staffonly network,
1,iot devices has the secure peertopeer file sha...,transfer,pull,secure peertopeer file sharing servicecritical...,iot devices,
2,identify iot devices supporting vpn compatibility,construct,discover,vpn compatibility,iot devices,
3,policy enforcement scripts is to be pushed to ...,transfer,push,policy enforcement scripts,devices in the quality assurance lab,
4,for workstations in the design team push criti...,transfer,push,critical firmware updates,workstations in the design team,
...,...,...,...,...,...,...
495,ensure that streaming services have higher net...,regulate,prioritize,,streaming services,upload speed greater than 10mbps
496,notify mobile devices in the logistics team th...,construct,advertise,policy enforcement scripts,mobile devices in the logistics team,
497,vpns or proxy tools should be denied for compu...,regulate,block,vpns or proxy tools,computers operating within the dormitory wifi,
498,for enduser devices in the campus guest networ...,regulate,block,community forums and discussion boards,enduser devices in the campus guest network,


In [703]:
test_size = 500
model_name = 'llama'
current_model = 'teste'
metrics_categoria = calculateMetrics(output_set['category'].head(test_size), test_set['category'].head(test_size), 'category')
media_acertos_categoria = media_por_label(test_set.head(test_size), output_set.head(test_size), 'category')
metrics_action = calculateMetrics(output_set['action'].head(test_size), test_set['action'].head(test_size), 'action')
media_acertos_action = media_por_label(test_set.head(test_size), output_set.head(test_size), 'action')
#Junta as metricas com a media de acerto e salva
results = [{'intents': test_size}] + metrics_categoria + media_acertos_categoria + metrics_action + media_acertos_action
file_path = f"./results/{model_name}/{current_model}.json"
save_results(file_path, results)
    