In [411]:
import pandas as pd
output_test = pd.read_json("../eval-data/output_recente.json")
ground_test = pd.read_json("../eval-data/test_recente.json")

In [412]:

def calcularResultados(tp, fp, fn, tn):
    precision = round(tp / (tp + fp) if (tp + fp) > 0 else 0, 4)
    
    recall = round(tp / (tp + fn) if (tp + fn) > 0 else 0, 4)
    f1 = round(2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0, 4)
    # acuracia para essa classe em questão. Total de acertos / total de exemplos
    accuracy = round((tp + tn) / (tp + fp + fn + tn) if (tp + fp + fn + tn) > 0 else 0, 4)
    return precision, recall, f1, accuracy



def calculateMetrics(pred_label, test_label, target_label):
    """
    Calcula métricas precisao, revocação, f1 e acurácia para o label 'category' ou 'action'
    """
    unique_labels = set(pred_label).union(set(test_label))
    results = []
    for label in unique_labels:
        # Condições para cada métrica
        # True Positive: previu a categoria e era a categoria
        tp = float(((pred_label == label) & (test_label == label)).sum())
        # False Positive: previu a categoria e não era
        fp = float(((pred_label == label) & (test_label != label)).sum())
        # False Negative: não previu a categoria, mas de fato era
        fn = float(((pred_label != label) & (test_label == label)).sum())
        # True Negatives: não previu a categoria e de fato não era
        tn = float(((pred_label != label) & (test_label != label)).sum())
        print(f"tp: {tp}, fp: {fp}, fn: {fn}, tn: {tn}")
        precision, recall, f1, accuracy = calcularResultados(tp, fp, fn, tn)
        
        new_json = {
            target_label: label,
            'precision': precision,
            'recall': recall,
            'f1-score': f1,
            'accuracy': accuracy,
        }
        results.append(new_json)
    return results



def media_por_label(teste, output,target_label):
    """
    Essa função faz o mesmo que a função "media_por_categoria" usada no repo 'llm_evaluation'
    Porém mais flexível, podendo ser usada para calcular o quanto que, para cada action ou category, o LLM acerta os outros paramêtros
    """
    unique_labels = set(output[target_label]).union(set(teste[target_label]))
    media = {label: 0 for label in unique_labels}
    total_labels = len(teste.columns)
    for label in unique_labels:
        indices = output[output[target_label] == label].index
        linhas_label_teste = teste.loc[indices]
        linhas_label_output = output.loc[indices]
        for (_, row_test), (_, row_out) in zip(linhas_label_teste.iterrows(), linhas_label_output.iterrows()):
            acertos = sum(val_test == val_out for val_test, val_out in zip(row_test.values, row_out.values))/total_labels
            media[label] += acertos
        media[label] = round(media[label] / len(linhas_label_output), 4)
    
    return [media]


In [413]:
output_test.head(15)

Unnamed: 0,intent,category,action,requirement,targets,magnitude,start_time,end_time
0,configuration files has to be retrieved from c...,transfer,pull,,central media server,,2025-01-27 01:09:13.238840,NaT
1,Pull custom monitoring tools from the local ne...,transfer,pull,,custom monitoring tools,,2025-01-27 01:09:13.238859,NaT
2,For IoT devices integrated into the campus net...,regulate,block,video streaming services,IoT devices integrated into the campus network,,2025-01-27 01:09:13.238863,NaT
3,error,,,,,,NaT,NaT
4,Keep a record of real-time data throughput per...,construct,discover,,personal laptops in the faculty network,,2025-01-27 01:09:13.238866,NaT
5,Identifique dispositivos móveis que suportam c...,construct,discover,,,,2025-01-27 01:09:13.238868,NaT
6,Notify devices in the quality assurance lab th...,transfer,push,secure peer-to-peer file sharing service and c...,devices in the quality assurance lab,,2025-01-27 01:09:13.238870,NaT
7,Real-time data throughput per device associate...,regulate,monitor,data throughput per device,research lab computers,,2025-01-27 01:09:13.238872,NaT
8,Streaming services must have priority in the n...,regulate,prioritize,,streaming services,download speed greater than 100mbps,2025-01-27 01:09:13.238874,NaT
9,For devices connected via the staff-only netwo...,regulate,block,,devices connected via the staff-only network,,2025-01-27 01:09:13.238876,NaT


In [414]:
ground_test.head(15)

Unnamed: 0,intent,category,action,requirement,targets,magnitude
0,configuration files has to be retrieved from c...,transfer,pull,configuration files,central media server,
1,Pull custom monitoring tools from the local ne...,transfer,pull,custom monitoring tools,local network department,
2,For IoT devices integrated into the campus net...,regulate,block,platforms hosting video streaming services,IoT devices integrated into the campus network,
3,guest Wi-Fi network should have its packet los...,regulate,monitor,packet loss rates,guest Wi-Fi network,
4,Keep a record of real-time data throughput per...,regulate,monitor,real-time data throughput per device,personal laptops in the faculty network,
5,Identify mobile devices supporting VPN compati...,construct,discover,VPN compatibility,mobile devices,
6,Notify devices in the quality assurance lab th...,construct,advertise,secure peer-to-peer file sharing servicecritic...,devices in the quality assurance lab,
7,real-time data throughput per device associate...,regulate,monitor,real-time data throughput per device,research lab computers,
8,streaming services must have priority in the n...,regulate,prioritize,,streaming services,download speed greater than 100mbps
9,For devices connected via the staff-only netwo...,regulate,block,community forums and discussion boards,devices connected via the staff-only network,


In [415]:
metrics = calculateMetrics(output_test['category'], ground_test['category'], 'category')
metrics


tp: 3.0, fp: 1.0, fn: 1.0, tn: 10.0
tp: 0.0, fp: 1.0, fn: 0.0, tn: 14.0
tp: 4.0, fp: 0.0, fn: 3.0, tn: 8.0
tp: 3.0, fp: 3.0, fn: 1.0, tn: 8.0


[{'category': 'transfer',
  'precision': 0.75,
  'recall': 0.75,
  'f1-score': 0.75,
  'accuracy': 0.8667},
 {'category': '',
  'precision': 0.0,
  'recall': 0,
  'f1-score': 0,
  'accuracy': 0.9333},
 {'category': 'regulate',
  'precision': 1.0,
  'recall': 0.5714,
  'f1-score': 0.7272,
  'accuracy': 0.8},
 {'category': 'construct',
  'precision': 0.5,
  'recall': 0.75,
  'f1-score': 0.6,
  'accuracy': 0.7333}]

In [416]:
media = media_por_label(ground_test, output_test, 'category')
media

[{'transfer': 0.5, '': 0.1667, 'regulate': 0.625, 'construct': 0.4167}]

In [417]:

import json
import os
def save_results(file, results):
    directory = os.path.dirname(file)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(file, 'w') as f:
        json.dump(results, f, indent=4)


In [418]:
test_size = 399
model_name = 'llama'
current_model = 'llama3'
results = [{'intents': test_size}] + metrics + media
file_path = f"./results/{model_name}/{current_model}.json"
save_results(file_path, results)

In [419]:
f = open('teste.json')
a = json.load(f)
a

FileNotFoundError: [Errno 2] No such file or directory: 'teste.json'

In [410]:
for c in a:
    if c.get('category'):
        print(c)

{'category': 'transfer', 'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'accuracy': 0.8667}
{'category': 'regulate', 'precision': 1.0, 'recall': 0.5714, 'f1-score': 0.7272, 'accuracy': 0.8}
{'category': 'construct', 'precision': 0.5, 'recall': 0.75, 'f1-score': 0.6, 'accuracy': 0.7333}
