In [1]:
import os
import json
import pathlib

In [2]:
# Directory that holds the classification report files read by this notebook.
# The location can be overridden with the TRIAGERX_REPORT_DIR environment
# variable so the notebook also runs outside the original author's machine;
# when the variable is unset the original path is used unchanged.
report_dir = os.environ.get(
    "TRIAGERX_REPORT_DIR",
    "/home/mdafifal.mamun/notebooks/triagerX/training/reports",
)

In [3]:
def compute_metrics(reports: list):
    """Average the test metrics stored in several classification report files.

    Every report is read as JSON and must provide a top-level
    ``"test_accuracy"`` value plus a ``"macro avg"`` mapping with
    ``"precision"``, ``"recall"`` and ``"f1-score"`` entries.

    Args:
        reports: Paths of the report files. Items may be ``pathlib.Path``
            objects or plain path strings; any iterable of them is accepted.

    Returns:
        dict: The arithmetic mean of each metric over all reports, under the
        keys ``"average_test_accuracy"``, ``"average_test_precision"``,
        ``"average_test_recall"`` and ``"average_test_f1_score"``.

    Raises:
        ValueError: If ``reports`` is empty, since no average can be computed
            (typically the glob pattern matched no file).
        KeyError: If a report lacks one of the expected keys.
    """
    avg_type = "macro avg"

    # Normalise to Path objects so both strings and Paths are accepted, and
    # sort so the files are always processed in the same order.
    report_paths = sorted(pathlib.Path(report_path) for report_path in reports)

    print(f"Number of reports: {len(report_paths)}")

    # Fail with an actionable message instead of a bare ZeroDivisionError.
    if not report_paths:
        raise ValueError(
            "compute_metrics() received no report files; "
            "check the report directory and the glob pattern."
        )

    accuracies = []
    precisions = []
    recalls = []
    f1s = []

    for report_path in report_paths:
        with open(report_path, "r", encoding="utf-8") as json_file:
            json_data = json.load(json_file)

        accuracies.append(json_data["test_accuracy"])
        precisions.append(json_data[avg_type]["precision"])
        recalls.append(json_data[avg_type]["recall"])
        f1s.append(json_data[avg_type]["f1-score"])

    report_count = len(report_paths)

    return {
        "average_test_accuracy": sum(accuracies) / report_count,
        "average_test_precision": sum(precisions) / report_count,
        "average_test_recall": sum(recalls) / report_count,
        "average_test_f1_score": sum(f1s) / report_count,
    }

## Evaluation when no preprocessing is used

In [4]:
# Gather every report in report_dir whose file name starts with
# "classification_report_comp_raw", then average their metrics.
raw_report_paths = list(
    pathlib.Path(report_dir).glob("classification_report_comp_raw*")
)
compute_metrics(raw_report_paths)

Number of reports: 3


{'average_test_accuracy': 0.7680690399137001,
 'average_test_precision': 0.6160283045593834,
 'average_test_recall': 0.5656817336052782,
 'average_test_f1_score': 0.581260711955874}

## Evaluation when using custom special tokens like \[TIMESTAMP\], \[HEX\]

In [5]:
# Gather every report in report_dir whose file name starts with
# "classification_report_comp_sp", then average their metrics.
special_token_report_paths = list(
    pathlib.Path(report_dir).glob("classification_report_comp_sp*")
)
compute_metrics(special_token_report_paths)

Number of reports: 3


{'average_test_accuracy': 0.7885652642934197,
 'average_test_precision': 0.6901914087097714,
 'average_test_recall': 0.5532214383816733,
 'average_test_f1_score': 0.5857131989033775}

## Evaluation when using LLaMA-generated summaries + special tokens

How many popular open-source projects are similar to OpenJ9?

Is the LLM-generated summary approach better than treating each content type separately?

In [6]:
# Gather every report in report_dir whose file name starts with
# "classification_report_comp_summary", then average their metrics.
summary_report_paths = list(
    pathlib.Path(report_dir).glob("classification_report_comp_summary*")
)
compute_metrics(summary_report_paths)

Number of reports: 3


{'average_test_accuracy': 0.7918015102481122,
 'average_test_precision': 0.7318254606073173,
 'average_test_recall': 0.5683651166548337,
 'average_test_f1_score': 0.6062964344859102}

In [7]:
import yaml

In [10]:
# Parse the component-summary training configuration from its YAML file.
config_path = pathlib.Path(
    "/home/mdafifal.mamun/notebooks/triagerX/training/training_config/component/component_summary.yaml"
)
with config_path.open("r") as stream:
    config = yaml.safe_load(stream)

In [14]:
# Look up the configured weights location and display it as the cell output.
weights_save_location = config["weights_save_location"]
weights_save_location

'/work/disa_lab/projects/triagerx/models/#{run_name}'