In [1]:
import os
import json

def load_all_json_files(folder_path):
    """
    Load all JSON files from a directory into a list.

    Args:
        folder_path (str): Path to the folder containing JSON files.

    Returns:
        list: List of dictionaries, one per JSON file.
    """
    all_results = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".json"):
            file_path = os.path.join(folder_path, filename)
            try:
                with open(file_path, 'r') as f:
                    data = json.load(f)
                    data["file"] = filename  # Optional: add filename info
                    all_results.append(data)
            except Exception as e:
                print(f"Failed to load {filename}: {e}")
    
    return all_results

# Example usage
if __name__ == "__main__":
    folder = '/home/raisul/ANALYSED_DATA/prob_disasm_pe/'#"/home/raisul/ANALYSED_DATA/prob_disasm_pe/"  # Replace with your directory path
    results = load_all_json_files(folder)
    print(f"Loaded {len(results)} JSON files.")


Loaded 11807 JSON files.


In [2]:
results[0]

{'prob_disasm_false_positive': 115,
 'prob_disasm_false_negative': 8,
 'prob_disasm_true_positive': 1336,
 'prob_disasm_total': 1344,
 'linear_sweep_false_positive': 13,
 'linear_sweep_false_negative': 0,
 'linear_sweep_true_positive': 1344,
 'linear_sweep_total': 1344,
 'file': '0717187c26eeedc114334baa13e5a6e9.exe.json'}

In [3]:
# Filter results with linear_sweep_false_negative > 0
ls_fn_gt_zero = [res for res in results if res.get("linear_sweep_false_negative", 0) > 0]

# Print summary
print(f"Found {len(ls_fn_gt_zero)} files with linear_sweep_false_negative > 0:\n")
for res in ls_fn_gt_zero:
    print(f"{res['file']}: linear_sweep_false_negative = {res['linear_sweep_false_negative']}")

Found 1 files with linear_sweep_false_negative > 0:

fee7ad3d8e6d7fce5e8441fd74229f04.exe.json: linear_sweep_false_negative = 1


In [4]:
# If you haven't already, load results
# results = load_all_json_files(folder)

# Initialize accumulators
total = {
    "prob_disasm_false_positive": 0,
    "prob_disasm_false_negative": 0,
    "prob_disasm_true_positive": 0,
    "prob_disasm_total": 0,
    "linear_sweep_false_positive": 0,
    "linear_sweep_false_negative": 0,
    "linear_sweep_true_positive": 0,
    "linear_sweep_total": 0
}

# Count how many files
n = len(results)

# Sum up all values
for res in results:
    for key in total.keys():
        total[key] += res.get(key, 0)

# Now compute averages
average = {key: (total[key] / n) for key in total}

# Print neatly
print("Averages across all files:")
for key, value in average.items():
    print(f"{key}: {value:.2f}")

Averages across all files:
prob_disasm_false_positive: 118.59
prob_disasm_false_negative: 12.45
prob_disasm_true_positive: 1557.80
prob_disasm_total: 1570.25
linear_sweep_false_positive: 16.82
linear_sweep_false_negative: 0.00
linear_sweep_true_positive: 1570.25
linear_sweep_total: 1570.25
