## Obtain unique sysnames and procnames

In [None]:
import os
import json

# Dataset location and the category sub-folders that are scanned beneath it.
root_dir = "/home/belief/Desktop/MalwareDetection/JSONs/Capturing-logs"
categories = [
    "AdwareJson",
    "BankingJson",
    "BenignJson",
    "RiskWareJson",
    "SmsJson",
]

# Module-level accumulators: every call to collect_unique_sysname_procname()
# adds the names it finds to these sets.
unique_sysnames = set()
unique_procnames = set()

# Function to parse one JSON file and collect its sysname and procname values
def collect_unique_sysname_procname(file_path):
    """Collect the sysname/procname values found in one JSON log file.

    Loads ``file_path`` as JSON and walks the list stored at
    ``data['behaviors']['dynamic']['host']``. For every entry:

    * a truthy ``entry['procname']`` is added to ``unique_procnames``;
    * if the first element of ``entry['low']`` has a ``'sysname'`` key, that
      value is added to ``unique_sysnames``.

    The function is best-effort and never raises to the caller:

    * a file that cannot be read, is not valid JSON, or lacks the
      ``behaviors/dynamic/host`` path is reported with ``print`` and skipped;
    * an individual entry that does not have the expected shape (not a
      mapping, missing or empty ``'low'``, ...) is skipped on its own, so the
      remaining entries of the same file are still processed. The number of
      skipped entries is reported once per file.

    Args:
        file_path: Path of the JSON file to scan.

    Returns:
        None. Results are accumulated in the module-level sets
        ``unique_sysnames`` and ``unique_procnames``.
    """
    try:
        # JSON interchange text is UTF-8; do not depend on the locale default.
        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)

        malformed = 0
        for entry in data['behaviors']['dynamic']['host']:
            try:
                # procname is handled first so it is kept even when the
                # entry's 'low' list turns out to be missing or empty.
                procname = entry.get('procname')
                if procname:
                    unique_procnames.add(procname)

                # NOTE(review): only the first element of 'low' is inspected,
                # as before; confirm whether later elements can carry
                # additional sysnames that should be collected too.
                first_low = entry['low'][0]
                if 'sysname' in first_low:
                    unique_sysnames.add(first_low['sysname'])
            except (AttributeError, IndexError, KeyError, TypeError):
                # One bad entry must not discard the rest of the file.
                malformed += 1

        if malformed:
            print(f"Skipped {malformed} malformed entries in {file_path}")

    except json.JSONDecodeError:
        print(f"Skipping file due to JSONDecodeError: {file_path}")
    except Exception as e:
        # Per-file boundary: report and move on to the next file.
        print(f"Error processing file {file_path}: {e}")

# Scan every *.json file directly inside each category folder.
for category in categories:
    category_dir = os.path.join(root_dir, category)
    json_names = [name for name in os.listdir(category_dir) if name.endswith('.json')]
    for json_name in json_names:
        collect_unique_sysname_procname(os.path.join(category_dir, json_name))

# Write both collections to one text report in the root directory.
output_path = os.path.join(root_dir, "unique_sysnames_procnames.txt")
# Each section is (heading, names to list, text written after the last name).
report_sections = (
    ("Unique Sysnames:\n", unique_sysnames, "\n\n"),
    ("Unique Procnames:\n", unique_procnames, "\n"),
)
with open(output_path, 'w') as report:
    for heading, names, trailer in report_sections:
        report.write(heading)
        # Sorted so the report is stable and easy to read.
        report.write("\n".join(sorted(names)) + trailer)

print(f"Unique sysnames and procnames saved to {output_path}")
