In [6]:
import os
import json
from multiprocessing import Pool, cpu_count

# Define the input and output directories
# input_dir: directory that is scanned for the *.json files to process.
# output_dir: directory that receives one .txt file per processed JSON file.
# NOTE(review): "Riskwareware" in output_dir looks like a possible typo for
# "Riskware" -- confirm before renaming, since existing outputs may already
# live under this path.
input_dir = "/home/belief/Desktop/MalwareDetection/JSONs/Capturing-logs/RiskWareJson"
output_dir = "/home/belief/Desktop/MalwareDetection/Transformer/Databig/Riskwareware"
# Create the output directory (and any missing parents) at import time;
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

def process_file(filename):
    """Extract the ordered syscall-name sequence from one JSON file and save it.

    The file is read from ``input_dir``. Every record under
    ``behaviors -> dynamic -> host`` whose ``class`` is ``"SYSCALL"`` is
    scanned, and each of its ``low`` entries whose ``type`` is ``"SYSCALL"``
    contributes its ``sysname``. Names are ordered by the entry's ``id``
    (entries with equal ids keep their order of appearance, because the sort
    is stable) and written, space separated, to ``<output_dir>/<stem>.txt``.

    Entries that lack a ``sysname`` or an ``id`` are skipped: they can be
    neither named nor ordered, and previously a single such entry made the
    sort or the join raise ``TypeError`` and discarded the whole file.

    Args:
        filename: Name of a JSON file located inside ``input_dir``.

    Returns:
        ``"Processed <filename>"`` on success, otherwise
        ``"Error processing <filename>: <exception>"``. Failures are reported
        through the return value instead of being raised so that one bad
        file does not abort the batch this function is mapped over.
    """
    filepath = os.path.join(input_dir, filename)

    try:
        # Read the JSON text as UTF-8 explicitly instead of relying on the
        # platform's default encoding.
        with open(filepath, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Collect (id, sysname) pairs from "behaviors" -> "dynamic" -> "host".
        calls = []
        for entry in data.get("behaviors", {}).get("dynamic", {}).get("host", []):
            if entry.get("class") != "SYSCALL":
                continue
            for low_entry in entry.get("low", []):
                if low_entry.get("type") != "SYSCALL":
                    continue
                sysname = low_entry.get('sysname')
                call_id = low_entry.get('id')
                # Skip incomplete entries rather than failing the whole file.
                if sysname is None or call_id is None:
                    continue
                calls.append((call_id, sysname))

        # Order by id only (not by the whole tuple) so ties keep file order.
        calls.sort(key=lambda call: call[0])
        syscall_sequence = [sysname for _, sysname in calls]

        # Write the sequence to "<stem>.txt" in the output directory.
        output_path = os.path.join(output_dir, f"{os.path.splitext(filename)[0]}.txt")
        with open(output_path, 'w', encoding='utf-8') as output_file:
            output_file.write(" ".join(syscall_sequence))

        return f"Processed {filename}"

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of the batch, and
        # the error is surfaced to the caller via the returned message.
        return f"Error processing {filename}: {e}"

def main():
    """Run process_file over every '.json' file in input_dir and print the results.

    The file names are distributed over a pool with one worker process per
    reported CPU; each status string returned by process_file is printed on
    its own line, in the same order as the directory listing.
    """
    # Gather the names of the JSON files found in the input directory.
    candidates = []
    for name in os.listdir(input_dir):
        if name.endswith('.json'):
            candidates.append(name)

    # Fan the work out across worker processes; map() blocks until all
    # files have been handled and preserves the input order.
    worker_count = cpu_count()
    with Pool(processes=worker_count) as pool:
        outcomes = pool.map(process_file, candidates)

    # Report one status line per file.
    for line in outcomes:
        print(line)

# Run main() only when this file is executed as a script, not when it is
# imported. The multiprocessing documentation also asks for this guard so
# that worker processes which import the main module do not start the
# batch again.
if __name__ == "__main__":
    main()


Processed EB413EAC6D5BC1921E6DC417991E603D60C831EA0AE6E9A6461CBD062A342FD2.json
Processed 3202241E37271FE7FB3A71BB700FC2D81DBACB63FBC729503D32A40A81AFC03A.json
Processed 97F82403EF4155CE6F31A95719F557DFF523A6B2EA72773520A0F32821E9C7D8.json
Processed 98B454642F48655FF838543D158B72F5A924DD0E686B8B0B371595D938AE7363.json
Processed 185E2C7F23702FAC97EC47D8FD6C9F7B4BFE9D6FCF2A0130B66A2549BC71D47D.json
Processed F7DB974237DDE5066E5B737FA79F37F2764CDA0905BD4765765040F7AE07E349.json
Processed 675506D69FC48FA93E1F8D6CEB543FCB071B0B651B85990582BDFBFBA956B9C1.json
Processed 6B3ACEC87992D4677FFBD000A4B328DE317B7375A56361B70FC52A731B7B384B.json
Processed B2C97373488F0E38B4D0117AEFC507D4CBB2120964E6EAFE01326E4346480017.json
Processed CFDAB4EEC865E58F5A1098C3686A04CAEF8E10E4CBF55D8EFDD116D6C0160D3C.json
Processed CFE969BA03E73D96E5A6B92496685D2C462AD32549443BB12C87135D7A3BA5E6.json
Processed 2CE2A52C7FC912F16375823FD1A97DAD0CD8E814E6B8B8ECB897CA376A1780B7.json
Processed 8865077F5AA0A7A0B4F04460751427