# Kernel Telemetry Analysis Automation

This notebook automates the process of executing malware samples in a controlled VM environment with kernel-level telemetry collection:
1. Restore VM to the configured snapshot (`snapshot_no`, currently 34)
2. Start the VM
3. Install kernel driver (install.bat)
4. Start GP driver service (sc start gp-driver)
5. Execute malware sample (already present in VM)
6. Terminate malware process
7. Process telemetry data (rename main.py.dll to main.py)
8. Run Python analysis script
9. Copy CSV results to shared folder

In [60]:
import os
import time
import subprocess
import itertools
import re
import shutil

In [61]:
# Create the host-side output directories. os.makedirs(exist_ok=True) is
# idempotent and avoids spawning a shell just to run `mkdir -p`.
os.makedirs('screenshots', exist_ok=True)
os.makedirs('kernel-telemetry-results', exist_ok=True)

0

In [62]:
# Guest VM identity and the snapshot number each run is restored to
vm_name = 'windows11-clone Clone'
snapshot_no = 34

# VBoxManage command prefixes; callers append the guest command or file paths.
vm_open_cmd = 'vboxmanage startvm "{}"'.format(vm_name)
run_cmd = (
    'vboxmanage guestcontrol "{}" run --username wineleven --password therealtreasure -- '.format(vm_name)
    + '"C:\\Windows\\System32\\cmd.exe" "/c" '
)
copyfrom_cmd = (
    'vboxmanage guestcontrol "{}" --username wineleven --password therealtreasure copyfrom '.format(vm_name)
)

# Host output folder and guest-side locations
output_folder = 'kernel-telemetry-results'
hafidz_dir = 'C:\\Users\\wineleven\\Documents\\Hafidz\\'
shared_folder_path = "C:\\Users\\wineleven\\Desktop\\Shared"
z_drive = "Z:"

In [63]:
# Utility functions

def tstamp(t=None):
    """Format a timestamp as ``YYYY-MM-DD HH:MM:SS.mmm`` in local time.

    Args:
        t: Seconds since the epoch. Defaults to the current time.

    Returns:
        The formatted timestamp string with millisecond precision.
    """
    if t is None:
        t = time.time()
    # Truncate (not round) the fractional part: rounding 0.9996 to three
    # decimals yields "1.000", which would print ".000" while the seconds
    # field still shows the earlier second.
    millis = int((t % 1) * 1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t)) + f".{millis:03d}"

def check_output_t(command):
    """Run *command* through the shell and return its captured stdout as bytes.

    A non-zero exit status propagates as ``subprocess.CalledProcessError``.
    """
    return subprocess.check_output(command, shell=True)

def run_vm_command(command, wait_time=2, timeout=30):
    """Execute a cmd.exe command line inside the guest and wait for it.

    Args:
        command: Command line handed to ``cmd.exe /c`` in the guest.
        wait_time: Seconds to sleep after the command returns.
        timeout: Seconds to wait for VBoxManage before giving up.

    Returns:
        The ``subprocess.CompletedProcess`` (text mode, output captured), or
        ``None`` if the command timed out or VBoxManage could not be launched.
    """
    import shlex  # local import: only this helper needs it

    # Tokenise the configured prefix and append the guest command as a single
    # argv element. Running without a host shell keeps quotes and backslashes
    # inside `command` intact; the shell-string form re-parsed them on the host.
    argv = shlex.split(run_cmd) + [command]
    print(f"[{tstamp()}] Executing: {command}")
    try:
        result = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        print(f"Command timed out: {command}")
        return None
    except OSError as e:
        # e.g. the VBoxManage executable is missing from PATH
        print(f"Warning: could not launch command: {e}")
        return None

    if result.returncode != 0:
        print(f"Warning: Command failed with return code {result.returncode}")
        print(f"Error: {result.stderr}")
    time.sleep(wait_time)
    return result

In [64]:
# VM operations

def restore(snap_no):
    """Roll the VM back to the snapshot named ``Snapshot <snap_no>``."""
    label = f"Restoring snapshot {snap_no}"
    print(f"[{tstamp()}] {label}")
    check_output_t(f'vboxmanage snapshot "{vm_name}" restore "Snapshot {snap_no}"')
    print(f"[{tstamp()}] {label} done")

def shutdown_and_restore(snap_no):
    """Power off the VM (best effort) and restore it to a snapshot.

    Args:
        snap_no: Snapshot number passed on to ``restore``.
    """
    print(f"[{tstamp()}] Powering off VM")
    try:
        check_output_t(f'vboxmanage controlvm "{vm_name}" poweroff')
    except subprocess.CalledProcessError:
        # A non-zero exit is expected when the VM is not running. Catching only
        # this keeps Ctrl-C and unrelated errors from being silently swallowed.
        print('VM (probably) already off')
    else:
        print(f"[{tstamp()}] Powering off VM done")
    # Short pause before restoring the snapshot
    time.sleep(3)
    restore(snap_no)

def start_vm():
    """Boot the VM using the preconfigured ``vm_open_cmd``."""
    action = "Starting up VM"
    print(f"[{tstamp()}] {action}")
    check_output_t(vm_open_cmd)
    print(f"[{tstamp()}] {action} done")

def take_screenshot(filename):
    """Save a PNG screenshot of the VM display to ``screenshots/<filename>.png``.

    Args:
        filename: Base name of the image file, without extension.
    """
    print(f"[{tstamp()}] Taking screenshot")
    target = os.path.join('screenshots', f'{filename}.png')
    # Argument list instead of a shell string, so a filename containing
    # spaces or shell metacharacters cannot break the command.
    try:
        result = subprocess.run(['vboxmanage', 'controlvm', vm_name, 'screenshotpng', target])
    except OSError as e:
        print(f"Warning: could not take screenshot: {e}")
        return
    if result.returncode != 0:
        # Report the failure rather than claiming the screenshot was taken.
        print(f"Warning: screenshot failed with return code {result.returncode}")
        return
    print(f"[{tstamp()}] Taking screenshot done")

In [65]:
# Kernel driver and telemetry operations

def install_kernel_driver():
    """Run ``install.bat`` from the Hafidz directory inside the guest."""
    print(f"[{tstamp()}] Installing kernel driver")
    # Switch to the Hafidz directory first, then launch the installer
    run_vm_command("cd /d " + hafidz_dir + " && install.bat", wait_time=5)
    print(f"[{tstamp()}] Kernel driver installation completed")

def start_gp_driver():
    """Issue ``sc start gp-driver`` inside the guest."""
    print(f"[{tstamp()}] Starting GP driver service")
    # Run from the Hafidz directory, as the other driver steps do
    run_vm_command("cd /d " + hafidz_dir + " && sc start gp-driver", wait_time=3)
    print(f"[{tstamp()}] GP driver service started")
    
def start_ioctl():
    """Launch ``IOCTL.exe`` from the Hafidz directory inside the guest."""
    print(f"[{tstamp()}] Starting IOCTL.exe")
    # Switch to the Hafidz directory, then start the executable
    run_vm_command("cd /d " + hafidz_dir + " && IOCTL.exe", wait_time=5)
    print(f"[{tstamp()}] IOCTL.exe started")
    
def process_alert_data():
    """Export alerts to a timestamped CSV in the guest via ``IOCTL.exe export-csv``."""
    print(f"[{tstamp()}] Processing alert data")

    # The file name carries the host's local time so repeated runs do not collide
    alerts_filename = "alerts_" + time.strftime("%Y%m%d_%H%M%S") + ".csv"

    # Run the export from the Hafidz directory
    run_vm_command(
        f'cd /d {hafidz_dir} && IOCTL.exe export-csv {alerts_filename} alerts',
        wait_time=5,
    )

    print(f"[{tstamp()}] Alert data processing completed - saved as {alerts_filename}")

def process_telemetry_data():
    """Rename ``main.py.dll`` to ``main.py``, run it, and copy the CSVs to the Z: drive."""
    print(f"[{tstamp()}] Processing telemetry data")

    # Each step runs only if the previous one succeeded (cmd.exe `&&` chaining)
    steps = (
        f"cd /d {hafidz_dir}",
        "ren main.py.dll main.py",
        "python main.py",
        f"copy *.csv {z_drive}",
    )
    run_vm_command(" && ".join(steps), wait_time=10)

    print(f"[{tstamp()}] Telemetry data processing completed")

def _copy_csv_from_vm():
    """Copy every CSV listed on the guest's Z: drive into ``output_folder`` on the host."""
    result = run_vm_command(f"dir {z_drive}\\*.csv /b")
    if not (result and result.stdout):
        return

    for csv_file in result.stdout.splitlines():
        csv_file = csv_file.strip()
        if not csv_file:  # skip blank lines in the listing
            continue

        src_path = f"{z_drive}\\{csv_file}"
        dst_path = f"{output_folder}/{csv_file}"
        copy_cmd = f'{copyfrom_cmd} "{src_path}" "{dst_path}"'

        # Only report success when VBoxManage actually exited cleanly
        status = os.system(copy_cmd)
        if status == 0:
            print(f"[{tstamp()}] Copied {csv_file} to {output_folder}")
        else:
            print(f"[{tstamp()}] Warning: copying {csv_file} failed (exit status {status})")


def _copy_shared_csv(source_folder, destination_folder):
    """Copy all ``*.csv`` files from ``source_folder`` into ``destination_folder``."""
    # Make sure the destination folder exists
    os.makedirs(destination_folder, exist_ok=True)

    for filename in os.listdir(source_folder):
        if filename.lower().endswith(".csv"):
            shutil.copy2(
                os.path.join(source_folder, filename),
                os.path.join(destination_folder, filename),
            )
            print(f"[{tstamp()}] Copied: {filename}")


def copy_csv_results(source_folder="/home/uwuntu/Downloads/sharedfolder",
                     destination_folder="/home/uwuntu/Downloads/ScriptDataMalwareHafidz"):
    """Collect CSV results: guest -> host output folder, then shared folder -> archive.

    Both stages are best effort: a failure is printed and the function moves on.

    Args:
        source_folder: Host directory scanned for CSV files in the second stage.
        destination_folder: Host directory the CSV files are copied into;
            created if missing.
    """
    print(f"[{tstamp()}] Copying CSV results to host")
    try:
        _copy_csv_from_vm()
    except (OSError, subprocess.SubprocessError) as e:
        print(f"Error copying CSV files: {e}")
    print(f"[{tstamp()}] CSV results copying from vm to host completed")

    print(f"[{tstamp()}] Copying CSV results from sharedfolder to ScriptDataMalwareHafidz")
    try:
        _copy_shared_csv(source_folder, destination_folder)
    except OSError as e:
        print(f"[{tstamp()}] Error copying CSV files: {e}")
    print(f"[{tstamp()}] CSV results copying completed")

In [66]:
# Malware operations

def execute_malware(malware_path):
    """Launch the sample inside the guest without blocking the host.

    Args:
        malware_path: Full path of the sample as seen from inside the VM.
    """
    print(f"[{tstamp()}] Executing malware sample: {malware_path}")

    guest_auth = ["--username", "wineleven", "--password", "therealtreasure"]
    # The empty string is the argument that follows "start" in the original
    # command (presumably the window-title slot of cmd's `start`).
    guest_cmd = ["cmd.exe", "/c", "start", "", malware_path]
    argv = ["vboxmanage", "guestcontrol", vm_name, "run", *guest_auth, "--", *guest_cmd]

    # Fire and forget: Popen returns immediately, the host does not wait
    subprocess.Popen(argv)

    print(f"[{tstamp()}] Malware execution started")

def wait_malware(wait_time):
    """Sleep for ``wait_time`` seconds while redrawing a one-line progress bar."""
    print(f"[{tstamp()}] Waiting for {wait_time} seconds for malware execution")
    for elapsed in range(1, wait_time + 1):
        bar = '#' * elapsed + '_' * (wait_time - elapsed)
        # '\r' rewinds to the start of the line so the bar updates in place
        print(f'\r[{bar}] {elapsed}/{wait_time}s', end='')
        time.sleep(1)
    full_bar = '#' * wait_time
    print(f'\r[{full_bar}] {wait_time}/{wait_time}s - Complete!')

def terminate_malware(malware_name):
    """Force-kill the sample in the guest, then sweep a few catch-all image patterns.

    Args:
        malware_name: Image name of the sample's process (e.g. ``sample.exe``).
    """
    print(f"[{tstamp()}] Terminating malware: {malware_name}")

    # Kill process by executable name
    run_vm_command(f"taskkill /f /im {malware_name}", wait_time=2)

    # Also try to kill any suspicious processes.
    # NOTE(review): '*.exe' looks like it would match every process image in
    # the guest, including tooling that later steps rely on - confirm intent.
    suspicious_processes = ['*.exe', 'malware*', 'ransomware*']
    for process in suspicious_processes:
        try:
            run_vm_command(f"taskkill /f /im {process}", wait_time=1)
        except Exception as e:
            # Best effort: a failed sweep must not abort the workflow, but it
            # should be visible, and Ctrl-C must still propagate.
            print(f"Warning: could not run taskkill for {process}: {e}")

    print(f"[{tstamp()}] Malware termination completed")

In [67]:
def run_kernel_telemetry_analysis(malware_path, malware_name, execution_time=60):
    """Run the complete kernel telemetry analysis workflow for one sample.

    Args:
        malware_path: Full path of the sample inside the VM.
        malware_name: Executable name, used for process termination and
            screenshot file names.
        execution_time: Seconds to let the sample run before terminating it.

    Returns:
        True if every step completed without raising. False if an exception
        occurred; the error is printed and emergency cleanup is attempted.
    """
    print(f"[{tstamp()}] Starting kernel telemetry analysis for {malware_path}")
    print(f"[{tstamp()}] Malware execution time: {execution_time} seconds")

    # Initialize timing
    t0 = time.time()
    succeeded = False

    try:
        # Step 1: Restore snapshot and start VM
        restore(snapshot_no)
        start_vm()
        time.sleep(5)  # Allow system to stabilize

        # Step 2: Install kernel driver
        install_kernel_driver()

        # Step 3: Start GP driver service
        start_gp_driver()

        # Step 4: Start IOCTL.exe
        start_ioctl()

        # Step 5: Execute malware (already present in VM)
        execute_malware(malware_path)

        # Step 6: Wait for malware execution
        wait_malware(execution_time)

        # Step 7: Take screenshot for evidence
        take_screenshot(f"{malware_name}_execution")

        # Step 8: Terminate malware
        terminate_malware(malware_name)

        # Step 9: Process alert data
        process_alert_data()

        # Step 10: Process telemetry data
        time.sleep(5)  # Allow system to settle after termination
        process_telemetry_data()

        # Step 11: Copy CSV results to host
        copy_csv_results()

        # Step 12: Take final screenshot
        take_screenshot(f"{malware_name}_final")

        # Step 13: Shutdown and restore VM
        time.sleep(3)
        shutdown_and_restore(snapshot_no)

        succeeded = True

    except Exception as e:
        print(f"Error during analysis: {e}")
        # Emergency cleanup: try to stop the sample, then reset the VM anyway
        try:
            terminate_malware(malware_name)
        except Exception as cleanup_error:
            print(f"Warning: emergency termination failed: {cleanup_error}")
        shutdown_and_restore(snapshot_no)

    elapsed = time.time() - t0
    print(f'[{tstamp()}] Kernel telemetry analysis completed in {elapsed:.1f} seconds.\n')
    # The outcome is returned so callers can tell a failed run from a clean one
    return succeeded

## Run Analysis for a Single Malware Sample

Use this cell to analyze a single malware sample with kernel telemetry. The malware should already be present in the VM.

In [68]:
# Sample to analyze: full path inside the VM, plus the bare executable name
malware_path = r"D:\new\ransomware_high_confidence\avoslocker\0b1f19ba8740b10ed017671aab023228756a6864fb008bf23f3c606189bdcd98.exe"  # Full path in VM
malware_name = "0b1f19ba8740b10ed017671aab023228756a6864fb008bf23f3c606189bdcd98.exe"  # Executable name only; used for taskkill and screenshot names

# How long (seconds) to let the sample run before it is terminated
execution_time = 5  # Short test run; the function's own default is 60 seconds

# NOTE: this call is live - running the cell restores the snapshot, boots the
# VM and executes the sample. Comment it out to load the cell without running.
run_kernel_telemetry_analysis(malware_path, malware_name, execution_time)

[2025-05-22 14:21:23.866] Starting kernel telemetry analysis for D:\new\ransomware_high_confidence\avoslocker\0b1f19ba8740b10ed017671aab023228756a6864fb008bf23f3c606189bdcd98.exe
[2025-05-22 14:21:23.866] Malware execution time: 5 seconds
[2025-05-22 14:21:23.866] Restoring snapshot 33


0%...10%...20%...30%...40%...50%...60%...70%...80%...90%...100%


[2025-05-22 14:21:24.415] Restoring snapshot 33 done
[2025-05-22 14:21:24.416] Starting up VM
[2025-05-22 14:21:32.128] Starting up VM done
[2025-05-22 14:21:37.128] Installing kernel driver
[2025-05-22 14:21:37.129] Executing: cd /d C:\Users\wineleven\Documents\Hafidz\ && install.bat
[2025-05-22 14:21:42.507] Kernel driver installation completed
[2025-05-22 14:21:42.508] Starting GP driver service
[2025-05-22 14:21:42.508] Executing: cd /d C:\Users\wineleven\Documents\Hafidz\ && sc start gp-driver
[2025-05-22 14:21:46.634] GP driver service started
[2025-05-22 14:21:46.634] Starting IOCTL.exe
[2025-05-22 14:21:46.634] Executing: cd /d C:\Users\wineleven\Documents\Hafidz\ && IOCTL.exe
Command timed out: cd /d C:\Users\wineleven\Documents\Hafidz\ && IOCTL.exe
[2025-05-22 14:22:16.666] IOCTL.exe started
[2025-05-22 14:22:16.666] Executing malware sample: D:\new\ransomware_high_confidence\avoslocker\0b1f19ba8740b10ed017671aab023228756a6864fb008bf23f3c606189bdcd98.exe
[2025-05-22 14:22:16.

0%...10%...20%...30%...40%...50%...60%...70%...80%...90%...100%


[2025-05-22 14:23:09.854] Powering off VM done
[2025-05-22 14:23:12.855] Restoring snapshot 33


0%...10%...20%...30%...40%...50%...

[2025-05-22 14:23:13.488] Restoring snapshot 33 done
[2025-05-22 14:23:13.488] Kernel telemetry analysis completed in 109.6 seconds.



60%...70%...80%...90%...100%


## Run Analysis for Multiple Samples

Use this cell to analyze multiple malware samples already present in the VM.

In [None]:
import os
import time
from datetime import datetime
import json

# =================================================================================================
# CONFIGURATION SETTINGS
# =================================================================================================

class Config:
    """Configuration constants for batch analysis."""

    # --- VM ---
    VM_NAME = 'windows11-clone Clone'
    SNAPSHOT_NO = 34

    # --- Timing (all values in seconds) ---
    DEFAULT_EXECUTION_TIME = 60   # how long each sample is allowed to run
    DEFAULT_DELAY_BETWEEN = 30    # pause between consecutive samples
    VM_STARTUP_WAIT = 15          # wait after starting the VM
    VM_RECOVERY_WAIT = 10         # wait after a recovery attempt

    # --- Host directories ---
    OUTPUT_FOLDER = 'kernel-telemetry-results'
    RESULTS_BACKUP = 'batch_analysis_results'

    # --- Malware directories inside the VM: one folder per family ---
    MALWARE_DIRECTORIES = [
        "D:\\new\\ransomware_high_confidence\\" + family
        for family in (
            "avaddon", "avoslocker", "babuk", "bianlian", "blackbasta",
            "blackbyte", "blackcat", "blackmatter", "bluesky", "clop",
            "conti", "darkside", "dharma", "doppelpaymer", "exorcist",
            "gandcrab", "hive", "holyghost", "karma", "lockbit",
            "lorenz", "makop", "maui", "maze", "mountlocker",
            "nefilim", "netwalker", "nightsky", "phobos", "playcrypt",
            "pysa", "quantum", "ragnarok", "ransomexx", "revil",
            "ryuk", "stop", "thanos", "wastedlocker", "zeppelin",
        )
    ]

    # Glob patterns used when listing a family directory
    FILE_EXTENSIONS = ['*.exe', '*.dll', '*.bin']

    # High-priority families (for quick testing)
    PRIORITY_FAMILIES = [
        'avoslocker', 'lockbit', 'ryuk', 'conti', 'revil',
        'maze', 'clop', 'dharma', 'phobos', 'stop',
    ]

# =================================================================================================
# CORE CLASSES
# =================================================================================================

class BatchAnalysisLogger:
    """Console logger for batch analysis with consistent formatting."""

    @staticmethod
    def timestamp():
        """Return the current local time as ``YYYY-MM-DD HH:MM:SS.mmm``."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

    @staticmethod
    def info(message, prefix="INFO"):
        """Print ``message`` preceded by a timestamp and a ``[prefix]`` tag."""
        print(f"[{BatchAnalysisLogger.timestamp()}] [{prefix}] {message}")

    @staticmethod
    def success(message):
        """Print ``message`` tagged as SUCCESS."""
        BatchAnalysisLogger.info(f"✅ {message}", "SUCCESS")

    @staticmethod
    def error(message):
        """Print ``message`` tagged as ERROR."""
        BatchAnalysisLogger.info(f"❌ {message}", "ERROR")

    @staticmethod
    def warning(message):
        """Print ``message`` tagged as WARNING."""
        BatchAnalysisLogger.info(f"⚠️  {message}", "WARNING")

    @staticmethod
    def progress(current, total, elapsed_time, message=""):
        """Print a progress line with percentage, elapsed and estimated remaining time.

        Args:
            current: Number of items finished so far.
            total: Total number of items.
            elapsed_time: Seconds elapsed since the batch started.
            message: Free text appended to the line.
        """
        # Guard both divisions so an empty batch (total == 0) or a call before
        # the first item finishes (current == 0) cannot raise ZeroDivisionError.
        progress_pct = (current / total) * 100 if total > 0 else 0.0
        estimated_total = (elapsed_time / current) * total if current > 0 else 0
        remaining = max(0, estimated_total - elapsed_time)

        print(f"[{BatchAnalysisLogger.timestamp()}] [PROGRESS] "
              f"{current}/{total} ({progress_pct:.1f}%) | "
              f"Elapsed: {elapsed_time/60:.1f}m | "
              f"Remaining: {remaining/60:.1f}m | {message}")

class MalwareFile:
    """A single malware sample located inside the VM."""

    # Attribute names in the order they are serialised by to_dict()
    _FIELDS = ('family', 'filename', 'full_path', 'directory')

    def __init__(self, family, filename, full_path, directory):
        self.family = family
        self.filename = filename
        self.full_path = full_path
        self.directory = directory

    def __str__(self):
        """Render as ``<family>/<filename>``."""
        return "{}/{}".format(self.family, self.filename)

    def to_dict(self):
        """Return the four descriptive attributes as a plain dict."""
        return {name: getattr(self, name) for name in self._FIELDS}

class AnalysisResult:
    """Outcome of analysing one sample."""

    def __init__(self, malware_file, success=False, error_message="", duration=0):
        self.malware_file = malware_file
        self.success = success
        self.error_message = error_message
        self.duration = duration
        # Creation time of this result object (naive local time)
        self.timestamp = datetime.now()

    def to_dict(self):
        """Flatten the result and its sample's identifying fields into a dict."""
        sample = self.malware_file
        return dict(
            family=sample.family,
            filename=sample.filename,
            path=sample.full_path,
            success=self.success,
            error=self.error_message,
            duration=self.duration,
            timestamp=self.timestamp.isoformat(),
        )

class BatchAnalysisStats:
    """Running counters and collected results for a batch analysis."""

    def __init__(self):
        self.total_samples = 0
        self.successful = 0
        self.failed = 0
        self.start_time = None
        self.end_time = None
        self.results = []

    def start(self):
        """Record the batch start time."""
        self.start_time = time.time()

    def finish(self):
        """Record the batch end time."""
        self.end_time = time.time()

    def add_result(self, result):
        """Store ``result`` and bump the matching success/failure counter."""
        self.results.append(result)
        if not result.success:
            self.failed += 1
            return
        self.successful += 1

    @property
    def duration(self):
        """Seconds between start and finish, or 0 until both are recorded."""
        if not (self.start_time and self.end_time):
            return 0
        return self.end_time - self.start_time

    @property
    def success_rate(self):
        """Successful analyses as a percentage of ``total_samples`` (0 when empty)."""
        return (self.successful / self.total_samples) * 100 if self.total_samples != 0 else 0

    def print_summary(self):
        """Print the totals and, if any, the list of failed samples."""
        rule = "=" * 80
        per_sample = self.duration / max(1, self.total_samples)
        header_lines = (
            "\n" + rule,
            "BATCH ANALYSIS SUMMARY",
            rule,
            f"Total samples processed: {self.total_samples}",
            f"Successful analyses: {self.successful}",
            f"Failed analyses: {self.failed}",
            f"Success rate: {self.success_rate:.1f}%",
            f"Total duration: {self.duration/60:.1f} minutes",
            f"Average time per sample: {per_sample:.1f} seconds",
        )
        for line in header_lines:
            print(line)

        if self.failed <= 0:
            return

        print(f"\nFAILED SAMPLES ({self.failed}):")
        for result in self.results:
            if not result.success:
                print(f"  - {result.malware_file}: {result.error_message}")

# =================================================================================================
# VM MANAGEMENT FUNCTIONS
# =================================================================================================

class VMManager:
    """Static helpers for VM operations: connectivity test, start, shutdown, recovery."""

    @staticmethod
    def test_connection():
        """Boot the VM and check that guest commands can list the malware directories.

        Returns:
            True if at least one of the first three configured directories
            produced a listing, otherwise False (including on any exception).
        """
        BatchAnalysisLogger.info("Testing VM connection...")

        try:
            # Start VM
            VMManager.start_clean_vm()

            # Probe only the first three directories to keep the test short
            test_dirs = Config.MALWARE_DIRECTORIES[:3]
            accessible_dirs = 0

            for test_dir in test_dirs:
                BatchAnalysisLogger.info(f"Testing directory: {test_dir}")

                result = run_vm_command(f'dir "{test_dir}" /b', wait_time=5)
                if result and result.stdout and "cannot find" not in result.stdout.lower():
                    accessible_dirs += 1
                    BatchAnalysisLogger.success(f"Directory accessible: {test_dir}")
                else:
                    BatchAnalysisLogger.warning(f"Directory not accessible: {test_dir}")

            VMManager.shutdown_vm()

            if accessible_dirs > 0:
                BatchAnalysisLogger.success(f"VM connection test passed ({accessible_dirs}/{len(test_dirs)} directories accessible)")
                return True
            else:
                BatchAnalysisLogger.error("VM connection test failed - no directories accessible")
                return False

        except Exception as e:
            BatchAnalysisLogger.error(f"VM connection test failed: {str(e)}")
            try:
                VMManager.shutdown_vm()
            except Exception as cleanup_error:
                # Best effort only, but keep the failure visible and let
                # Ctrl-C propagate (no bare except).
                BatchAnalysisLogger.warning(f"VM shutdown after failed test also failed: {cleanup_error}")
            return False

    @staticmethod
    def start_clean_vm():
        """Restore the configured snapshot, start the VM and wait ``VM_STARTUP_WAIT`` seconds."""
        BatchAnalysisLogger.info("Starting VM from clean snapshot...")
        restore(Config.SNAPSHOT_NO)
        start_vm()
        time.sleep(Config.VM_STARTUP_WAIT)
        BatchAnalysisLogger.success("VM started successfully")

    @staticmethod
    def shutdown_vm():
        """Power off the VM and restore it to the configured snapshot."""
        BatchAnalysisLogger.info("Shutting down VM...")
        shutdown_and_restore(Config.SNAPSHOT_NO)
        BatchAnalysisLogger.success("VM shutdown completed")

    @staticmethod
    def recover_vm():
        """Try to bring the VM back to a clean state after an error.

        Returns:
            True if shutdown and restore completed, False if they raised.
        """
        BatchAnalysisLogger.warning("Attempting VM recovery...")
        try:
            VMManager.shutdown_vm()
            time.sleep(Config.VM_RECOVERY_WAIT)
            BatchAnalysisLogger.success("VM recovery successful")
            return True
        except Exception as e:
            BatchAnalysisLogger.error(f"VM recovery failed: {str(e)}")
            return False

# =================================================================================================
# FILE SCANNING FUNCTIONS
# =================================================================================================

class MalwareScanner:
    """Scanner that looks for malware files inside the VM."""

    # File-name suffixes accepted as samples when parsing a directory listing
    _VALID_SUFFIXES = ('.exe', '.dll', '.bin')

    @staticmethod
    def scan_all_directories():
        """Scan every configured malware directory in the VM.

        Returns:
            A list of ``MalwareFile`` objects; empty if the scan failed.
        """
        BatchAnalysisLogger.info("Starting comprehensive malware scan...")

        try:
            VMManager.start_clean_vm()

            all_files = []
            family_stats = {}

            for directory in Config.MALWARE_DIRECTORIES:
                # The family name is the last component of the Windows path
                family_name = os.path.basename(directory.replace('\\', '/'))
                BatchAnalysisLogger.info(f"Scanning family: {family_name}")

                family_files = MalwareScanner._scan_single_directory(directory, family_name)
                all_files.extend(family_files)
                family_stats[family_name] = len(family_files)

                BatchAnalysisLogger.info(f"Found {len(family_files)} files in {family_name}")

            VMManager.shutdown_vm()

            # Print summary
            BatchAnalysisLogger.success(f"Scan completed - {len(all_files)} total files found")
            print(f"\nFAMILY BREAKDOWN:")
            print("-" * 50)
            for family, count in sorted(family_stats.items()):
                print(f"{family:20} : {count:3d} files")

            return all_files

        except Exception as e:
            BatchAnalysisLogger.error(f"Directory scan failed: {str(e)}")
            try:
                VMManager.shutdown_vm()
            except Exception as cleanup_error:
                # Best effort only, but keep the failure visible and let
                # Ctrl-C propagate (no bare except).
                BatchAnalysisLogger.warning(f"VM shutdown after failed scan also failed: {cleanup_error}")
            return []

    @staticmethod
    def _parse_dir_listing(stdout):
        """Extract sample file names from the text output of ``dir <pattern> /b``.

        Each non-empty line is taken whole as one file name, so names that
        contain spaces are kept intact. Known error lines and names without
        an accepted suffix are dropped.
        """
        names = []
        for line in stdout.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            # Skip error messages
            if line.startswith(('File Not Found', 'The system cannot')):
                continue
            if line.lower().endswith(MalwareScanner._VALID_SUFFIXES):
                names.append(line)
        return names

    @staticmethod
    def _scan_single_directory(directory, family_name):
        """List one directory in the VM and wrap each sample found in a ``MalwareFile``."""
        files = []

        try:
            for ext in Config.FILE_EXTENSIONS:
                pattern = f"{directory}\\{ext}"
                result = run_vm_command(f'dir "{pattern}" /b', wait_time=3)

                if not (result and result.stdout):
                    continue

                for filename in MalwareScanner._parse_dir_listing(result.stdout):
                    files.append(MalwareFile(
                        family=family_name,
                        filename=filename,
                        full_path=f"{directory}\\{filename}",
                        directory=directory,
                    ))

        except Exception as e:
            BatchAnalysisLogger.warning(f"Error scanning {directory}: {str(e)}")

        return files

# =================================================================================================
# BATCH ANALYSIS ENGINE
# =================================================================================================

class BatchAnalysisEngine:
    """Main engine that runs the analysis workflow over a list of samples."""

    def __init__(self):
        self.stats = BatchAnalysisStats()

        # Create output directories
        os.makedirs(Config.OUTPUT_FOLDER, exist_ok=True)
        os.makedirs(Config.RESULTS_BACKUP, exist_ok=True)

    def run_analysis(self, malware_files, execution_time=None, delay_between=None):
        """Run batch analysis on a list of malware files.

        Args:
            malware_files: Sequence of ``MalwareFile`` objects to analyse.
            execution_time: Seconds each sample runs; ``None`` selects
                ``Config.DEFAULT_EXECUTION_TIME``.
            delay_between: Seconds to wait between samples; ``None`` selects
                ``Config.DEFAULT_DELAY_BETWEEN``.

        Returns:
            The engine's ``BatchAnalysisStats`` instance.
        """
        if not malware_files:
            BatchAnalysisLogger.error("No malware files provided for analysis")
            return self.stats

        # Compare against None so that an explicit 0 (e.g. "no delay") is
        # honoured instead of being replaced by the default.
        if execution_time is None:
            execution_time = Config.DEFAULT_EXECUTION_TIME
        if delay_between is None:
            delay_between = Config.DEFAULT_DELAY_BETWEEN

        # Initialize stats
        total = len(malware_files)
        self.stats.total_samples = total
        self.stats.start()

        BatchAnalysisLogger.info(f"Starting batch analysis of {total} samples")
        BatchAnalysisLogger.info(f"Execution time per sample: {execution_time}s")
        BatchAnalysisLogger.info(f"Delay between samples: {delay_between}s")

        # Process each malware file
        for idx, malware_file in enumerate(malware_files, 1):
            BatchAnalysisLogger.info(f"Processing sample {idx}/{total}: {malware_file}")

            # Run single analysis
            result = self._analyze_single_sample(malware_file, execution_time)
            self.stats.add_result(result)

            # Progress update
            elapsed = time.time() - self.stats.start_time
            BatchAnalysisLogger.progress(idx, total, elapsed, str(malware_file))

            # Save intermediate results
            self._save_intermediate_results(malware_file.family)

            # Delay before next sample (except last one)
            if idx < total:
                BatchAnalysisLogger.info(f"Waiting {delay_between}s before next sample...")
                time.sleep(delay_between)

        # Finalize stats
        self.stats.finish()

        # Save final results
        self._save_final_results()

        # Print summary
        self.stats.print_summary()

        return self.stats

    def _analyze_single_sample(self, malware_file, execution_time):
        """Analyse one sample and wrap the outcome in an ``AnalysisResult``."""
        start_time = time.time()

        try:
            # Run the actual analysis using the existing workflow function
            outcome = run_kernel_telemetry_analysis(
                malware_path=malware_file.full_path,
                malware_name=malware_file.filename,
                execution_time=execution_time
            )
        except Exception as e:
            duration = time.time() - start_time
            error_msg = str(e)

            BatchAnalysisLogger.error(f"Analysis failed for {malware_file}: {error_msg}")

            # Attempt VM recovery
            if VMManager.recover_vm():
                BatchAnalysisLogger.info("VM recovery successful, continuing with next sample")
            else:
                BatchAnalysisLogger.warning("VM recovery failed, attempting to continue")

            return AnalysisResult(malware_file, success=False, error_message=error_msg, duration=duration)

        duration = time.time() - start_time

        # The workflow function handles its own exceptions, so a failure may
        # surface only as a return value. An explicit False is a failure; any
        # other value (including None) counts as completed.
        if outcome is False:
            error_msg = "analysis workflow reported failure (see log above)"
            BatchAnalysisLogger.error(f"Analysis failed for {malware_file}: {error_msg}")
            return AnalysisResult(malware_file, success=False, error_message=error_msg, duration=duration)

        BatchAnalysisLogger.success(f"Analysis completed for {malware_file}")
        return AnalysisResult(malware_file, success=True, duration=duration)

    def _save_intermediate_results(self, family):
        """Write all results collected so far for ``family`` to a timestamped JSON file."""
        family_results = [r for r in self.stats.results if r.malware_file.family == family]
        if not family_results:
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{Config.RESULTS_BACKUP}/batch_results_{family}_{timestamp}.json"

        data = {
            'family': family,
            'timestamp': timestamp,
            'total': len(family_results),
            'successful': sum(1 for r in family_results if r.success),
            'results': [r.to_dict() for r in family_results]
        }

        # Explicit encoding keeps the file independent of the host locale
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    def _save_final_results(self):
        """Write the overall summary and every result to a timestamped JSON file."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{Config.RESULTS_BACKUP}/batch_analysis_final_{timestamp}.json"

        data = {
            'summary': {
                'total_samples': self.stats.total_samples,
                'successful': self.stats.successful,
                'failed': self.stats.failed,
                'success_rate': self.stats.success_rate,
                'duration_minutes': self.stats.duration / 60,
                'timestamp': timestamp
            },
            'results': [r.to_dict() for r in self.stats.results]
        }

        # Explicit encoding keeps the file independent of the host locale
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

        BatchAnalysisLogger.success(f"Final results saved to {filename}")

# =================================================================================================
# HIGH-LEVEL API FUNCTIONS
# =================================================================================================

class BatchAnalysisAPI:
    """High-level API for batch analysis.

    The class is used purely as a namespace: every entry point is a
    ``@staticmethod`` that delegates to ``VMManager``, ``MalwareScanner`` and
    ``BatchAnalysisEngine``.
    """

    @staticmethod
    def test_vm_connection():
        """Test the connection to the VM.

        Returns:
            Whatever ``VMManager.test_connection()`` returns.
        """
        return VMManager.test_connection()

    @staticmethod
    def scan_available_malware():
        """Scan all malware available in the VM.

        Returns:
            Whatever ``MalwareScanner.scan_all_directories()`` returns.
        """
        return MalwareScanner.scan_all_directories()

    @staticmethod
    def quick_test(families=None, samples_per_family=1, execution_time=30):
        """Run a quick test with a minimal number of samples.

        Args:
            families: Family names to analyse; defaults to the first three
                entries of ``Config.PRIORITY_FAMILIES``.
            samples_per_family: Maximum number of samples taken per family.
            execution_time: Value forwarded to the analysis engine.

        Returns:
            The result of ``_run_family_analysis`` (``None`` on failure).
        """
        if families is None:
            families = Config.PRIORITY_FAMILIES[:3]  # Top 3 families

        BatchAnalysisLogger.info(f"Running quick test with families: {families}")
        return BatchAnalysisAPI._run_family_analysis(families, samples_per_family, execution_time)

    @staticmethod
    def medium_test(families=None, samples_per_family=3, execution_time=60):
        """Run a medium test with a moderate number of samples.

        Args:
            families: Family names to analyse; defaults to the first five
                entries of ``Config.PRIORITY_FAMILIES``.
            samples_per_family: Maximum number of samples taken per family.
            execution_time: Value forwarded to the analysis engine.

        Returns:
            The result of ``_run_family_analysis`` (``None`` on failure).
        """
        if families is None:
            families = Config.PRIORITY_FAMILIES[:5]  # Top 5 families

        BatchAnalysisLogger.info(f"Running medium test with families: {families}")
        return BatchAnalysisAPI._run_family_analysis(families, samples_per_family, execution_time)

    @staticmethod
    def large_test(families=None, samples_per_family=5, execution_time=60):
        """Run a large test with many samples.

        Args:
            families: Family names to analyse; defaults to every entry of
                ``Config.PRIORITY_FAMILIES``.
            samples_per_family: Maximum number of samples taken per family.
            execution_time: Value forwarded to the analysis engine.

        Returns:
            The result of ``_run_family_analysis`` (``None`` on failure).
        """
        if families is None:
            families = Config.PRIORITY_FAMILIES  # All priority families

        BatchAnalysisLogger.info(f"Running large test with families: {families}")
        return BatchAnalysisAPI._run_family_analysis(families, samples_per_family, execution_time)

    @staticmethod
    def analyze_specific_families(families, samples_per_family=10, execution_time=60):
        """Analyse the given families with a chosen number of samples each.

        Args:
            families: Family names to analyse (matched case-insensitively).
            samples_per_family: Maximum number of samples taken per family.
            execution_time: Value forwarded to the analysis engine.

        Returns:
            The result of ``_run_family_analysis`` (``None`` on failure).
        """
        BatchAnalysisLogger.info(f"Running analysis for specific families: {families}")
        return BatchAnalysisAPI._run_family_analysis(families, samples_per_family, execution_time)

    @staticmethod
    def full_analysis(execution_time=60):
        """Run a full analysis of all malware (USE WITH CARE!).

        Scans every directory, prints a rough time estimate and asks for an
        interactive confirmation on stdin before starting.

        Args:
            execution_time: Value forwarded to the analysis engine.

        Returns:
            ``None`` when no files are found or the user does not type
            ``YES``; otherwise whatever ``BatchAnalysisEngine.run_analysis``
            returns.
        """
        BatchAnalysisLogger.warning("Starting FULL ANALYSIS - this will take a very long time!")

        # Scan all available files
        all_files = MalwareScanner.scan_all_directories()

        if not all_files:
            BatchAnalysisLogger.error("No malware files found for analysis")
            return None

        BatchAnalysisLogger.warning(f"This will analyze {len(all_files)} malware samples!")
        # Rough estimate: the formula assumes 2 minutes per sample.
        estimated_hours = (len(all_files) * 2) / 60
        BatchAnalysisLogger.warning(f"Estimated time: {estimated_hours:.1f} hours")

        # Only the exact, case-sensitive answer 'YES' continues.
        confirm = input("\nAre you absolutely sure? Type 'YES' to continue: ")
        if confirm != 'YES':
            BatchAnalysisLogger.info("Full analysis cancelled by user")
            return None

        # Run analysis
        engine = BatchAnalysisEngine()
        return engine.run_analysis(all_files, execution_time)

    @staticmethod
    def _run_family_analysis(families, samples_per_family, execution_time):
        """Select samples for the requested families and run the analysis.

        Starts a clean VM, scans every directory in
        ``Config.MALWARE_DIRECTORIES`` whose last path component matches one
        of ``families`` (case-insensitively), keeps at most
        ``samples_per_family`` files per directory, shuts the VM down and
        hands the selection to ``BatchAnalysisEngine``.

        Args:
            families: Iterable of family names.
            samples_per_family: Maximum number of files kept per family.
            execution_time: Value forwarded to the analysis engine.

        Returns:
            Whatever ``BatchAnalysisEngine.run_analysis`` returns, or ``None``
            when no files were selected or any step raised an ``Exception``.
        """
        try:
            # Start VM and scan
            VMManager.start_clean_vm()

            # Normalise the requested names once instead of once per directory.
            wanted = {name.lower() for name in families}

            # Get all available files
            all_files = []
            for directory in Config.MALWARE_DIRECTORIES:
                # Directories are Windows paths; convert separators so basename works.
                family_name = os.path.basename(directory.replace('\\', '/'))

                if family_name.lower() in wanted:
                    family_files = MalwareScanner._scan_single_directory(directory, family_name)
                    all_files.extend(family_files[:samples_per_family])

            VMManager.shutdown_vm()

            if not all_files:
                BatchAnalysisLogger.error("No malware files found for specified families")
                return None

            BatchAnalysisLogger.info(f"Selected {len(all_files)} files for analysis")

            # Run analysis
            engine = BatchAnalysisEngine()
            return engine.run_analysis(all_files, execution_time)

        except Exception as e:
            BatchAnalysisLogger.error(f"Family analysis failed: {str(e)}")
            # Best-effort cleanup: a failed shutdown is reported but must not
            # mask the original error. KeyboardInterrupt/SystemExit propagate.
            try:
                VMManager.shutdown_vm()
            except Exception as cleanup_error:
                BatchAnalysisLogger.warning(f"VM shutdown during cleanup failed: {cleanup_error}")
            return None

    @staticmethod
    def estimate_time(num_families=5, samples_per_family=5, execution_time=60):
        """Print and return an estimate of the total analysis time.

        Args:
            num_families: Number of families to be analysed.
            samples_per_family: Number of samples per family.
            execution_time: Execution time per sample, in seconds.

        Returns:
            The estimated total time in minutes.
        """
        total_samples = num_families * samples_per_family
        time_per_sample = execution_time + Config.DEFAULT_DELAY_BETWEEN + 30  # overhead
        total_minutes = (total_samples * time_per_sample) / 60

        print("\n📊 ANALYSIS TIME ESTIMATION")
        print("=" * 50)
        print(f"Families: {num_families}")
        print(f"Samples per family: {samples_per_family}")
        print(f"Total samples: {total_samples}")
        print(f"Execution time per sample: {execution_time}s")
        print(f"Estimated total time: {total_minutes:.1f} minutes ({total_minutes/60:.1f} hours)")

        return total_minutes

    @staticmethod
    def _print_result_files(folder, suffix, label):
        """Print the files in ``folder`` ending with ``suffix``.

        Returns:
            ``True`` when at least one matching file was listed, ``False``
            when ``folder`` is not a directory or holds no matching file.
        """
        # isdir (not exists) so a stray regular file cannot make listdir raise.
        if not os.path.isdir(folder):
            return False

        matches = [name for name in os.listdir(folder) if name.endswith(suffix)]
        if not matches:
            return False

        print(f"\n📁 {label} in {folder}:")
        for name in matches:
            print(f"  - {name}")
        return True

    @staticmethod
    def check_results():
        """List the saved analysis results.

        Looks for ``.csv`` files in ``Config.OUTPUT_FOLDER`` and ``.json``
        files in ``Config.RESULTS_BACKUP`` and prints what it finds.

        Returns:
            ``True`` when at least one result file was found, else ``False``.
        """
        # Both folders are always checked, so neither listing is skipped.
        csv_found = BatchAnalysisAPI._print_result_files(
            Config.OUTPUT_FOLDER, '.csv', "CSV Results")
        json_found = BatchAnalysisAPI._print_result_files(
            Config.RESULTS_BACKUP, '.json', "Analysis Results")
        results_found = csv_found or json_found

        if not results_found:
            print("📁 No analysis results found")

        return results_found

# =================================================================================================
# MAIN INTERFACE
# =================================================================================================

def main():
    """Print the banner, the list of available operations and a quick-start guide.

    Nothing is executed against the VM; the function only writes to stdout,
    reading the VM name, snapshot number and directory count from ``Config``.
    """
    rule = "=" * 80
    divider = "-" * 50

    # Banner: printed line by line so Config is read in the original order.
    print(rule)
    print("🔬 AUTOMATED MALWARE ANALYSIS SYSTEM - VM BATCH PROCESSING")
    print(rule)
    print("Version 2.0 - Enhanced VM Integration")
    print(f"VM: {Config.VM_NAME}")
    print(f"Snapshot: {Config.SNAPSHOT_NO}")
    print(f"Malware Families: {len(Config.MALWARE_DIRECTORIES)}")
    print(rule)

    operations = (
        "1. test_connection()           - Test VM connection",
        "2. scan_malware()             - Scan available malware files",
        "3. quick_test()               - Quick test (3 families, 1 sample each)",
        "4. medium_test()              - Medium test (5 families, 3 samples each)",
        "5. large_test()               - Large test (10 families, 5 samples each)",
        "6. analyze_families()         - Analyze specific families",
        "7. full_analysis()            - Full analysis (ALL malware)",
        "8. estimate_time()            - Estimate analysis time",
        "9. check_results()            - Check saved results",
    )
    print("\n🚀 AVAILABLE OPERATIONS:")
    print(divider)
    for entry in operations:
        print(entry)

    quick_start = (
        "1. First: test_connection()    # Test VM connectivity",
        "2. Then:  scan_malware()       # See available files",
        "3. Start: quick_test()         # Run small test",
        "4. Scale: medium_test()        # If quick test works",
        "5. Prod:  analyze_families(['avoslocker', 'lockbit'], 10)",
    )
    print("\n💡 QUICK START GUIDE:")
    print(divider)
    for step in quick_start:
        print(step)

# Module-level shortcuts so the API can be called without the class prefix.

# Connectivity and discovery
test_connection = BatchAnalysisAPI.test_vm_connection
scan_malware = BatchAnalysisAPI.scan_available_malware

# Analysis runs, from smallest to largest
quick_test = BatchAnalysisAPI.quick_test
medium_test = BatchAnalysisAPI.medium_test
large_test = BatchAnalysisAPI.large_test
analyze_families = BatchAnalysisAPI.analyze_specific_families
full_analysis = BatchAnalysisAPI.full_analysis

# Planning and inspection helpers
estimate_time = BatchAnalysisAPI.estimate_time
check_results = BatchAnalysisAPI.check_results

# Show the interface banner both when run as a script and when imported.
main()

## Manual Operations

Use these cells to perform individual operations if needed for testing or debugging.

In [None]:
def check_vm_status():
    """Print the ``State`` line(s) of ``vboxmanage showvminfo`` for the VM.

    ``vboxmanage`` is invoked with an argument list (no shell), and the
    output is filtered in Python rather than through ``grep``. If the
    command cannot be run, exits with an error, or reports no line
    containing ``State``, ``"Error checking VM status"`` is printed instead.
    """
    try:
        raw = subprocess.check_output(['vboxmanage', 'showvminfo', vm_name])
    except (subprocess.CalledProcessError, OSError):
        # Non-zero exit status, or vboxmanage is missing / not executable.
        print("Error checking VM status")
        return

    text = raw.decode(errors="replace")
    # Case-sensitive substring match, as the previous `grep State` did.
    state_lines = [line for line in text.splitlines() if "State" in line]
    if not state_lines:
        print("Error checking VM status")
        return

    print("\n".join(state_lines).strip())

def manual_restore():
    """Manually restore the VM by calling ``restore`` with ``snapshot_no``.

    The snapshot is taken from the module-level ``snapshot_no`` setting and
    is not hard-coded here.
    """
    restore(snapshot_no)

def manual_start():
    """Manually start the VM by delegating to ``start_vm``."""
    start_vm()
    
def manual_shutdown():
    """Manually power off the VM with ``vboxmanage controlvm ... poweroff``.

    A failing command is treated as "VM already off" and reported on stdout
    instead of raising. Only subprocess and OS errors are handled, so
    ``KeyboardInterrupt`` is no longer swallowed.
    """
    print(f"[{tstamp()}] Powering off VM")
    try:
        check_output_t(f'vboxmanage controlvm "{vm_name}" poweroff')
    except (subprocess.SubprocessError, OSError):
        # check_output_t raises CalledProcessError on a non-zero exit status.
        print('VM (probably) already off')
    else:
        print(f"[{tstamp()}] Powering off VM done")

def manual_install_driver():
    """Manually install the kernel driver by delegating to ``install_kernel_driver``."""
    install_kernel_driver()

def manual_start_driver():
    """Manually start the GP driver by delegating to ``start_gp_driver``."""
    start_gp_driver()

def manual_process_telemetry():
    """Manually process telemetry data by delegating to ``process_telemetry_data``."""
    process_telemetry_data()

def manual_copy_results():
    """Manually copy the CSV results by delegating to ``copy_csv_results``."""
    copy_csv_results()

In [None]:
# Check VM status
# check_vm_status()

In [None]:
# Manually restore VM to the configured snapshot (snapshot_no)
# manual_restore()

In [None]:
# Manually start VM
# manual_start()

In [None]:
# Manually install kernel driver
# manual_install_driver()

In [None]:
# Manually start GP driver service
# manual_start_driver()

In [None]:
# Manually process telemetry data
# manual_process_telemetry()

In [None]:
# Manually copy CSV results
# manual_copy_results()

In [None]:
# Manually shutdown VM
# manual_shutdown()

## Debug and Testing Functions

Use these functions for debugging individual components.

In [None]:
def test_vm_communication():
    """Send a trivial ``echo`` to the VM and print what comes back.

    Prints the captured stdout when ``run_vm_command`` returns a truthy
    result, and a failure message otherwise.
    """
    print("Testing VM communication...")
    reply = run_vm_command("echo Hello from VM!")
    if not reply:
        print("VM communication failed")
        return
    print(f"VM Response: {reply.stdout}")

def test_driver_status():
    """Query the ``gp-driver`` service inside the VM and print the reply.

    Nothing further is printed when ``run_vm_command`` returns a falsy result.
    """
    print("Checking GP driver status...")
    status = run_vm_command("sc query gp-driver")
    if not status:
        return
    print(f"Driver Status: {status.stdout}")

def list_hafidz_directory():
    """List the contents of the Hafidz directory inside the VM.

    Runs ``dir`` on ``hafidz_dir`` through ``run_vm_command`` and prints the
    captured stdout when a truthy result is returned.
    """
    print("Listing Hafidz directory contents...")
    # run_vm_command wraps the command in double quotes before handing it to
    # the shell. hafidz_dir ends with a backslash, which would escape the
    # closing quote and corrupt the command, so drop trailing backslashes.
    target_dir = hafidz_dir.rstrip("\\")
    result = run_vm_command(f"dir {target_dir}")
    if result:
        print(f"Directory contents:\n{result.stdout}")

def list_desktop_files():
    """Print the ``.exe`` files on the VM user's desktop.

    These are the malware samples available for execution. Nothing further
    is printed when ``run_vm_command`` returns a falsy result.
    """
    print("Listing desktop files...")
    listing = run_vm_command("dir C:\\Users\\wineleven\\Desktop\\*.exe")
    if not listing:
        return
    print(f"Available executables:\n{listing.stdout}")

In [None]:
# Test VM communication
# test_vm_communication()

In [None]:
# Test driver status
# test_driver_status()

In [None]:
# List Hafidz directory
# list_hafidz_directory()

In [None]:
# List available malware samples on desktop
# list_desktop_files()