In [3]:
import numpy as np
import yara  # pip install yara-python


Simulate a PE-like file byte array with an embedded signature pattern

In [4]:
# Fix the global NumPy seed so every run generates the same sample.
np.random.seed(123)

# Layout of the simulated PE-like file: a "critical" region followed by an
# overlay, i.e. extra space at the end where a payload can be placed.
file_length = 6000                        # total size of the simulated file, in bytes
critical_len = 5000                       # header + sections (core structure)
overlay_len = file_length - critical_len  # overlay: extra/non-critical data where malware often hides

# Random filler for both regions. Values are drawn from [1, 256), so the
# filler never contains a 0x00 byte. The critical region is drawn first,
# then the overlay, which fixes the order in which the seeded stream is used.
critical_bytes = np.random.randint(low=1, high=256, size=critical_len, dtype=np.uint8)
overlay_bytes = np.random.randint(low=1, high=256, size=overlay_len, dtype=np.uint8)

# Known byte pattern that a static signature scanner (such as a YARA rule)
# can look for.
signature_pattern = b'\xDE\xAD\xBE\xEF\xCA\xFE'  # example hex pattern

# Absolute file offset of the signature: 100 bytes into the overlay, so the
# critical region is left untouched.
start_pos = critical_len + 100

# Translate the absolute offset into an overlay-relative one and write the
# signature bytes over the random filler at that position.
overlay_offset = start_pos - critical_len
signature_array = np.frombuffer(signature_pattern, dtype=np.uint8)
overlay_bytes[overlay_offset:overlay_offset + signature_array.size] = signature_array

# Full simulated file: critical region immediately followed by the overlay.
file_bytes = np.concatenate((critical_bytes, overlay_bytes))


Define a YARA rule that matches the signature pattern

In [5]:
# Define and compile a YARA rule that detects the inserted signature.
# The rule declares one hex string, $sig, holding the six bytes
# DE AD BE EF CA FE (the same bytes as `signature_pattern`), and its
# condition is simply `$sig`, i.e. the rule matches when that byte sequence
# is found in the scanned data.
rule_source = '''
rule MalwareSignature
{
    strings:
        $sig = { DE AD BE EF CA FE }
    condition:
        $sig
}
'''
# Compile once; the resulting `rules` object is used for the scans before
# and after the byte modification.
rules = yara.compile(source=rule_source)


Check if YARA matches before modifications

In [6]:
# Baseline scan: run the compiled rule over the unmodified sample.
# yara-python scans in-memory data passed as bytes, so the NumPy array is
# serialised with .tobytes() first.
matches_before = rules.match(
    data=file_bytes.tobytes(),
)

# Report only the names of the rules that fired.
print(f"YARA matches before changes: {[hit.rule for hit in matches_before]}")


YARA matches before changes: ['MalwareSignature']


Apply a minimal byte change in the overlay to evade the signature

In [8]:
# Block 5 — apply a minimal byte change in the overlay to evade the signature.
# Work on a copy so the original sample stays available for comparison.
file_bytes_adv = file_bytes.copy()

# Absolute offset of the third byte of the embedded signature
# (the signature starts at start_pos in the full PE-like file).
flip_index = start_pos + 2
original_byte = file_bytes_adv[flip_index]

# Increase that byte by one. The arithmetic is done on a Python int and
# masked to 8 bits, so 0xFF wraps around to 0x00 instead of overflowing.
file_bytes_adv[flip_index] = (int(original_byte) + 1) & 0xFF

print(f"Modified byte at position {flip_index} from {original_byte} to {file_bytes_adv[flip_index]}")


Modified byte at position 5102 from 190 to 191


In [2]:
import numpy as np

# Standalone illustration of the one-byte change, applied to the bare
# six-byte signature instead of the full simulated file.
#
# The cell uses its own `demo_*` names on purpose: the main flow stores the
# modified 6000-byte sample in `file_bytes_adv` (with `flip_index` and
# `original_byte` alongside it), and the final YARA check scans
# `file_bytes_adv`. Reusing those names here would silently replace the
# modified file with this 6-byte array whenever the cell runs before that check.
demo_signature = np.array([0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE], dtype=np.uint8)
demo_flip_index = 2  # index of the 3rd byte, the one that gets modified

# Display original signature bytes in hex and decimal
print("Original signature bytes:")
for i, b in enumerate(demo_signature):
    mark = " ← 3rd byte (to modify)" if i == demo_flip_index else ""
    print(f"{b:02X} → {int(b)}{mark}")

# Modify the 3rd byte by +1 with wrap-around, on a copy so the original
# demo signature stays intact and the cell can be re-run with the same result.
demo_signature_adv = demo_signature.copy()
demo_original_byte = demo_signature[demo_flip_index]
demo_signature_adv[demo_flip_index] = np.uint8((int(demo_original_byte) + 1) % 256)

# Display modified byte
print(f"\nModified byte at position {demo_flip_index}: {demo_original_byte} (0x{demo_original_byte:02X}) → {demo_signature_adv[demo_flip_index]} (0x{demo_signature_adv[demo_flip_index]:02X})")

Original signature bytes:
DE → 222
AD → 173
BE → 190 ← 3rd byte (to modify)
EF → 239
CA → 202
FE → 254

Modified byte at position 2: 190 (0xBE) → 191 (0xBF)


Check if YARA matches after minimal changes

In [9]:
# Re-scan the modified sample with the same compiled rule. An empty result
# means the one-byte change was enough to evade the signature match.
matches_after = rules.match(
    data=file_bytes_adv.tobytes(),
)

# Report only the names of the rules that fired.
print(f"YARA matches after changes: {[hit.rule for hit in matches_after]}")


YARA matches after changes: []
