Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
199 lines (171 sloc) 7.99 KB
# REvil malware encrypted string decoder
# Reverse engineered by Lloyd Macrohon
import string
ida_available = True
try:
import idautils
import idc
import ida_bytes
import idaapi
except ImportError:
ida_available = False
def decode(key, data):
    """
    REvil string decoder, this is just RC4.

    key:  sequence of byte values (e.g. bytes / bytearray), must be non-empty
    data: sequence of byte values to decrypt
    Returns a new bytearray with the same length as data. RC4 is symmetric, so
    applying the function twice with the same key yields the input again.
    Raises ValueError when key is empty.
    """
    if len(key) == 0:
        # without this guard the key schedule fails with an opaque ZeroDivisionError
        raise ValueError("RC4 key must not be empty")

    # key scheduling: start from the identity permutation and shuffle it with the key
    sbox = list(range(256))
    key_len = len(key)
    j = 0
    for i in range(256):
        j = (j + key[i % key_len] + sbox[i]) & 0xff
        sbox[i], sbox[j] = sbox[j], sbox[i]

    # keystream generation; each keystream byte is XORed with one input byte
    output = bytearray(len(data))
    i = j = 0
    for k, value in enumerate(data):
        i = (i + 1) & 0xff
        j = (j + sbox[i]) & 0xff
        sbox[i], sbox[j] = sbox[j], sbox[i]
        t = (sbox[i] + sbox[j]) & 0xff
        output[k] = sbox[t] ^ value
    return output
def decode_data(data, offset, keyLen, dataLen):
    """
    Decode one string out of a larger blob, laid out as [key][encrypted data]
    starting at offset.

    This mirrors the call made in the REvil code, e.g.
        decode_string(aSC, 1890, 16, 226, (BYTE *)buf);
    where aSC is a static string in the image. The equivalent Python is:
        buf = decode_data(aSC, 1890, 16, 226)
    """
    key_end = offset + keyLen
    key = data[offset:key_end]
    # the encrypted bytes immediately follow the key
    encrypted = data[key_end:key_end + dataLen]
    return decode(key, encrypted)
if ida_available:
    def decode_string(data, keyLen, dataLen):
        """
        Decode a buffer that starts with the key: the first keyLen bytes are
        the key, the next dataLen bytes are the encrypted string.
        """
        key = data[:keyLen]
        encrypted = data[keyLen:keyLen + dataLen]
        return decode(key, encrypted)
def data_to_str(data):
    """
    Render decoded bytes as text, annotating whether this was a wchar_t or not.

    data: sequence of byte values (e.g. bytes / bytearray)
    Returns 'wchar_t: <text>' when every odd-indexed byte is zero, otherwise the
    bytes as characters with non-printable ones written as a backslash followed
    by two hex digits.
    """
    # if every odd char is zero, then let's assume this is ascii as wchar_t.
    # Require at least two bytes: with fewer there are no odd-indexed bytes and
    # the check would be vacuously true for empty or one-byte input.
    if len(data) >= 2 and not any(data[1::2]):
        return 'wchar_t: ' + ''.join(map(chr, data[0::2]))
    return ''.join(
        chr(d) if chr(d) in string.printable else ("\\%02x" % d)
        for d in data
    )
def get_decode_xrefs(decode_func_ea=0x132549a):
    """
    Find all cross-references to the string decoder, decode each string and
    annotate the IDA database; this will make analysis a lot easier.

    decode_func_ea: address of the decoder function. The default, 0x132549a, is
    its address in the REvil sample this script was written for; pass another
    address for a different sample.

    For every call site the decoded text is printed, set as a disassembly
    comment and, when the decompiler returns a result, as a pseudocode comment.
    Call sites whose arguments or data cannot be recovered are reported and skipped.
    """
    for xref in idautils.XrefsTo(decode_func_ea):
        call_ea = xref.frm
        # first of all, we need to find the arguments to this function, the signature is:
        # BYTE* __cdecl decode_string(char* base, int keyOffset, int keyLen, int dataLen, BYTE* pOut);
        args = get_decode_args(call_ea)
        if not args:
            # We could not get the arguments, likely because it may be a register and not an immediate
            # value, so we'd need to go back further and find what value that register was assigned with.
            # Would be easier to just tell user, and let him decode it manually (HexRays has the actual args)
            print("0x%08x: Could not decode arguments" % call_ea)
            continue

        base, key_offset, key_len, data_len = args
        # get the data from the image, data = [keyBytes][encryptedData]
        data = ida_bytes.get_bytes(base + key_offset, key_len + data_len)
        if data is None:
            # nothing readable at that address; report it instead of aborting the whole pass
            print("0x%08x: Could not read encrypted data" % call_ea)
            continue

        text = data_to_str(decode_string(data, key_len, data_len))
        print("0x%08x: %s" % (call_ea, text))

        # put a comment in the decompiled code
        cfunc = idaapi.decompile(call_ea)
        if cfunc is not None:
            tl = idaapi.treeloc_t()
            tl.ea = call_ea
            tl.itp = idaapi.ITP_SEMI
            cfunc.set_user_cmt(tl, text)
            cfunc.save_user_cmts()
        # and one in the disassembly
        idaapi.set_cmt(int(call_ea), text, True)
def get_decode_args(addr):
    """
    Find the arguments for function call made from addr.

    Walks backwards from addr one instruction at a time and collects the
    operands of the first four push instructions it meets (nearest push first).
    When a push operand is a register, a later-visited (i.e. earlier in the
    code) mov of an immediate into that register supplies the value.

    Returns a list of 4 values, or None when they could not be recovered
    within 20 instructions or a pushed register is loaded from something other
    than an immediate.
    """
    # operand type values tested below; NOTE(review): these look like IDA's
    # o_reg (1) and o_imm (5) constants - confirm against the IDA SDK in use
    op_reg = 1
    op_imm = 5
    wanted = 4          # number of arguments to collect
    max_insns = 20      # don't let this go on forever, give up after 20 instructions

    args = []           # the list of arguments to the function (4 values)
    regs = {}           # pushed register -> index in args that still needs its real value

    # A bounded for-loop guarantees termination even when an instruction
    # cannot be decoded (that path used to skip the counter entirely).
    for _ in range(max_insns):
        # keep going back one instruction looking for push to the stack
        addr = idc.prev_head(addr)
        inst = idautils.DecodeInstruction(addr)
        if inst:
            mnem = inst.get_canon_mnem()
            if mnem == 'push' and len(args) < wanted:
                operand = inst.ops[0]
                if operand.type == op_reg:
                    # remember the slot; the register number is only a placeholder
                    regs[operand.reg] = len(args)
                    args.append(operand.reg)
                else:
                    args.append(operand.value)
            elif mnem == 'mov' and inst.ops[0].type == op_reg:
                # we have a mov instr, see if this is a mov to a register we're interested in
                reg = inst.ops[0].reg
                if reg in regs:
                    if inst.ops[1].type != op_imm:
                        # only "mov reg, imm" (e.g. mov esi, offset x) is resolved; anything
                        # else would leave the register number in args as a bogus value
                        print("[*] warning, mov operand not supported")
                        return None
                    args[regs.pop(reg)] = inst.ops[1].value
            # we can also resolve registers checking for pop, e.g. pop ebx
            # or other insts like: lea eax, [ebp+var_50]; push eax;
        if len(args) >= wanted and not regs:
            return args
    return None
def decode_at_ea(addr, offset, keyLen, dataLen):
    """
    Manually decode one string from inside IDA and comment it at the cursor.

    Reads keyLen + dataLen bytes at addr + offset, decodes them, prints the
    result and attaches it as a comment at the current screen address, in the
    disassembly and (if the decompiler returns a result) in the pseudocode.
    """
    blob = ida_bytes.get_bytes(addr + offset, keyLen + dataLen)
    text = data_to_str(decode_string(blob, keyLen, dataLen))
    print("%s" % text)

    cursor_ea = idc.get_screen_ea()
    # disassembly comment
    idaapi.set_cmt(cursor_ea, text, True)

    cfunc = idaapi.decompile(cursor_ea)
    if cfunc is None:
        return
    # decompiled comment
    loc = idaapi.treeloc_t()
    loc.ea = cursor_ea
    loc.itp = idaapi.ITP_SEMI
    cfunc.set_user_cmt(loc, text)
    cfunc.save_user_cmts()
def decode_test():
    """
    Stand-alone self-test used when the script runs outside IDA.

    Decodes a built-in key/data pair, then, if a file named encrypted.bin is
    present in the working directory, decodes a few strings from it.
    """
    key = b'\x3E\x64\xDA\x5C\xBF\xE8\xA0\xAF\xA7\x89\x94'
    data = b'\x19\xCE\xE6\xDB\x36\xA5\x6B\x63\x37\xFD\x16\xFF\xDB\x7D\x89\x41'
    print("Sample decoding of ", data)
    print(decode(key, data).decode('utf16'))
    print(decode(key, data))

    # you can also export the static string to a file using hiew or IDA
    try:
        with open('encrypted.bin', 'rb') as f:
            data = f.read()
    except FileNotFoundError:
        print("No test data: encrypted.bin")
        return

    # here are some (offset, keyLen, dataLen) triples taken from ida.
    # decode_data is the 4-argument variant; decode_string takes no offset and
    # is only defined when running inside IDA.
    samples = (
        (402, 11, 2),
        (1548, 15, 3),
        (2875, 7, 3),
        (309, 16, 3),
    )
    for offset, key_len, data_len in samples:
        print(decode_data(data, offset, key_len, data_len).decode('ascii'))
if __name__ == '__main__':
    if not ida_available:
        # no IDA modules could be imported: just run the stand-alone self-test
        decode_test()
    else:
        # annotate every decoder call, then tell the user how to handle the
        # calls that could not be decoded automatically
        get_decode_xrefs()
        print("""
To decode manually, select the function calling the decode in HexRays or disassembly then
call decode_at_ea(data, offset, keylen, datalen)
Example:
if ( !*v0 )
{
decode_string(aSC, 906, 13, 18, pOut);
^- put screen cursor here
.data:0133CB90 aSC db 'š',0Bh,'C<'
decode as:
> decode_at_ea(0x0133cb90, 906, 13, 18)
and ensure screen cursor is at decode_string in HexRays (otherwise comment will be lost)
""")
You can’t perform that action at this time.