# 10 Python Libraries for Malware Analysis and Reverse Engineering

You can fin more details from the original post in my [blog](http://blog.securitybreak.io/)

## 1 - Pefile

In [140]:
# pip install pefile
import pefile

exefile = "calc.exe"
exe = pefile.PE(exefile)
print(exe.__doc__)

A Portable Executable representation.

    This class provides access to most of the information in a PE file.

    It expects to be supplied the name of the file to load or PE data
    to process and an optional argument 'fast_load' (False by default)
    which controls whether to load all the directories information,
    which can be quite time consuming.

    pe = pefile.PE('module.dll')
    pe = pefile.PE(name='module.dll')

    would load 'module.dll' and process it. If the data is already
    available in a buffer the same can be achieved with:

    pe = pefile.PE(data=module_dll_data)

    The "fast_load" can be set to a default by setting its value in the
    module itself by means, for instance, of a "pefile.fast_load = True".
    That will make all the subsequent instances not to load the
    whole PE structure. The "full_load" method can be used to parse
    the missing data at a later stage.

    Basic headers information will be available in the attributes:

    DOS_HEADER

## 2 - Pelief

In [141]:
# pip install lief

import lief

# ELF
#binary = lief.parse("/usr/bin/ls")
#print(binary)

# PE
binary = lief.parse("calc.exe")
print(binary.__doc__)


      Class which represents a PE binary which is the main interface
      to manage and modify a PE executable.

      This object can be instantiated through :func:`lief.parse` or :func:`lief.PE.parse` while
      the constructor of this object can be used to craft a binary from scratch (see: :ref:`02-pe-from-scratch`)
      


## 3 - Capstone

In [34]:
#pip install capstone

from capstone import *

exefile = "calc.exe"
exe = pefile.PE(exefile)

entry_point = exe.OPTIONAL_HEADER.AddressOfEntryPoint
data = exe.get_memory_mapped_image()[entry_point:]
cs = Cs(CS_ARCH_X86, CS_MODE_32)
rdbin = cs.disasm(data, 0x1000)

for i in rdbin:
    print("0x%x:\t%s\t%s" %(i.address, i.mnemonic, i.op_str))

0x1000:	dec	eax
0x1001:	sub	esp, 0x28
0x1004:	call	0xa24
0x1009:	dec	eax
0x100a:	add	esp, 0x28
0x100d:	jmp	0xd80
0x1012:	int3	
0x1013:	int3	
0x1014:	int3	
0x1015:	int3	
0x1016:	int3	
0x1017:	int3	
0x1018:	jno	0xfad
0x101a:	rcl	dword ptr [edx], cl
0x101c:	sar	dword ptr [edi], 0xc1
0x101f:	jecxz	0x1069
0x1021:	sub	esp, 0x28
0x1024:	dec	eax
0x1025:	mov	eax, dword ptr [ecx]
0x1027:	cmp	dword ptr [eax], 0xe06d7363
0x102d:	jne	0x1052
0x102f:	cmp	dword ptr [eax + 0x18], 4
0x1033:	jne	0x1052
0x1035:	mov	ecx, dword ptr [eax + 0x20]
0x1038:	lea	eax, [ecx - 0x19930520]
0x103e:	cmp	eax, 2
0x1041:	jbe	0x104b
0x1043:	cmp	ecx, 0x1994000
0x1049:	jne	0x1052
0x104b:	call	dword ptr [0xaaf]
0x1051:	int3	
0x1052:	xor	eax, eax
0x1054:	dec	eax
0x1055:	add	esp, 0x28
0x1058:	ret	
0x1059:	int3	
0x105a:	int3	
0x105b:	int3	
0x105c:	int3	
0x105d:	int3	
0x105e:	int3	
0x105f:	int3	
0x1060:	int3	
0x1061:	int3	
0x1062:	int3	
0x1063:	int3	
0x1064:	int3	
0x1065:	int3	
0x1066:	int3	
0x1067:	int3	
0x1068:	jno	0x109a
0x106

## 4 - Unicorn

In [16]:
# pip install unicorn

from unicorn import *
from unicorn.x86_const import *

# code to be emulated
X86_CODE32 = b"\x41\x4a" # INC ecx; DEC edx

# memory address where emulation starts
ADDRESS = 0x1000000

print("Emulate i386 code")
try:
    # Initialize emulator in X86-32bit mode
    mu = Uc(UC_ARCH_X86, UC_MODE_32)

    # map 2MB memory for this emulation
    mu.mem_map(ADDRESS, 2 * 1024 * 1024)

    # write machine code to be emulated to memory
    mu.mem_write(ADDRESS, X86_CODE32)

    # initialize machine registers
    mu.reg_write(UC_X86_REG_ECX, 0x1234)
    mu.reg_write(UC_X86_REG_EDX, 0x7890)

    # emulate code in infinite time & unlimited instructions
    mu.emu_start(ADDRESS, ADDRESS + len(X86_CODE32))

    # now print out some registers
    print("Emulation done. Below is the CPU context")

    r_ecx = mu.reg_read(UC_X86_REG_ECX)
    r_edx = mu.reg_read(UC_X86_REG_EDX)
    print(">>> ECX = 0x%x" %r_ecx)
    print(">>> EDX = 0x%x" %r_edx)

except UcError as e:
    print("ERROR: %s" % e)

Emulate i386 code
Emulation done. Below is the CPU context
>>> ECX = 0x1235
>>> EDX = 0x788f


## 5 - rzpipe

pip install rzpipe + https://github.com/rizinorg/rizin/releases/tag/v0.3.2

In [143]:
import rzpipe

pipe = rzpipe.open("calc.exe")
pipe.cmd('aa')

print(pipe.cmd("afl"))
print(pipe.cmdj("aflj"))            # evaluates JSON and returns an object
print(pipe.cmdj("ij").core.format)  # shows file format

pipe.quit()

## 6 - Frida Python

In [25]:
from __future__ import print_function
import frida
import sys
import psutil


def checkIfProcessRunning(processName):
    '''
    Check if there is any running process that contains the given name processName.
    '''
    #Iterate over the all the running process
    for proc in psutil.process_iter():
        try:
            # Check if process name contains the given name string.
            if processName.lower() in proc.name().lower():
                return True
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass
    return False;
	

def on_message(message, data):
    print(message)
	

def main(proc):
    if checkIfProcessRunning(proc):
        print('[+] Process %s is running!' % proc)
        session = frida.attach(proc)

    else:
        print('[!] Process %s was not running!' % proc)
        print('[+] Running process %s!' % proc)
        session = frida.spawn(proc)
        session = frida.attach(proc)


    '''print(type(session))'''
    script = session.create_script("""
	
					   var Mutex_addr = Module.findExportByName("kernel32.dll", "CreateMutexA")
					   console.log('[+] CreateMutex addr: ' + Mutex_addr);
					   Interceptor.attach(Mutex_addr,
					   {
							onEnter: function (args) 
							{
								console.log("[+] Entering to createMutex")
								console.log('[+] lpName: ' + Memory.readUtf8String(args[2]));
							
							},
							onLeave: function (retval)
							{
							
							}
						});
 
                  """)

    script.on('message', on_message)
    script.load()
    try:
        frida.resume(proc)
    except:
        pass
    sys.stdin.read()


if __name__ == "__main__":

    try:
        target_process = int(sys.argv[1])
    except ValueError:
        target_process = "calc.exe"

    main(target_process)

[!] Process calc.exe was not running!
[+] Running process calc.exe!
[+] CreateMutex addr: 0x7ffc76952ce0


## 7 - Qiling

In [None]:
# pip3 install qiling
from qiling import *
from qiling.const import *
import sys
import pefile
from capstone import *


# Load PE with capstone
def loader_pe(pe):
    entry_point = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    data = pe.get_memory_mapped_image()[entry_point:]
    cs = Cs(CS_ARCH_X86, CS_MODE_32)
    cs.detail = True
    rdbin = cs.disasm(data, 0x10000)
    return rdbin


# check architecture
def check_arch(pe):
    if pe.FILE_HEADER.Machine == 0x14c:
        bit = 32
        # print(bit)
    elif pe.FILE_HEADER.Machine == 0x8664:
        bit = 64
        # print(bit)
    print("[+] Sample is %s bit" % bit)
    return bit


# Hook for GetProcAddress
def GetProcAddress(ql, addr, params):
    print(params)
    return addr, params


# stop exec at the given address
def stop(ql):
    ql.nprint("[+] Address found")
    ql.console = False
    ql.emu_stop()


# sandbox to emulate the EXE
def my_sandbox(path, rootfs):
    # setup Qiling engine
    ql = Qiling(path, rootfs)  # , output = "debug")

    # Patch address
    # ql.patch(0x0042B726, b'\x90\x90\x90')

    # Hook address
    ql.hook_address(stop, 0x0042B726)

    # hook GetProcAddress() on exit
    ql.set_api("GetProcAddress", GetProcAddress, QL_INTERCEPT.EXIT)

    # disable strace logs
    ql.filter = []
    # now emulate the EXE
    ql.run()


if __name__ == "__main__":

    exefile = sys.argv[1]
    try:
        exe = pefile.PE(exefile)
    except OSError as e:
        print(e)
        sys.exit()
    except pefile.PEFormatError as e:
        print(module.config.R + "[-] PEFormatError: %s" % e.value)
        print(module.config.R + "[!] The file is not a valid PE")
        sys.exit()

    rdbin = loader_pe(exe)
    check_arch(exe)

    # Run the execution
    my_sandbox([exefile], "examples/rootfs/x86_windows")

In [None]:
[=]     Initiate stack address at 0x7ffffffde000
[=]     Loading calc.exe to 0x140000000
[=]     PE entry point at 0x140001740
[=]     TEB is at 0x6000000
[=]     PEB is at 0x60001f0
[=]     LDR is at 0x60009c0
[!]     Failed to resolve api-ms-win-core-synch-l1-2-0.dll
[!]     Failed to resolve api-ms-win-core-processthreads-l1-1-0.dll
[!]     Failed to resolve api-ms-win-core-libraryloader-l1-2-0.dll
[x]     CPU Context:
[x]     ah      : 0x0
[x]     al      : 0x0
[x]     ch      : 0xcf
[x]     cl      : 0xe8
[x]     dh      : 0x0
[x]     dl      : 0x0
[x]     bh      : 0xa2
[x]     bl      : 0x32
[x]     ax      : 0x0
[x]     cx      : 0xcfe8
[x]     dx      : 0x0
[x]     bx      : 0xa232
[x]     sp      : 0xcfa0
[x]     bp      : 0xcfc8
[x]     si      : 0x0
[x]     di      : 0x0
[x]     ip      : 0x2a42
[x]     eax     : 0x0
[x]     ecx     : 0x1cfe8
[x]     edx     : 0x0
[x]     ebx     : 0x2ddfa232
[x]     esp     : 0x1cfa0
[x]     ebp     : 0x1cfc8
[x]     esi     : 0x0
[x]     edi     : 0x0
[x]     eip     : 0x2a42
[x]     rax     : 0x0
[x]     rbx     : 0x2b992ddfa232
[x]     rcx     : 0x80000001cfe8
[x]     rdx     : 0x0
[x]     rsi     : 0x0
[x]     rdi     : 0x0
[x]     rbp     : 0x80000001cfc8
[x]     rsp     : 0x80000001cfa0
[x]     r8      : 0x0
[x]     r9      : 0x0
[x]     r10     : 0x0
[x]     r11     : 0x0
[x]     r12     : 0x0
[x]     r13     : 0x0
[x]     r14     : 0x0
[x]     r15     : 0x0
[x]     rip     : 0x2a42
[x]     cr0     : 0x11
[x]     cr1     : 0x0
[x]     cr2     : 0x0
[x]     cr3     : 0x0
[x]     cr4     : 0x0
[x]     cr8     : 0x0
[x]     st0     : 0x0
[x]     st1     : 0x0
[x]     st2     : 0x0
[x]     st3     : 0x0
[x]     st4     : 0x0
[x]     st5     : 0x0
[x]     st6     : 0x0
[x]     st7     : 0x0
[x]     eflags  : 0x46
[x]     cs      : 0x33
[x]     ss      : 0x28
[x]     ds      : 0x0
[x]     es      : 0x0
[x]     fs      : 0x0
[x]     gs      : 0x0
[x]     r8b     : 0x0
[x]     r9b     : 0x0
[x]     r10b    : 0x0
[x]     r11b    : 0x0
[x]     r12b    : 0x0
[x]     r13b    : 0x0
[x]     r14b    : 0x0
[x]     r15b    : 0x0
[x]     r8w     : 0x0
[x]     r9w     : 0x0
[x]     r10w    : 0x0
[x]     r11w    : 0x0
[x]     r12w    : 0x0
[x]     r13w    : 0x0
[x]     r14w    : 0x0
[x]     r15w    : 0x0
[x]     r8d     : 0x0
[x]     r9d     : 0x0
[x]     r10d    : 0x0
[x]     r11d    : 0x0
[x]     r12d    : 0x0
[x]     r13d    : 0x0
[x]     r14d    : 0x0
[x]     r15d    : 0x0
[x]     fsbase  : 0x6000
[x]     gsbase  : 0x6000000
[x]     xmm0    : 0x0
[x]     xmm1    : 0x0
[x]     xmm2    : 0x0
[x]     xmm3    : 0x0
[x]     xmm4    : 0x0
[x]     xmm5    : 0x0
[x]     xmm6    : 0x0
[x]     xmm7    : 0x0
[x]     xmm8    : 0x0
[x]     xmm9    : 0x0
[x]     xmm10   : 0x0
[x]     xmm11   : 0x0
[x]     xmm12   : 0x0
[x]     xmm13   : 0x0
[x]     xmm14   : 0x0
[x]     xmm15   : 0x0
[x]     xmm16   : 0x0
[x]     xmm17   : 0x0
[x]     xmm18   : 0x0
[x]     xmm19   : 0x0
[x]     xmm20   : 0x0
[x]     xmm21   : 0x0
[x]     xmm22   : 0x0
[x]     xmm23   : 0x0
[x]     xmm24   : 0x0
[x]     xmm25   : 0x0
[x]     xmm26   : 0x0
[x]     xmm27   : 0x0
[x]     xmm28   : 0x0
[x]     xmm29   : 0x0
[x]     xmm30   : 0x0
[x]     xmm31   : 0x0
[x]     ymm0    : 0x0
[x]     ymm1    : 0x0
[x]     ymm2    : 0x0
[x]     ymm3    : 0x0
[x]     ymm4    : 0x0
[x]     ymm5    : 0x0
[x]     ymm6    : 0x0
[x]     ymm7    : 0x0
[x]     ymm8    : 0x0
[x]     ymm9    : 0x0
[x]     ymm10   : 0x0
[x]     ymm11   : 0x0
[x]     ymm12   : 0x0
[x]     ymm13   : 0x0
[x]     ymm14   : 0x0
[x]     ymm15   : 0x0
[x]     ymm16   : 0x0
[x]     ymm17   : 0x0
[x]     ymm18   : 0x0
[x]     ymm19   : 0x0
[x]     ymm20   : 0x0
[x]     ymm21   : 0x0
[x]     ymm22   : 0x0
[x]     ymm23   : 0x0
[x]     ymm24   : 0x0
[x]     ymm25   : 0x0
[x]     ymm26   : 0x0
[x]     ymm27   : 0x0
[x]     ymm28   : 0x0
[x]     ymm29   : 0x0
[x]     ymm30   : 0x0
[x]     ymm31   : 0x0
[x]     zmm0    : 0x0
[x]     zmm1    : 0x0
[x]     zmm2    : 0x0
[x]     zmm3    : 0x0
[x]     zmm4    : 0x0
[x]     zmm5    : 0x0
[x]     zmm6    : 0x0
[x]     zmm7    : 0x0
[x]     zmm8    : 0x0
[x]     zmm9    : 0x0
[x]     zmm10   : 0x0
[x]     zmm11   : 0x0
[x]     zmm12   : 0x0
[x]     zmm13   : 0x0
[x]     zmm14   : 0x0
[x]     zmm15   : 0x0
[x]     zmm16   : 0x0
[x]     zmm17   : 0x0
[x]     zmm18   : 0x0
[x]     zmm19   : 0x0
[x]     zmm20   : 0x0
[x]     zmm21   : 0x0
[x]     zmm22   : 0x0
[x]     zmm23   : 0x0
[x]     zmm24   : 0x0
[x]     zmm25   : 0x0
[x]     zmm26   : 0x0
[x]     zmm27   : 0x0
[x]     zmm28   : 0x0
[x]     zmm29   : 0x0
[x]     zmm30   : 0x0
[x]     zmm31   : 0x0
[x]     PC = 0x0000000000002a42 (unreachable)

[x]     Memory map:
[x]     Start            End              Perm    Label        Image
[x]     00000000006000 - 0000000000c000   rwx     [FS]
[x]     00000000030000 - 00000000031000   rwx     [GDT]
[x]     00000006000000 - 00000007400000   rwx     [GS]
[x]     00000140000000 - 0000014000b000   rwx     [calc.exe]   /calc.exe
[x]     00000500000000 - 00000500001000   rwx     [heap]
[x]     007ffffffde000 - 0080000001e000   rwx     [stack]
unicorn.unicorn.UcError: Invalid memory fetch (UC_ERR_FETCH_UNMAPPED)
[=]     Initiate stack address at 0x7ffffffde000
[=]     Loading calc.exe to 0x140000000
[=]     PE entry point at 0x140001740
[=]     TEB is at 0x6000000
[=]     PEB is at 0x60001f0
[=]     LDR is at 0x60009c0
[=]     Loading ntdll.dll ...
[=]     Done loading ntdll.dll
[=]     Loading kernel32.dll ...
[=]     Loading kernelbase.dll ...
[=]     Done loading kernelbase.dll
[=]     Done loading kernel32.dll
[=]     Loading ucrtbase.dll ...
[=]     Calling ucrtbase.dll DllMain at 0x180665e30
[=]     GetSystemTimeAsFileTime(lpSystemTimeAsFileTime = 0x80000001cfb8)
[x]     Error encountered while running ucrtbase.dll DllMain, bailing
[=]     Done loading ucrtbase.dll
[=]     Loading msvcrt.dll ...
[=]     Calling msvcrt.dll DllMain at 0x110107af0
[=]     GetSystemTimeAsFileTime(lpSystemTimeAsFileTime = 0x80000001cfb8)
[x]     Error encountered while running msvcrt.dll DllMain, bailing
[=]     Done loading msvcrt.dll
[=]     Loading advapi32.dll ...
[!]     Failed to resolve api-ms-win-eventing-controller-l1-1-0.dll
[!]     Failed to resolve api-ms-win-eventing-consumer-l1-1-0.dll
[!]     Failed to resolve api-ms-win-eventing-consumer-l1-1-1.dll
[!]     Failed to resolve api-ms-win-service-core-l1-1-0.dll
[!]     Failed to resolve api-ms-win-service-core-l1-1-1.dll
[!]     Failed to resolve api-ms-win-service-core-l1-1-2.dll
[!]     Failed to resolve api-ms-win-service-management-l1-1-0.dll
[!]     Failed to resolve api-ms-win-service-management-l2-1-0.dll
[!]     Failed to resolve api-ms-win-service-private-l1-1-4.dll
[!]     Failed to resolve api-ms-win-service-private-l1-1-2.dll
[!]     Failed to resolve api-ms-win-service-private-l1-1-3.dll
[!]     Failed to resolve api-ms-win-service-private-l1-1-0.dll
[!]     Failed to resolve api-ms-win-service-winsvc-l1-1-0.dll

## 8 - Ctype

In [54]:
import sys
from ctypes import *

WORD = c_ushort
DWORD = c_ulong
LPBYTE = POINTER(c_ubyte)
LPTSTR = POINTER(c_char)
HANDLE = c_void_p


# Specifies the window station, desktop, standard handles, and appearance 
# of the main window for a process at creation time.
class STARTUPINFO(Structure):
    _fields_ = [
        ('cb', DWORD),
        ('lpReserved', LPTSTR),
        ('lpDesktop', LPTSTR),
        ('lpTitle', LPTSTR),
        ('dwX', DWORD),
        ('dwY', DWORD),
        ('dwXSize', DWORD),
        ('dwYSize', DWORD),
        ('dwXCountChars', DWORD),
        ('dwYCountChars', DWORD),
        ('dwFillAttribute', DWORD),
        ('dwFlags', DWORD),
        ('wShowWindow', WORD),
        ('cbReserved2', WORD),
        ('lpReserved2', LPBYTE),
        ('hStdInput', HANDLE),
        ('hStdOutput', HANDLE),
        ('hStdError', HANDLE),
    ]

# Contains information about a newly created process and its primary thread. 
class PROCESS_INFORMATION(Structure):
    _fields_ = [
        ('hProcess', HANDLE),
        ('hThread', HANDLE),
        ('dwProcessId', DWORD),
        ('dwThreadId', DWORD),
    ]


# Process to create
exe = "C:\\Windows\\System32\\calc.exe"

# Import the kernel32 lib
kernel32 = windll.kernel32

# creation flag
CREATE_NEW_CONSOLE = 0x00000010
CREATE_SUSPENDED = 0x00000004
creation_flags = CREATE_NEW_CONSOLE | CREATE_SUSPENDED

startupinfo = STARTUPINFO()
processinfo = PROCESS_INFORMATION()
startupinfo.cb = sizeof(startupinfo)

try: 	
    kernel32.CreateProcessA(None, exe, None, None, None, creation_flags, None, None, byref(startupinfo), byref(processinfo))
    print("Process started as PID: {}".format(processinfo.dwProcessId))
    kernel32.CloseHandle(processinfo.hProcess)
    kernel32.CloseHandle(processinfo.hThread)
except Exception as e:
    print(e)
    kernel32.GetLastError()

Process started as PID: 0


In [None]:
# setup Qiling engine
ql = Qiling("file.exe", "place/of/Windows/dll/")#, output = "debug")
# disable strace logs
ql.filter = []
# now emulate the EXE
ql.run()

## 9 - Struct

In [112]:

import struct
  
# 4s -> four char to pack
# i -> integer
# f -> float
# l -> long

pack = struct.pack('4s i f l', b'test', 5, 3.14, 255)
print(pack)
  
# struct.unpack() return a tuples
unpack = struct.unpack('4s i f l', pack)
print(unpack)


b'test\x05\x00\x00\x00\xc3\xf5H@\xff\x00\x00\x00'
(b'test', 5, 3.140000104904175, 255)


In [41]:
# importing the struct module
import struct
# converting into bytes
converted_bytes = struct.pack('14s i', b'Tutorialspoint', 2020)
# converting into Python data types
print(struct.unpack('14s i', converted_bytes))

(b'Tutorialspoint', 2020)


## 10 - Yara-Python

In [55]:
import yara
rule = yara.compile(source='rule foo: bar {strings: $a = "lmn" condition: $a}')
matches = rule.match(data='abcdefgjiklmnoprstuvwxyz')
print(matches)
print(matches[0].rule)
print(matches[0].tags)
print(matches[0].strings)

[foo]
foo
['bar']
[(10, '$a', b'lmn')]


In [59]:
# Request to the Unprotect API
import requests
import json
response = requests.get("https://search.unprotect.it/api/")
jsonData = response.json()

def get_techniques_desc(url, tech):
    response2 = requests.get(url)   
    techniques = response2.json()
    if techniques['next'] != None:
        for technique in techniques["results"]:
            if technique['name'] == tech:
                print("Technique Name: " + technique['name'] + "\n")
                print("Description: " + technique['description']+ "\n")
                yarar = json.dumps(technique['detection_rules'][2]['rule'], indent=4)
                sigmar = json.dumps(technique['detection_rules'][0]['rule'], indent=4)
                capar = json.dumps(technique['detection_rules'][1]['rule'], indent=4)
                #for i in technique['snippets']:
                #    print(i["plain_code"] + "\n")
                return(yarar, sigmar, capar)
        get_techniques_desc(techniques['next'], "Kill Process")

yarar, sigmar, capar = get_techniques_desc("https://search.unprotect.it/api/techniques/", "Kill Process")

Technique Name: Kill Process

Description: Malware can kill processes such as AV process or monitoring process. For example, "wireshark.exe", "ida.exe", "procmon.exe" or any other process related to malware analysis tools in order to avoid the investigation.



In [145]:
#Using Yara against a file
import yara

rule = yara.compile(source = json.loads(yarar))
filename = "killprocess.exe"

matches = rule.match(filename)
if matches:
    print(matches)


[UNPROTECT_disable_process]
