In [5]:
import speakeasy
import pandas as pd
import numpy as np
import os
import time
import pickle

# Dataset layout: every path below is derived from the dataset root.
MALWARE_PATH = "../../../data/pe.dataset/"
X86_PATH = f"{MALWARE_PATH}PeX86Exe/"
X86_RANSOMWARE = f"{X86_PATH}ransomware/"

# Folder the cells below read samples from.
FOLDER = X86_RANSOMWARE
# Entries 200..399 of the folder listing.
# NOTE: os.listdir returns names in arbitrary order, so the selected files may
# differ between machines or after the folder changes.
files_200 = os.listdir(FOLDER)[200:400]

The most common error in the ransomware folder was this:

```
>>> print(failed_aa["error.traceback"].iloc[0])

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/site-packages/speakeasy_emulator-1.5.9-py3.6.egg/speakeasy/windows/winemu.py", line 1168, in handle_import_func
    rv = self.api.call_api_func(mod, func, argv, ctx=default_ctx)
  File "/usr/local/lib/python3.6/site-packages/speakeasy_emulator-1.5.9-py3.6.egg/speakeasy/winenv/api/winapi.py", line 77, in call_api_func
    return func(mod, self.emu, argv, ctx)
  File "/usr/local/lib/python3.6/site-packages/speakeasy_emulator-1.5.9-py3.6.egg/speakeasy/winenv/api/usermode/kernel32.py", line 3587, in GetVolumeInformation
    cw = self.get_char_width(ctx)
  File "/usr/local/lib/python3.6/site-packages/speakeasy_emulator-1.5.9-py3.6.egg/speakeasy/winenv/api/api.py", line 343, in get_char_width
    raise ApiEmuError('Failed to get character width from function: %s' % (name))
speakeasy.errors.ApiEmuError: Failed to get character width from function: kernel32.GetVolumeInformation
```

This exception is largely cosmetic: it is not caused by an unimplemented API call.

According to the code in `speakeasy/winenv/api/api.py`:

```
    def get_char_width(self, ctx):
        """
        Based on the API name, determine the character width
        being used by the function
        """
        name = ctx.get('func_name', '')
        if name.endswith('A'):
            return 1
        elif name.endswith('W'):
            return 2
        raise ApiEmuError('Failed to get character width from function: %s' % (name))
```

So the real cause is that the function name arrives without its `A`/`W` suffix, i.e. `GetVolumeInformation` rather than, e.g., `GetVolumeInformationA`.

After a simple fix:
```
        if name.endswith('A'):
            return 1
        elif name.endswith('W'):
            return 2
        else:
            # default to A, if character width is not provided
            return 1
```
... emulation works:

In [None]:
# Emulate one sample and display the API calls recorded for its first entry point.
sample_path = FOLDER + "916bc6fd4d12c7f53f81d4e3bdf21271ac70a928fab0c9622b61e0a4ac504c9d"
se = speakeasy.Speakeasy()
module = se.load_module(sample_path)
se.run_module(module)

report = se.get_report()
first_entry_apis = [x["apis"] for x in report["entry_points"]][0]
pd.json_normalize(first_entry_apis)

Unnamed: 0,pc,api_name,args,ret_val
0,0x401c89,kernel32.GetVolumeInformation,"[C:\, 0x0, 0x0, 0x1211fdc, 0x0, 0x0, 0x0, 0x0]",0x1
1,0x401cbe,kernel32.GetVersionEx,[0x1211f4c],0x1
2,0x42947a,kernel32.GetProcAddress,"[0x77000000, CreateFileA]",0xfeee0000
3,0x42947a,kernel32.GetProcAddress,"[0x77000000, CreateFileW]",0xfeee0001
4,0x42947a,kernel32.GetProcAddress,"[0x77000000, WriteFile]",0xfeee0002
...,...,...,...,...
264,0x4282d2,kernel32.SetFileAttributesW,"[0x838000, 0x7]",0x0
265,0x4282f6,kernel32.CreateProcessW,[C:\Users\speakeasy_user\dgwgkEwU\OkIkcgIM.exe...,0x1
266,0x4284e9,advapi32.RegOpenKeyExA,"[HKEY_CURRENT_USER, software\microsoft\windows...",0x3
267,0x42852a,advapi32.RegSetValueExW,"[0x0, 0x838042, 0x0, 0x1, 0x838000]",0x0


# bulk analysis

In [6]:
# generated within "dump_reports_ransomware_200_400.py"
# Alternative dump (without the "_2ndrun" suffix): "reports_ransomware_200_400.pickle"
# NOTE: pickle.load can execute arbitrary code; only load dumps you produced yourself.
reports_pickle = "reports_ransomware_200_400_2ndrun.pickle"
with open(reports_pickle, "rb") as pickle_file:
    reports = pickle.load(pickle_file)

In [7]:
def parse_report(report):
    """Flatten one emulation report into up to three DataFrames.

    Parameters
    ----------
    report : dict
        A report holding an "entry_points" list of per-entry-point dicts.

    Returns
    -------
    aa : pandas.DataFrame
        ``pd.json_normalize(report)``.
    ab : pandas.DataFrame
        The entry points of the report, one row per entry point.
    ac : pandas.DataFrame or list
        The API calls of the FIRST entry point, one row per call. Falls back
        to an empty list when that entry point has no "apis" list or the list
        is empty.
    """
    aa = pd.json_normalize(report)
    ab = pd.json_normalize(aa["entry_points"].iloc[0])

    ac = []
    # Only the first entry point is normalized, so only its "apis" cell is
    # inspected. The column may be absent altogether, and the cell may be NaN
    # when another entry point carries "apis" but the first one does not.
    if "apis" in ab.columns:
        first_apis = ab["apis"].iloc[0]
        if isinstance(first_apis, list) and first_apis:
            ac = pd.json_normalize(first_apis)
    return aa, ab, ac

# Tally emulation outcomes: a report whose entry points carry no "error.type"
# column counts as a success; otherwise print what stopped the first entry
# point and how many API calls were recorded before it failed.
bs = []
success = 0
unsupported_apis = []
for sample_hash, sample_report in reports.items():
    entry_points = parse_report(sample_report)[1]
    if "error.type" not in entry_points.keys():
        success += 1
        continue

    error_type = entry_points["error.type"].iloc[0]
    if error_type == "unsupported_api":
        # For unsupported APIs the API name is more informative than the error type.
        label = entry_points["error.api_name"].iloc[0]
        unsupported_apis.append(label)
    else:
        label = error_type
    api_count = len(entry_points["apis"].iloc[0])
    print(sample_hash, " : ", label, f" len of api seq. before fail: {api_count}")

print(f"success ratio: {success*100/len(reports)} %")

7ebddf3d6d6e8cf129e5b678e924dfdba558f40de53210f37a578d8514ac172d  :  invalid_read  len of api seq. before fail: 269
7f8955c335492f7b6fecaf9857252235d45b132f68b481f08ecdc0ad18d9890a  :  KERNEL32.GetConsoleTitleW  len of api seq. before fail: 565
82cf2574cfd4f432d3345a06d99a6c67e9004935ddd520417b5bf7fc5256d551  :  invalid_read  len of api seq. before fail: 269
d042606747e507e80bdee11f13d40c167a8ad2d8adfdef9114f0cfb1965aa2e0  :  user32.GetCaretBlinkTime  len of api seq. before fail: 1
a6b94aeca7ac6e56137bf9d4f5550db79705793c64ff3e92366ce31da19400a6  :  Invalid memory write (UC_ERR_WRITE_UNMAPPED)  len of api seq. before fail: 0
582b529363991d0213d38825cc46245cd94e43aac86ae221280593413a2b35e3  :  invalid_fetch  len of api seq. before fail: 2
e8aa62ee56dbe1b2cad4674d592d3de2e3be45ee7abfc69ce700ae0ab6f36d40  :  invalid_read  len of api seq. before fail: 269
1468b37e66937115773ff355b4003454029e186598b974863c61fd62e7d6885a  :  invalid_read  len of api seq. before fail: 269
71910edd49efbbb47511

In [6]:
from collections import Counter

# The 15 unsupported APIs that stopped emulation most often.
unsupported_api_counts = Counter(unsupported_apis)
unsupported_api_counts.most_common(15)

[('advapi32.InitializeSecurityDescriptor', 87),
 ('USER32.SetMenuInfo', 6),
 ('user32.GetOpenClipboardWindow', 5),
 ('user32.GetCaretBlinkTime', 4),
 ('kernel32.GetSystemDefaultUILanguage', 3),
 ('user32.GetMessageTime', 3),
 ('user32.GetClipboardViewer', 3),
 ('user32.GetFocus', 3),
 ('user32.GetMessagePos', 3),
 ('kernel32.GetSystemDefaultLCID', 3),
 ('user32.GetMessageExtraInfo', 3),
 ('user32.GetKBCodePage', 2),
 ('kernel32.GetUserDefaultLCID', 2),
 ('user32.GetClipboardOwner', 2),
 ('KERNEL32.GetConsoleProcessList', 2)]

# specific example analysis

Investigate a few samples with short API sequences (obviously anti-debugging / anti-emulation techniques):

In [1]:
import speakeasy
import pandas as pd

# Dataset layout: every path below is derived from the dataset root.
MALWARE_PATH = "../../../data/pe.dataset/"
X86_PATH = f"{MALWARE_PATH}PeX86Exe/"
X86_RANSOMWARE = f"{X86_PATH}ransomware/"


def get_apis_n_errors(fullpath):
    """Emulate the module at ``fullpath`` and summarize the resulting report.

    Returns a pair ``(apis, errors)``:

    * ``apis``: DataFrame built from the "apis" list of the first entry point.
    * ``errors``: the columns of the normalized entry-point table whose name
      contains "error" (no columns at all when nothing was reported).
    """
    emulator = speakeasy.Speakeasy()
    loaded_module = emulator.load_module(fullpath)
    emulator.run_module(loaded_module)

    entry_points = emulator.get_report()["entry_points"]
    entry_point_table = pd.json_normalize(entry_points)
    error_columns = [column for column in entry_point_table.columns if "error" in column]
    errors = entry_point_table[error_columns]

    api_lists = [entry_point["apis"] for entry_point in entry_points]
    return pd.json_normalize(api_lists[0]), errors


In [2]:
# In the bulk run this sample stopped at user32.GetCaretBlinkTime after a single API call.
a, e = get_apis_n_errors(X86_RANSOMWARE + "d042606747e507e80bdee11f13d40c167a8ad2d8adfdef9114f0cfb1965aa2e0")
a

Unnamed: 0,pc,api_name,args,ret_val
0,0x401005,kernel32.GetConsoleWindow,[],0x198
1,0x40101b,user32.GetCaretBlinkTime,[],0x3e8
2,0x401031,user32.GetFocus,[],0x1a0
3,0x5188a3,kernel32.GetSystemDefaultUILanguage,[],0x409
4,0x5188cb,kernel32.GetSystemDefaultUILanguage,[],0x409
5,0x518734,kernel32.GetUserDefaultLCID,[],0x400
6,0x5183af,kernel32.GetUserDefaultLCID,[],0x400
7,0x5189af,kernel32.GetUserDefaultLCID,[],0x400
8,0x518907,kernel32.GetUserDefaultLCID,[],0x400
9,0x518268,kernel32.GetUserDefaultLCID,[],0x400


In [3]:
e

0


In [2]:
# Emulate the sample and display its API call trace.
a, e = get_apis_n_errors(X86_RANSOMWARE + "37f5efd1aed9594306f7d26841829d6f11d0e73efc68eeda0ebde9ee1f1cfe75")
a

Unnamed: 0,pc,api_name,args,ret_val
0,0x401008,kernel32.GetProcessHeap,[],0x45f0
1,0x401013,user32.GetMessageTime,[],0x602a6137


In [3]:
e

0


In [14]:
# Same path as before, built from the shared folder constant instead of a repeated literal.
a, e = get_apis_n_errors(X86_RANSOMWARE + "8bf742a7ff63433089f69de0cca6007257545d068a1e7d5ab92ebe2302825741")

In [15]:
a

Unnamed: 0,pc,api_name,args,ret_val
0,0x401008,kernel32.GetCurrentThreadId,[],0xb00
1,0x401013,kernel32.GetCurrentThreadId,[],0xb00


In [16]:
e

0


In [2]:
# Same path as before, built from the shared folder constant instead of a repeated literal.
a, e = get_apis_n_errors(X86_RANSOMWARE + "a2d6949a5a0aeef96eeb2c18e4ac7f642b72f0741a0c86518c816a29dae98430")
a

Unnamed: 0,pc,api_name,args,ret_val
0,0x401008,user32.GetFocus,[],0x1a0
1,0x401013,user32.GetFocus,[],0x1a4


In [3]:
e

0


In [4]:
# Same path as before, built from the shared folder constant instead of a repeated literal.
a, e = get_apis_n_errors(X86_RANSOMWARE + "e76b579c0f7c849d3dcf910366e28e7943214f1f1fe9c26e51446520a959d841")

In [5]:
a

Unnamed: 0,pc,api_name,args,ret_val
0,0x401008,kernel32.GetCurrentThreadId,[],0x528
1,0x401013,user32.GetMessagePos,[],0x19999999


In [6]:
e

0


In [2]:
# Same path as before, built from the shared folder constant instead of a repeated literal.
a, e = get_apis_n_errors(X86_RANSOMWARE + "9c52bb6f877d0439b6bd428ebac249588f8bda566fc6b3c5fb5ebd8cd0cfcaba")
a

Unnamed: 0,pc,api_name,args,ret_val
0,0x401008,user32.GetCursor,[],0x1a0
1,0x401013,user32.GetClipboardSequenceNumber,[],0x0


In [3]:
e

0
