# Importing Libraries

In [1]:
import pandas as pd
import numpy as np

#Evaluation and Testing Systems
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split

#ML Models
from sklearn.ensemble import AdaBoostClassifier #AdaBoost
import xgboost as xgb #XGBoost; Install via pip first
import lightgbm as lgb #LightGBM; pip install lightgbm

# Importing Datasets

## MalBehavD-V1

**Dataset URL:** https://github.com/mpasco/MalbehavD-V1

In [2]:
malbehavd = pd.read_csv("../Datasets/MalBehavD_2022/MalBehavD-V1-dataset.csv")

**0. Dataset Specifications**

In [3]:
malbehavd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2570 entries, 0 to 2569
Columns: 177 entries, sha256 to Unnamed: 176
dtypes: int64(1), object(176)
memory usage: 3.5+ MB


**1. Entire Dataset**

In [4]:
malbehavd

Unnamed: 0,sha256,labels,0,1,2,3,4,5,6,7,...,Unnamed: 167,Unnamed: 168,Unnamed: 169,Unnamed: 170,Unnamed: 171,Unnamed: 172,Unnamed: 173,Unnamed: 174,Unnamed: 175,Unnamed: 176
0,5c18291c481a192ed5003084dab2d8a117fd3736359218...,0,LdrUnloadDll,CoUninitialize,NtQueryKey,NtDuplicateObject,GetShortPathNameW,GetSystemInfo,IsDebuggerPresent,GetSystemWindowsDirectoryW,...,,,,,,,,,,
1,4683faf3da550ffb594cf5513c4cbb34f64df85f27fd1c...,0,NtOpenMutant,GetForegroundWindow,NtQueryKey,DrawTextExW,NtSetInformationFile,RegQueryValueExA,LdrGetProcedureAddress,CoUninitialize,...,,,,,,,,,,
2,9a0aea1c7290031d7c3429d0e921f107282cc6eab854ee...,0,GetForegroundWindow,DrawTextExW,GetSystemInfo,IsDebuggerPresent,GetSystemWindowsDirectoryW,NtQueryValueKey,RegCloseKey,GetFileAttributesW,...,,,,,,,,,,
3,e0f3e4d5f50afd9c31e51dd9941c5a52d57c7c524f5d11...,0,NtQueryValueKey,LdrUnloadDll,GlobalMemoryStatus,WriteConsoleA,NtOpenKey,LdrGetProcedureAddress,NtTerminateProcess,NtClose,...,,,,,,,,,,
4,ec2b6d29992f13e74015ff0b129150b4afae15c593e4b7...,0,LdrUnloadDll,GetSystemTimeAsFileTime,NtOpenKey,WSAStartup,SetUnhandledExceptionFilter,NtTerminateProcess,NtClose,NtAllocateVirtualMemory,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2565,ed6a3fc04af435176b9c2f3024eb53c31d1e522da52c5c...,1,CreateToolhelp32Snapshot,GetCursorPos,CoUninitialize,RegCloseKey,LdrUnloadDll,DrawTextExW,NtSetInformationFile,CopyFileA,...,,,,,,,,,,
2566,ed5d70a13633a46355c0c2f9905ba29b7b74dfdb4db321...,1,NtDuplicateObject,RegCloseKey,LdrUnloadDll,NtSetInformationFile,RegQueryValueExA,NtTerminateProcess,NtQueryValueKey,RegQueryValueExW,...,,,,,,,,,,
2567,ed5addbdbe5f56f108530148c71ab7db806ac9324395d0...,1,GetCursorPos,NtOpenSection,CoUninitialize,RegCloseKey,LdrUnloadDll,GetSystemInfo,RegQueryValueExA,NtTerminateProcess,...,,,,,,,,,,
2568,ed4f4518e3120a4fd8ff6c61bf072d4de60264711a9196...,1,NtAllocateVirtualMemory,LdrGetProcedureAddress,SetUnhandledExceptionFilter,GetFileType,GetSystemTimeAsFileTime,LdrLoadDll,LdrGetDllHandle,NtProtectVirtualMemory,...,,,,,,,,,,


**2. Features Only**

In [5]:
features = malbehavd.iloc[:, 2:177]
features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,Unnamed: 167,Unnamed: 168,Unnamed: 169,Unnamed: 170,Unnamed: 171,Unnamed: 172,Unnamed: 173,Unnamed: 174,Unnamed: 175,Unnamed: 176
0,LdrUnloadDll,CoUninitialize,NtQueryKey,NtDuplicateObject,GetShortPathNameW,GetSystemInfo,IsDebuggerPresent,GetSystemWindowsDirectoryW,NtClose,GetFileVersionInfoSizeW,...,,,,,,,,,,
1,NtOpenMutant,GetForegroundWindow,NtQueryKey,DrawTextExW,NtSetInformationFile,RegQueryValueExA,LdrGetProcedureAddress,CoUninitialize,NtQueryValueKey,RegCloseKey,...,,,,,,,,,,
2,GetForegroundWindow,DrawTextExW,GetSystemInfo,IsDebuggerPresent,GetSystemWindowsDirectoryW,NtQueryValueKey,RegCloseKey,GetFileAttributesW,RegQueryValueExW,NtMapViewOfSection,...,,,,,,,,,,
3,NtQueryValueKey,LdrUnloadDll,GlobalMemoryStatus,WriteConsoleA,NtOpenKey,LdrGetProcedureAddress,NtTerminateProcess,NtClose,NtAllocateVirtualMemory,LdrGetDllHandle,...,,,,,,,,,,
4,LdrUnloadDll,GetSystemTimeAsFileTime,NtOpenKey,WSAStartup,SetUnhandledExceptionFilter,NtTerminateProcess,NtClose,NtAllocateVirtualMemory,NtQueryAttributesFile,LdrGetDllHandle,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2565,CreateToolhelp32Snapshot,GetCursorPos,CoUninitialize,RegCloseKey,LdrUnloadDll,DrawTextExW,NtSetInformationFile,CopyFileA,GetSystemWindowsDirectoryW,NtQueryValueKey,...,,,,,,,,,,
2566,NtDuplicateObject,RegCloseKey,LdrUnloadDll,NtSetInformationFile,RegQueryValueExA,NtTerminateProcess,NtQueryValueKey,RegQueryValueExW,NtFreeVirtualMemory,NtCreateThreadEx,...,,,,,,,,,,
2567,GetCursorPos,NtOpenSection,CoUninitialize,RegCloseKey,LdrUnloadDll,GetSystemInfo,RegQueryValueExA,NtTerminateProcess,NtQueryValueKey,GetFileAttributesW,...,,,,,,,,,,
2568,NtAllocateVirtualMemory,LdrGetProcedureAddress,SetUnhandledExceptionFilter,GetFileType,GetSystemTimeAsFileTime,LdrLoadDll,LdrGetDllHandle,NtProtectVirtualMemory,NtQueryValueKey,LdrUnloadDll,...,,,,,,,,,,


**3. Unique API calls on MalBehavD-V1**

In [6]:
combined_summary = []
features['summary'] = features.values.tolist()
for i in range(features['summary'].shape[0]):
    length = len(features['summary'].loc[i])
    for j in range(length-1):
        combined_summary.append(features['summary'].loc[i][j])
print("combined_summary:", len(combined_summary))
malbehavd_features = pd.Series(combined_summary).dropna().drop_duplicates().reset_index()
malbehavd_features.rename(columns={'index': 'index', 0: 'api_calls'}, inplace=True)
malbehavd_features.drop(axis=1, labels="index", inplace=True)
malbehavd_features.sort_values(inplace=True, by='api_calls')
malbehavd_features
#malbehavd_features = pd.Series(combined_summary).dropna().drop_duplicates()

combined_summary: 447180


Unnamed: 0,api_calls
142,CertControlStore
245,CertCreateCertificateContext
138,CertOpenStore
277,CertOpenSystemStoreW
89,CoCreateInstance
...,...
264,sendto
83,setsockopt
222,shutdown
168,socket


## Oliveira Dataset

**Dataset URL:** https://ieee-dataport.org/open-access/malware-analysis-datasets-api-call-sequences

**Modification:** The version of the dataset here was converted from the

In [7]:
og_oliveira = pd.read_csv("../Datasets/Oliveira_2019/dynamic_api_call_sequence_per_malware_100_0_306.csv")

**0. Dataset Specifications (Original Dataset)**

In [8]:
og_oliveira.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43876 entries, 0 to 43875
Columns: 102 entries, hash to malware
dtypes: int64(101), object(1)
memory usage: 34.1+ MB


**1. Entire Dataset (Original)**

In [9]:
mal_col = og_oliveira.pop('malware')
og_oliveira.insert(1, mal_col.name, mal_col)
og_oliveira

Unnamed: 0,hash,malware,t_0,t_1,t_2,t_3,t_4,t_5,t_6,t_7,...,t_90,t_91,t_92,t_93,t_94,t_95,t_96,t_97,t_98,t_99
0,071e8c3f8922e186e57548cd4c703a5d,1,112,274,158,215,274,158,215,298,...,117,71,297,135,171,215,35,208,56,71
1,33f8e6d08a6aae939f25a8e0d63dd523,1,82,208,187,208,172,117,172,117,...,60,81,240,117,71,297,135,171,215,35
2,b68abd064e975e1c6d5f25e748663076,1,16,110,240,117,240,117,240,117,...,123,65,112,123,65,112,123,65,113,112
3,72049be7bd30ea61297ea624ae198067,1,82,208,187,208,172,117,172,117,...,215,208,302,208,302,187,208,302,228,302
4,c9b3700a77facf29172f32df6bc77f48,1,82,240,117,240,117,240,117,240,...,40,209,260,40,209,260,141,260,141,260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43871,e3d6d58faa040f0f9742c9d0eaf58be4,1,82,240,117,240,117,240,117,240,...,260,141,260,141,260,141,260,141,260,141
43872,9b917bab7f32188ae40c744f2be9aaf8,1,82,240,117,240,117,240,117,240,...,82,159,224,82,159,224,82,159,224,82
43873,35a18ee05f75f04912018d9f462cb990,1,82,240,117,240,117,240,117,240,...,141,260,141,260,141,260,141,260,141,260
43874,654139d715abcf7ecdddbef5a84f224b,1,82,240,117,240,117,240,117,240,...,260,141,260,141,260,141,260,141,260,141


**2. Entire Dataset (Pre-Cleaned)**

In [10]:
oliveira = pd.read_csv("../Datasets/Oliveira_2019/cleaned_dynamic_api_call_sequence_per_malware_100_0_306.csv")
mal_col = oliveira.pop('malware')
oliveira.insert(1, mal_col.name, mal_col)
oliveira

Unnamed: 0,hash,malware,t_0,t_1,t_2,t_3,t_4,t_5,t_6,t_7,...,t_90,t_91,t_92,t_93,t_94,t_95,t_96,t_97,t_98,t_99
0,071e8c3f8922e186e57548cd4c703a5d,1,HttpSendRequestA,WSAAccept,NtCreateSection,Process32NextW,WSAAccept,NtCreateSection,Process32NextW,recvfrom,...,InternetConnectA,GetComputerNameW,recv,LdrGetProcedureAddress,NtLoadDriver,Process32NextW,CryptHashData,OleInitialize,FindFirstFileExW,GetComputerNameW
1,33f8e6d08a6aae939f25a8e0d63dd523,1,GetFileVersionInfoExW,OleInitialize,NtQueryKey,OleInitialize,NtLoadKey,InternetConnectA,NtLoadKey,InternetConnectA,...,FindResourceW,GetFileType,RemoveDirectoryW,InternetConnectA,GetComputerNameW,recv,LdrGetProcedureAddress,NtLoadDriver,Process32NextW,CryptHashData
2,b68abd064e975e1c6d5f25e748663076,1,CreateActCtxW,HttpOpenRequestW,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,...,InternetGetConnectedStateExW,GetAdaptersAddresses,HttpSendRequestA,InternetGetConnectedStateExW,GetAdaptersAddresses,HttpSendRequestA,InternetGetConnectedStateExW,GetAdaptersAddresses,HttpSendRequestW,HttpSendRequestA
3,72049be7bd30ea61297ea624ae198067,1,GetFileVersionInfoExW,OleInitialize,NtQueryKey,OleInitialize,NtLoadKey,InternetConnectA,NtLoadKey,InternetConnectA,...,Process32NextW,OleInitialize,setsockopt,OleInitialize,setsockopt,NtQueryKey,OleInitialize,setsockopt,RegEnumValueA,setsockopt
4,c9b3700a77facf29172f32df6bc77f48,1,GetFileVersionInfoExW,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,...,CryptUnprotectMemory,OpenSCManagerA,SetStdHandle,CryptUnprotectMemory,OpenSCManagerA,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43871,e3d6d58faa040f0f9742c9d0eaf58be4,1,GetFileVersionInfoExW,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,...,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW
43872,9b917bab7f32188ae40c744f2be9aaf8,1,GetFileVersionInfoExW,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,...,GetFileVersionInfoExW,NtCreateThreadEx,RegDeleteValueW,GetFileVersionInfoExW,NtCreateThreadEx,RegDeleteValueW,GetFileVersionInfoExW,NtCreateThreadEx,RegDeleteValueW,GetFileVersionInfoExW
43873,35a18ee05f75f04912018d9f462cb990,1,GetFileVersionInfoExW,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,...,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle
43874,654139d715abcf7ecdddbef5a84f224b,1,GetFileVersionInfoExW,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,InternetConnectA,RemoveDirectoryW,...,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW,SetStdHandle,LookupAccountSidW


**3. Unique API calls on Oliveira**

In [11]:
oliveira_idx = ['NtOpenThread', 'ExitWindowsEx', 'FindResourceW', 'CryptExportKey', 'CreateRemoteThreadEx', 'MessageBoxTimeoutW', 'InternetCrackUrlW', 'StartServiceW', 'GetFileSize', 'GetVolumeNameForVolumeMountPointW', 'GetFileInformationByHandle', 'CryptAcquireContextW', 'RtlDecompressBuffer', 'SetWindowsHookExA', 'RegSetValueExW', 'LookupAccountSidW', 'SetUnhandledExceptionFilter', 'InternetConnectA', 'GetComputerNameW', 'RegEnumValueA', 'NtOpenFile', 'NtSaveKeyEx', 'HttpOpenRequestA', 'recv', 'GetFileSizeEx', 'LoadStringW', 'SetInformationJobObject', 'WSAConnect', 'CryptDecrypt', 'GetTimeZoneInformation', 'InternetOpenW', 'CoInitializeEx', 'CryptGenKey', 'GetAsyncKeyState', 'NtQueryInformationFile', 'GetSystemMetrics', 'NtDeleteValueKey', 'NtOpenKeyEx', 'sendto', 'IsDebuggerPresent', 'RegQueryInfoKeyW', 'NetShareEnum', 'InternetOpenUrlW', 'WSASocketA', 'CopyFileExW', 'connect', 'ShellExecuteExW', 'SearchPathW', 'GetUserNameA', 'InternetOpenUrlA', 'LdrUnloadDll', 'EnumServicesStatusW', 'EnumServicesStatusA', 'WSASend', 'CopyFileW', 'NtDeleteFile', 'CreateActCtxW', 'timeGetTime', 'MessageBoxTimeoutA', 'CreateServiceA', 'FindResourceExW', 'WSAAccept', 'InternetConnectW', 'HttpSendRequestA', 'GetVolumePathNameW', 'RegCloseKey', 'InternetGetConnectedStateExW', 'GetAdaptersInfo', 'shutdown', 'NtQueryMultipleValueKey', 'NtQueryKey', 'GetSystemWindowsDirectoryW', 'GlobalMemoryStatusEx', 'GetFileAttributesExW', 'OpenServiceW', 'getsockname', 'LoadStringA', 'UnhookWindowsHookEx', 'NtCreateUserProcess', 'Process32NextW', 'CreateThread', 'LoadResource', 'GetSystemTimeAsFileTime', 'SetStdHandle', 'CoCreateInstanceEx', 'GetSystemDirectoryA', 'NtCreateMutant', 'RegCreateKeyExW', 'IWbemServices_ExecQuery', 'NtDuplicateObject', 'Thread32First', 'OpenSCManagerW', 'CreateServiceW', 'GetFileType', 'MoveFileWithProgressW', 'NtDeviceIoControlFile', 'GetFileInformationByHandleEx', 'CopyFileA', 'NtLoadKey', 'GetNativeSystemInfo', 'NtOpenProcess', 'CryptUnprotectMemory', 'InternetWriteFile', 'ReadProcessMemory', 'gethostbyname', 'WSASendTo', 'NtOpenSection', 'listen', 'WSAStartup', 'socket', 'OleInitialize', 'FindResourceA', 'RegOpenKeyExA', 'RegEnumKeyExA', 'NtQueryDirectoryFile', 'CertOpenSystemStoreW', 'ControlService', 'LdrGetProcedureAddress', 'GlobalMemoryStatus', 'NtSetInformationFile', 'OutputDebugStringA', 'GetAdaptersAddresses', 'CoInitializeSecurity', 'RegQueryValueExA', 'NtQueryFullAttributesFile', 'DeviceIoControl', '__anomaly__', 'DeleteFileW', 'GetShortPathNameW', 'NtGetContextThread', 'GetKeyboardState', 'RemoveDirectoryA', 'InternetSetStatusCallback', 'NtResumeThread', 'SetFileInformationByHandle', 'NtCreateSection', 'NtQueueApcThread', 'accept', 'DecryptMessage', 'GetUserNameExW', 'SizeofResource', 'RegQueryValueExW', 'SetWindowsHookExW', 'HttpOpenRequestW', 'CreateDirectoryW', 'InternetOpenA', 'GetFileVersionInfoExW', 'FindWindowA', 'closesocket', 'RtlAddVectoredExceptionHandler', 'IWbemServices_ExecMethod', 'GetDiskFreeSpaceExW', 'TaskDialog', 'WriteConsoleW', 'CryptEncrypt', 'WSARecvFrom', 'NtOpenMutant', 'CoGetClassObject', 'NtQueryValueKey', 'NtDelayExecution', 'select', 'HttpQueryInfoA', 'GetVolumePathNamesForVolumeNameW', 'RegDeleteValueW', 'InternetCrackUrlA', 'OpenServiceA', 'InternetSetOptionA', 'CreateDirectoryExW', 'bind', 'NtShutdownSystem', 'DeleteUrlCacheEntryA', 'NtMapViewOfSection', 'LdrGetDllHandle', 'NtCreateKey', 'GetKeyState', 'CreateRemoteThread', 'NtEnumerateValueKey', 'SetFileAttributesW', 'NtUnmapViewOfSection', 'RegDeleteValueA', 'CreateJobObjectW', 'send', 'NtDeleteKey', 'SetEndOfFile', 'GetUserNameExA', 'GetComputerNameA', 'URLDownloadToFileW', 'NtFreeVirtualMemory', 'recvfrom', 'NtUnloadDriver', 'NtTerminateThread', 'CryptUnprotectData', 'NtCreateThreadEx', 'DeleteService', 'GetFileAttributesW', 'GetFileVersionInfoSizeExW', 'OpenSCManagerA', 'WriteProcessMemory', 'GetSystemInfo', 'SetFilePointer', 'Module32FirstW', 'ioctlsocket', 'RegEnumKeyW', 'RtlCompressBuffer', 'SendNotifyMessageW', 'GetAddrInfoW', 'CryptProtectData', 'Thread32Next', 'NtAllocateVirtualMemory', 'RegEnumKeyExW', 'RegSetValueExA', 'DrawTextExA', 'CreateToolhelp32Snapshot', 'FindWindowW', 'CoUninitialize', 'NtClose', 'WSARecv', 'CertOpenStore', 'InternetGetConnectedState', 'RtlAddVectoredContinueHandler', 'RegDeleteKeyW', 'SHGetSpecialFolderLocation', 'CreateProcessInternalW', 'NtCreateDirectoryObject', 'EnumWindows', 'DrawTextExW', 'RegEnumValueW', 'SendNotifyMessageA', 'NtProtectVirtualMemory', 'NetUserGetLocalGroups', 'GetUserNameW', 'WSASocketW', 'getaddrinfo', 'AssignProcessToJobObject', 'SetFileTime', 'WriteConsoleA', 'CryptDecodeObjectEx', 'EncryptMessage', 'system', 'NtSetContextThread', 'LdrLoadDll', 'InternetGetConnectedStateExA', 'RtlCreateUserThread', 'GetCursorPos', 'Module32NextW', 'RegCreateKeyExA', 'NtLoadDriver', 'NetUserGetInfo', 'SHGetFolderPathW', 'GetBestInterfaceEx', 'CertControlStore', 'StartServiceA', 'NtWriteFile', 'Process32FirstW', 'NtReadVirtualMemory', 'GetDiskFreeSpaceW', 'GetFileVersionInfoW', 'FindFirstFileExW', 'FindWindowExW', 'GetSystemWindowsDirectoryA', 'RegOpenKeyExW', 'CoCreateInstance', 'NtQuerySystemInformation', 'LookupPrivilegeValueW', 'NtReadFile', 'ReadCabinetState', 'GetForegroundWindow', 'InternetCloseHandle', 'FindWindowExA', 'ObtainUserAgentString', 'CryptCreateHash', 'GetTempPathW', 'CryptProtectMemory', 'NetGetJoinInformation', 'NtOpenKey', 'GetSystemDirectoryW', 'DnsQuery_A', 'RegQueryInfoKeyA', 'NtEnumerateKey', 'RegisterHotKey', 'RemoveDirectoryW', 'FindFirstFileExA', 'CertOpenSystemStoreA', 'NtTerminateProcess', 'NtSetValueKey', 'CryptAcquireContextA', 'SetErrorMode', 'UuidCreate', 'RtlRemoveVectoredExceptionHandler', 'RegDeleteKeyA', 'setsockopt', 'FindResourceExA', 'NtSuspendThread', 'GetFileVersionInfoSizeW', 'NtOpenDirectoryObject', 'InternetQueryOptionA', 'InternetReadFile', 'NtCreateFile', 'NtQueryAttributesFile', 'HttpSendRequestW', 'CryptHashMessage', 'CryptHashData', 'NtWriteVirtualMemory', 'SetFilePointerEx', 'CertCreateCertificateContext', 'DeleteUrlCacheEntryW', '__exception__']
oliveira_idx.sort()
print(len(oliveira_idx))
oliveira_idx

307


['AssignProcessToJobObject',
 'CertControlStore',
 'CertCreateCertificateContext',
 'CertOpenStore',
 'CertOpenSystemStoreA',
 'CertOpenSystemStoreW',
 'CoCreateInstance',
 'CoCreateInstanceEx',
 'CoGetClassObject',
 'CoInitializeEx',
 'CoInitializeSecurity',
 'CoUninitialize',
 'ControlService',
 'CopyFileA',
 'CopyFileExW',
 'CopyFileW',
 'CreateActCtxW',
 'CreateDirectoryExW',
 'CreateDirectoryW',
 'CreateJobObjectW',
 'CreateProcessInternalW',
 'CreateRemoteThread',
 'CreateRemoteThreadEx',
 'CreateServiceA',
 'CreateServiceW',
 'CreateThread',
 'CreateToolhelp32Snapshot',
 'CryptAcquireContextA',
 'CryptAcquireContextW',
 'CryptCreateHash',
 'CryptDecodeObjectEx',
 'CryptDecrypt',
 'CryptEncrypt',
 'CryptExportKey',
 'CryptGenKey',
 'CryptHashData',
 'CryptHashMessage',
 'CryptProtectData',
 'CryptProtectMemory',
 'CryptUnprotectData',
 'CryptUnprotectMemory',
 'DecryptMessage',
 'DeleteFileW',
 'DeleteService',
 'DeleteUrlCacheEntryA',
 'DeleteUrlCacheEntryW',
 'DeviceIoControl',

In [12]:
features = oliveira.iloc[:, 2:]
combined_summary = []
features['summary'] = features.values.tolist()
for i in range(features['summary'].shape[0]):
    length = len(features['summary'].loc[i])
    for j in range(length-1):
        combined_summary.append(features['summary'].loc[i][j])
print("combined_summary:", len(combined_summary))
oliveira_features = pd.Series(combined_summary).dropna().drop_duplicates().reset_index()
oliveira_features.rename(columns={'index': 'index', 0: 'api_calls'}, inplace=True)
oliveira_features.drop(axis=1, labels="index", inplace=True)
oliveira_features.sort_values(inplace=True, by='api_calls')
oliveira_features

combined_summary: 4343724


Unnamed: 0,api_calls
157,AssignProcessToJobObject
57,CertCreateCertificateContext
225,CertOpenStore
176,CertOpenSystemStoreW
205,CoCreateInstance
...,...
50,sendto
47,setsockopt
95,shutdown
202,system


## Comparing MalbehavD-V1 & Oliveira Dataset