# Model Robustness

**Objectives**
- Load the Model(s)
- Run predictions on the model using the Holdout/Test Datasplit (better if the malicious samples are verified for malware types)
- Record instances where the model failed to predict correctly
- Record the performance of the models (e.g., accuracy, precision, recall, ROC-AUC, etc.)

In [1]:
import pandas as pd

import lightgbm as lgbm
import catboost as catb

from joblib import load

## 1. Load the Test/Holdout Dataset

In [2]:
DF_LGBM_TB = pd.read_csv('../Dataset/TB/LGBM_TB_Test.csv', low_memory=False)
DF_LGBM_IB = pd.read_csv('../Dataset/IB/LGBM_IB_Test.csv', low_memory=False)

display(DF_LGBM_TB.head())
display(DF_LGBM_IB.head())

Unnamed: 0,malware,t_0,t_1,t_2,t_3,t_4,t_5,t_6,t_7,t_8,...,t_91,t_92,t_93,t_94,t_95,t_96,t_97,t_98,t_99,hash
0,1,154,177,191,154,177,191,154,134,135,...,135,141,136,135,136,135,136,135,134,f0f8ba4c3d750a4ce2deea48152a33d4
1,1,254,209,134,136,135,136,135,136,135,...,97,92,232,136,135,236,219,232,236,39b2d87c1adb582fbcacc3a56e274d48
2,1,95,153,170,153,134,135,134,153,262,...,134,135,134,135,153,134,135,180,154,429236cdeb63d68bf48a3b48b0a34612
3,1,231,177,191,154,177,191,154,184,139,...,97,156,159,174,154,94,153,16,97,46079cbf0bcfe8fab9894b4ec88bece3
4,1,95,136,135,136,135,136,135,136,135,...,95,232,236,232,236,232,236,232,236,303ceda3f52afa9b69ed4f97fec2c895


Unnamed: 0,malware,t_0,t_1,t_2,t_3,t_4,t_5,t_6,t_7,t_8,...,t_91,t_92,t_93,t_94,t_95,t_96,t_97,t_98,t_99,hash
0,1,154,177,191,134,135,93,153,232,56,...,148,148,148,148,148,148,148,148,148,f0f8ba4c3d750a4ce2deea48152a33d4
1,1,254,209,134,136,135,181,174,232,236,...,148,148,148,148,148,148,148,148,148,39b2d87c1adb582fbcacc3a56e274d48
2,1,95,153,170,134,135,262,136,231,235,...,148,148,148,148,148,148,148,148,148,429236cdeb63d68bf48a3b48b0a34612
3,1,231,177,191,154,184,139,153,134,135,...,148,148,148,148,148,148,148,148,148,46079cbf0bcfe8fab9894b4ec88bece3
4,1,95,136,135,134,262,28,177,191,154,...,148,148,148,148,148,148,148,148,148,303ceda3f52afa9b69ed4f97fec2c895


In [3]:
DF_CATB_TB = pd.read_csv('../Dataset/TB/CATB_TB_Test.csv', low_memory=False)
DF_CATB_IB = pd.read_csv('../Dataset/IB/CATB_IB_Test.csv', low_memory=False)

display(DF_CATB_TB.head())
display(DF_CATB_IB.head())

Unnamed: 0,malware,t_0,t_1,t_2,t_3,t_4,t_5,t_6,t_7,t_8,...,t_91,t_92,t_93,t_94,t_95,t_96,t_97,t_98,t_99,hash
0,1,NtClose,NtOpenKey,NtQueryValueKey,NtClose,NtOpenKey,NtQueryValueKey,NtClose,LdrGetDllHandle,LdrGetProcedureAddress,...,LdrGetProcedureAddress,LookupAccountSidW,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,LdrGetDllHandle,f0f8ba4c3d750a4ce2deea48152a33d4
1,1,SetErrorMode,OleInitialize,LdrGetDllHandle,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,...,GetSystemWindowsDirectoryW,GetSystemDirectoryW,RegOpenKeyExW,LdrLoadDll,LdrGetProcedureAddress,RegQueryValueExW,RegCloseKey,RegOpenKeyExW,RegQueryValueExW,39b2d87c1adb582fbcacc3a56e274d48
2,1,GetSystemTimeAsFileTime,NtAllocateVirtualMemory,NtFreeVirtualMemory,NtAllocateVirtualMemory,LdrGetDllHandle,LdrGetProcedureAddress,LdrGetDllHandle,NtAllocateVirtualMemory,SetUnhandledExceptionFilter,...,LdrGetDllHandle,LdrGetProcedureAddress,LdrGetDllHandle,LdrGetProcedureAddress,NtAllocateVirtualMemory,LdrGetDllHandle,LdrGetProcedureAddress,NtOpenProcess,NtClose,429236cdeb63d68bf48a3b48b0a34612
3,1,RegOpenKeyExA,NtOpenKey,NtQueryValueKey,NtClose,NtOpenKey,NtQueryValueKey,NtClose,NtQueryAttributesFile,LoadStringA,...,GetSystemWindowsDirectoryW,NtCreateFile,NtCreateSection,NtMapViewOfSection,NtClose,GetSystemMetrics,NtAllocateVirtualMemory,CreateActCtxW,GetSystemWindowsDirectoryW,46079cbf0bcfe8fab9894b4ec88bece3
4,1,GetSystemTimeAsFileTime,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,LdrLoadDll,LdrGetProcedureAddress,...,GetSystemTimeAsFileTime,RegOpenKeyExW,RegQueryValueExW,RegOpenKeyExW,RegQueryValueExW,RegOpenKeyExW,RegQueryValueExW,RegOpenKeyExW,RegQueryValueExW,303ceda3f52afa9b69ed4f97fec2c895


Unnamed: 0,malware,t_0,t_1,t_2,t_3,t_4,t_5,t_6,t_7,t_8,...,t_91,t_92,t_93,t_94,t_95,t_96,t_97,t_98,t_99,hash
0,1,NtClose,NtOpenKey,NtQueryValueKey,LdrGetDllHandle,LdrGetProcedureAddress,GetSystemInfo,NtAllocateVirtualMemory,RegOpenKeyExW,FindFirstFileExW,...,,,,,,,,,,f0f8ba4c3d750a4ce2deea48152a33d4
1,1,SetErrorMode,OleInitialize,LdrGetDllHandle,LdrLoadDll,LdrGetProcedureAddress,NtOpenSection,NtMapViewOfSection,RegOpenKeyExW,RegQueryValueExW,...,,,,,,,,,,39b2d87c1adb582fbcacc3a56e274d48
2,1,GetSystemTimeAsFileTime,NtAllocateVirtualMemory,NtFreeVirtualMemory,LdrGetDllHandle,LdrGetProcedureAddress,SetUnhandledExceptionFilter,LdrLoadDll,RegOpenKeyExA,RegQueryValueExA,...,,,,,,,,,,429236cdeb63d68bf48a3b48b0a34612
3,1,RegOpenKeyExA,NtOpenKey,NtQueryValueKey,NtClose,NtQueryAttributesFile,LoadStringA,NtAllocateVirtualMemory,LdrGetDllHandle,LdrGetProcedureAddress,...,,,,,,,,,,46079cbf0bcfe8fab9894b4ec88bece3
4,1,GetSystemTimeAsFileTime,LdrLoadDll,LdrGetProcedureAddress,LdrGetDllHandle,SetUnhandledExceptionFilter,CryptAcquireContextW,NtOpenKey,NtQueryValueKey,NtClose,...,,,,,,,,,,303ceda3f52afa9b69ed4f97fec2c895


## 3. Run `classification_report` and Confusion Matrix  in each of the GBDT Models and each of its two behavior-types.

### Run other metrics as well as you see fit that are not found in the classification_report and Confusion Matrix.

- Label column is index 0/'malicious'
- Feature column is indexes [1:101]

In [4]:
#LightGBM
default_tb = load('../GBDT_Training/Outputs/LGBM/Demo Train (Default)/DEMO_LGBM_TB.model')
default_ib = load('../GBDT_Training/Outputs/LGBM/Demo Train (Default)/DEMO_LGBM_IB.model')
tuned_tb = load('../GBDT_Training/Outputs/LGBM/Demo Train (Tuned)/TUNED_DEMO_LGBM_TB.model')
tuned_ib = load('../GBDT_Training/Outputs/LGBM/Demo Train (Tuned)/TUNED_DEMO_LGBM_IB.model')

'''
WRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY

1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB
2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB
3. CATB IB: DF_CATB_TB & MODEL_CATB_TB
4. CATB IB: DF_CATB_IB & MODEL_CATB_IB
'''

'\nWRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY\n\n1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB\n2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB\n3. CATB IB: DF_CATB_TB & MODEL_CATB_TB\n4. CATB IB: DF_CATB_IB & MODEL_CATB_IB\n'

In [5]:
#CatBoost (CatBoost requires instantiation before being able to load a trianed model file).
default_tb = catb.CatBoostClassifier()
default_ib = catb.CatBoostClassifier()
tuned_tb = catb.CatBoostClassifier()
tuned_ib = catb.CatBoostClassifier()
default_tb = default_tb.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Default)/DEMO_CATB_TB.model", format='json')
default_ib = default_ib.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Default)/DEMO_CATB_IB.model", format='json')
tuned_tb = tuned_tb.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Tuned)/TUNED_DEMO_CATB_TB.model", format='json')
tuned_ib = tuned_ib.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Tuned)/TUNED_DEMO_CATB_IB.model", format='json')

'''
WRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY

1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB
2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB
3. CATB IB: DF_CATB_TB & MODEL_CATB_TB
4. CATB IB: DF_CATB_IB & MODEL_CATB_IB
'''

'\nWRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY\n\n1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB\n2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB\n3. CATB IB: DF_CATB_TB & MODEL_CATB_TB\n4. CATB IB: DF_CATB_IB & MODEL_CATB_IB\n'

## 4. Check for model capabilities at a malware type level in each GBDT Models and each of its two behavior-types.

Using the `xx` dataset, determine the prediction capability of the model in each malware type.

1. Filter the validated dataset(s) to remove any samples with '_' on the `Type 1` column, leaving only malicious samples with specified malware types.
2. On each unique malware type (there are 9 of those), create a subset dataframe of each malware type and run the same classification report and confusion matrix to determine the model's performance on a per-malware type perspective. This will determine if the dataset's imbalance in terms of malware types affects model's performance and to determine the strengths and weakness of the model.

In [7]:
# LightGBM

#Load Verified Test/Holodut Dataset
DF_LGBM_TB = pd.read_csv('../Dataset/Valid_Mali_Test/LGBM_TB_Test_Malicious.csv', low_memory=False)
DF_LGBM_IB = pd.read_csv('../Dataset/Valid_Mali_Test/LGBM_IB_Test_Malicious.csv', low_memory=False)

#Models
default_tb = load('../GBDT_Training/Outputs/LGBM/Demo Train (Default)/DEMO_LGBM_TB.model')
default_ib = load('../GBDT_Training/Outputs/LGBM/Demo Train (Default)/DEMO_LGBM_IB.model')
tuned_tb = load('../GBDT_Training/Outputs/LGBM/Demo Train (Tuned)/TUNED_DEMO_LGBM_TB.model')
tuned_ib = load('../GBDT_Training/Outputs/LGBM/Demo Train (Tuned)/TUNED_DEMO_LGBM_IB.model')

'''
WRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY

1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB
2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB
3. CATB IB: DF_CATB_TB & MODEL_CATB_TB
4. CATB IB: DF_CATB_IB & MODEL_CATB_IB
'''

'\nWRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY\n\n1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB\n2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB\n3. CATB IB: DF_CATB_TB & MODEL_CATB_TB\n4. CATB IB: DF_CATB_IB & MODEL_CATB_IB\n'

In [8]:
# CatBoost

#Load Verified Test/Holdout Dataset
DF_CATB_TB = pd.read_csv('../Dataset/Valid_Mali_Test/CATB_TB_Test_Malicious.csv', low_memory=False)
DF_CATB_IB = pd.read_csv('../Dataset/Valid_Mali_Test/CATB_IB_Test_Malicious.csv', low_memory=False)

#CatBoost (CatBoost requires instantiation before being able to load a trianed model file).
default_tb = catb.CatBoostClassifier()
default_ib = catb.CatBoostClassifier()
tuned_tb = catb.CatBoostClassifier()
tuned_ib = catb.CatBoostClassifier()
default_tb = default_tb.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Default)/DEMO_CATB_TB.model", format='json')
default_ib = default_ib.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Default)/DEMO_CATB_IB.model", format='json')
tuned_tb = tuned_tb.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Tuned)/TUNED_DEMO_CATB_TB.model", format='json')
tuned_ib = tuned_ib.load_model("../GBDT_Training/Outputs/CATB/Demo Training (Tuned)/TUNED_DEMO_CATB_IB.model", format='json')

'''
WRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY

1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB
2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB
3. CATB IB: DF_CATB_TB & MODEL_CATB_TB
4. CATB IB: DF_CATB_IB & MODEL_CATB_IB
'''

'\nWRITE CODE HERE, THE [MODEL_xxxx_xx] IS A LOADED MODEL ALREADY, YOU CAN RUN PREDICT HRE ALREADY\n\n1. LGBM TB: DF_LGBM_TB & MODEL_LGBM_TB\n2. LGBM IB: DF_LGBM_IB & MODEL_LGBM_IB\n3. CATB IB: DF_CATB_TB & MODEL_CATB_TB\n4. CATB IB: DF_CATB_IB & MODEL_CATB_IB\n'