In [99]:
# Import Libraries
import pandas as pd
import lightgbm as lgbm
import catboost as catb
from joblib import load

import sklearn.metrics as metric

In [100]:
# Load Models
# <== Point every path below to the respective .model file.

# LightGBM models were persisted with joblib. joblib files are pickle-based,
# so only load .model files that come from a trusted source.
default_tb_lgbm = load(
    './GBDT_Training/Outputs/Results/Demo/LGBM/Train (Default)/DEMO_LGBM_TB.model'
)
default_ib_lgbm = load(
    './GBDT_Training/Outputs/Results/Demo/LGBM/Train (Default)/DEMO_LGBM_IB.model'
)
tuned_tb_lgbm = load(
    './GBDT_Training/Outputs/Results/Demo/LGBM/Train (Tuned)/TUNED_DEMO_LGBM_TB.model'
)
tuned_ib_lgbm = load(
    './GBDT_Training/Outputs/Results/Demo/LGBM/Train (Tuned)/TUNED_DEMO_LGBM_IB.model'
)

# CatBoost models: an empty classifier is created and immediately populated from
# a JSON-format model file; the value returned by load_model is what each name keeps.
default_tb_catb = catb.CatBoostClassifier().load_model(
    "./GBDT_Training/Outputs/Results/Demo/CATB/Train (Default)//DEMO_CATB_TB.model",
    format='json',
)
default_ib_catb = catb.CatBoostClassifier().load_model(
    "./GBDT_Training/Outputs/Results/Demo/CATB/Train (Default)/DEMO_CATB_IB.model",
    format='json',
)
tuned_tb_catb = catb.CatBoostClassifier().load_model(
    "./GBDT_Training/Outputs/Results/Demo/CATB/Train (Tuned)/TUNED_DEMO_CATB_TB.model",
    format='json',
)
tuned_ib_catb = catb.CatBoostClassifier().load_model(
    "./GBDT_Training/Outputs/Results/Demo/CATB/Train (Tuned)/TUNED_DEMO_CATB_IB.model",
    format='json',
)

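Security note: the joblib-persisted models loaded above are pickle-based, so only load `.model` files that come from a trusted source. See https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations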


In [101]:
# Load Test/Holdout Datasets
# <== Point every path below to the proper Test/Holdout dataset.

# LightGBM hold-out frames are used exactly as read from disk.
DF_LGBM_TB = pd.read_csv('./Dataset/TB/LGBM_TB_Test.csv', low_memory=False)
DF_LGBM_IB = pd.read_csv('./Dataset/IB/LGBM_IB_Test.csv', low_memory=False)

# CatBoost hold-out frames: every missing value is replaced by the literal
# string "NaN" right after the file is read.
DF_CATB_TB = pd.read_csv('./Dataset/TB/CATB_TB_Test.csv', low_memory=False).fillna("NaN")
DF_CATB_IB = pd.read_csv('./Dataset/IB/CATB_IB_Test.csv', low_memory=False).fillna("NaN")

In [102]:
# Classification reports for the default and tuned TB LightGBM models,
# both scored on the TB LightGBM hold-out frame.
tb_lgbm_truth = DF_LGBM_TB['malware']
tb_lgbm_inputs = DF_LGBM_TB.iloc[:, 1:101]  # columns at positions 1..100 are passed to predict

for heading, clf in (
    ("Default TB LGBM", default_tb_lgbm),
    ("Tuned TB LGBM", tuned_tb_lgbm),
):
    print(heading)
    print(metric.classification_report(tb_lgbm_truth, clf.predict(tb_lgbm_inputs)))

Default TB LGBM
              precision    recall  f1-score   support

           0       0.92      0.55      0.69       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.95      0.77      0.84      4388
weighted avg       0.99      0.99      0.99      4388

Tuned TB LGBM
              precision    recall  f1-score   support

           0       0.90      0.53      0.67       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.95      0.76      0.83      4388
weighted avg       0.99      0.99      0.99      4388



In [103]:
# Classification reports for the default and tuned IB LightGBM models,
# both scored on the IB LightGBM hold-out frame.
ib_lgbm_truth = DF_LGBM_IB['malware']
ib_lgbm_inputs = DF_LGBM_IB.iloc[:, 1:101]  # columns at positions 1..100 are passed to predict

for heading, clf in (
    ("Default IB LGBM", default_ib_lgbm),
    ("Tuned IB LGBM", tuned_ib_lgbm),
):
    print(heading)
    print(metric.classification_report(ib_lgbm_truth, clf.predict(ib_lgbm_inputs)))

Default IB LGBM
              precision    recall  f1-score   support

           0       0.89      0.62      0.73       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.94      0.81      0.86      4388
weighted avg       0.99      0.99      0.99      4388

Tuned IB LGBM
              precision    recall  f1-score   support

           0       0.82      0.52      0.64       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.90      0.76      0.81      4388
weighted avg       0.98      0.99      0.98      4388



In [104]:
# Classification reports for the default and tuned TB CatBoost models,
# both scored on the TB CatBoost hold-out frame.
tb_catb_truth = DF_CATB_TB['malware']
tb_catb_inputs = DF_CATB_TB.iloc[:, 1:101]  # columns at positions 1..100 are passed to predict

for heading, clf in (
    ("Default TB CATB", default_tb_catb),
    ("Tuned TB CATB", tuned_tb_catb),
):
    print(heading)
    print(metric.classification_report(tb_catb_truth, clf.predict(tb_catb_inputs)))

Default TB CATB
              precision    recall  f1-score   support

           0       0.95      0.58      0.72       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.97      0.79      0.86      4388
weighted avg       0.99      0.99      0.99      4388

Tuned TB CATB
              precision    recall  f1-score   support

           0       0.94      0.60      0.73       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.96      0.80      0.86      4388
weighted avg       0.99      0.99      0.99      4388



In [105]:
# Classification reports for the default and tuned IB CatBoost models,
# both scored on the IB CatBoost hold-out frame.
ib_catb_truth = DF_CATB_IB['malware']
ib_catb_inputs = DF_CATB_IB.iloc[:, 1:101]  # columns at positions 1..100 are passed to predict

for heading, clf in (
    ("Default IB CATB", default_ib_catb),
    ("Tuned IB CATB", tuned_ib_catb),
):
    print(heading)
    print(metric.classification_report(ib_catb_truth, clf.predict(ib_catb_inputs)))

Default IB CATB
              precision    recall  f1-score   support

           0       0.92      0.65      0.76       104
           1       0.99      1.00      1.00      4284

    accuracy                           0.99      4388
   macro avg       0.96      0.83      0.88      4388
weighted avg       0.99      0.99      0.99      4388

Tuned IB CATB
              precision    recall  f1-score   support

           0       0.92      0.59      0.72       104
           1       0.99      1.00      0.99      4284

    accuracy                           0.99      4388
   macro avg       0.96      0.79      0.86      4388
weighted avg       0.99      0.99      0.99      4388

