In [1]:
import pandas as pd
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from datetime import datetime

# Load the full feature table used by every cell below.
df = pd.read_csv("feature_updated.csv", low_memory=False)


### STANDARD SETUP

# Columns that are never fed to the classifier (they are filtered out of `features`).
to_exclude = ['code', 'status', 'year', 'month', 'dataset', 'split', 'author', 'category', 'MV']
features = [column for column in df.columns if column not in to_exclude]
# from 0 to 2152: metadata features; from 2152 to 6152 (end): code features
# if you want to use the "combined classifier", choose all features

label = 'status'    # ground-truth column; compared against "benign" / "malware" below
threshold = 0.912   # this was derived by Ben





In [30]:
def test_clf(clf, test_set, features=features, label=label, threshold=threshold):
    '''Evaluate a fitted classifier on ``test_set`` using a custom decision threshold.

    A sample is predicted "benign" when the classifier's probability for the
    "benign" class is at least ``threshold``; otherwise it is predicted
    "malware". A one-line metric summary is printed and the confusion matrix
    is displayed.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba`` and ``classes_``.
    test_set : DataFrame holding the ``features`` columns and the ``label`` column.
    features : column names passed to the classifier.
    label : name of the ground-truth column (values "benign" / "malware").
    threshold : minimum "benign" probability required to predict "benign".

    Returns
    -------
    tuple ``(probabilities, acc, prec, rec, fpr)``: the raw ``predict_proba``
    output, accuracy, precision and recall with "malware" as the positive
    class, and the fraction of benign samples predicted as malware.

    Raises
    ------
    ValueError
        If "benign" is not one of ``clf.classes_``.
    '''
    probabilities = clf.predict_proba(test_set[features])

    # predict_proba columns follow clf.classes_; look the "benign" column up
    # rather than assuming it is column 0.
    benign_col = list(clf.classes_).index("benign")
    y_pred = ["benign" if prob[benign_col] >= threshold else "malware"
              for prob in probabilities]

    y_true = test_set[label]
    acc = sk.metrics.accuracy_score(y_true, y_pred)
    prec = sk.metrics.precision_score(y_true, y_pred, pos_label='malware')
    rec = sk.metrics.recall_score(y_true, y_pred, pos_label='malware')
    # 1 - (recall of the benign class) = share of benign samples flagged as malware
    fpr = 1 - sk.metrics.recall_score(y_true, y_pred, pos_label='benign')

    print("Accuracy: {:.2f}%,\tPrecision: {:.2f}%\tRecall: {:.2f}%\tFPR: {:.5f}".format(acc*100, prec*100, rec*100, fpr))

    display(pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred']))

    return probabilities, acc, prec, rec, fpr


def get_accuracy(clf, test_set, features=features, label=label, threshold=threshold):
    '''Return the accuracy of ``clf`` on ``test_set`` using a custom decision threshold.

    A sample is predicted "benign" when the classifier's probability for the
    "benign" class is at least ``threshold``; otherwise it is predicted "malware".

    Parameters
    ----------
    clf : fitted classifier exposing ``predict_proba`` and ``classes_``.
    test_set : DataFrame holding the ``features`` columns and the ``label`` column.
    features : column names passed to the classifier.
    label : name of the ground-truth column.
    threshold : minimum "benign" probability required to predict "benign".

    Raises
    ------
    ValueError
        If "benign" is not one of ``clf.classes_``.
    '''
    probabilities = clf.predict_proba(test_set[features])

    # predict_proba columns follow clf.classes_; look the "benign" column up
    # rather than assuming it is column 0.
    benign_col = list(clf.classes_).index("benign")
    y_pred = ["benign" if prob[benign_col] >= threshold else "malware"
              for prob in probabilities]

    return sk.metrics.accuracy_score(test_set[label], y_pred)

In [31]:
## These are the extensions of "Dataset L" (i.e., published/updated before 2023)

# Keep every row whose 'year' is 2022 or earlier.
df_2022 = df.loc[df['year'] <= 2022]



In [32]:
# Dataset L, one frame per class.
df_2022_benign = df_2022[df_2022['status']=='benign']
df_2022_malicious = df_2022[df_2022['status']=='malware']

# 80/20 split performed separately on each class, with a fixed seed.
# (train_test_split is already imported in the first cell.)
train_ben, test_ben = train_test_split(df_2022_benign, test_size=0.2, random_state=0)
train_mal, test_mal = train_test_split(df_2022_malicious, test_size=0.2, random_state=0)

# Recombine the per-class parts into the final train / test frames.
train = pd.concat([train_ben, train_mal])
test = pd.concat([test_ben, test_mal])

In [33]:
## Define and train the classifier (random 80/20 split of Dataset L)
rf = RandomForestClassifier(n_estimators=300, max_features="sqrt", criterion="gini",
                                   n_jobs=-2, class_weight="balanced", random_state=1)
start = datetime.now()
rf.fit(train[features], train[label])
# The original f-string had a stray literal "f" before the placeholder,
# which printed "Training time: f0:00:22...".
print(f"Training time: {datetime.now() - start}")


## Test the classifier

prob_2022, acc, prec, rec, fpr = test_clf(rf, test, threshold=threshold)

Training time: f0:00:22.763429
Accuracy: 98.64%,	Precision: 90.69%	Recall: 96.27%	FPR: 0.01094


Pred,benign,malware
True,Unnamed: 1_level_1,Unnamed: 2_level_1
benign,12202,135
malware,51,1315


In [34]:
# Expanding-window temporal splits: train on every year before Y, test on year Y.
train_2019, test_2019 = df[df['year'] <= 2018], df[df['year'] == 2019]
train_2020, test_2020 = df[df['year'] <= 2019], df[df['year'] == 2020]
train_2021, test_2021 = df[df['year'] <= 2020], df[df['year'] == 2021]
train_2022, test_2022 = df[df['year'] <= 2021], df[df['year'] == 2022]

# Only a training set is built for 2023 in this cell.
train_2023 = df[df['year'] <= 2022]

In [35]:
## Define and train the classifier (trained on years <= 2018)
rf_2019 = RandomForestClassifier(n_estimators=300, max_features="sqrt", criterion="gini",
                                   n_jobs=-2, class_weight="balanced", random_state=1)
start = datetime.now()
rf_2019.fit(train_2019[features], train_2019[label])
# The original f-string had a stray literal "f" before the placeholder.
print(f"Training time: {datetime.now() - start}")


## Test the classifier on 2019

prob_2019, acc, prec, rec, fpr = test_clf(rf_2019, test_2019, threshold=threshold)

Training time: f0:00:04.354942
Accuracy: 94.23%,	Precision: 99.92%	Recall: 82.03%	FPR: 0.00031


Pred,benign,malware
True,Unnamed: 1_level_1,Unnamed: 2_level_1
benign,6512,2
malware,550,2511


In [36]:
## Define and train the classifier (trained on years <= 2019)
rf_2020 = RandomForestClassifier(n_estimators=300, max_features="sqrt", criterion="gini",
                                   n_jobs=-2, class_weight="balanced", random_state=1)
start = datetime.now()
rf_2020.fit(train_2020[features], train_2020[label])
# The original f-string had a stray literal "f" before the placeholder.
print(f"Training time: {datetime.now() - start}")


## Test the classifier on 2020

prob_2020, acc, prec, rec, fpr = test_clf(rf_2020, test_2020, threshold=threshold)

Training time: f0:00:07.124002
Accuracy: 96.31%,	Precision: 99.26%	Recall: 81.25%	FPR: 0.00143


Pred,benign,malware
True,Unnamed: 1_level_1,Unnamed: 2_level_1
benign,9080,13
malware,401,1738


In [37]:
## Define and train the classifier (trained on years <= 2020)
rf_2021 = RandomForestClassifier(n_estimators=300, max_features="sqrt", criterion="gini",
                                   n_jobs=-2, class_weight="balanced", random_state=1)
start = datetime.now()
rf_2021.fit(train_2021[features], train_2021[label])
# The original f-string had a stray literal "f" before the placeholder.
print(f"Training time: {datetime.now() - start}")


## Test the classifier on 2021

prob_2021, acc, prec, rec, fpr = test_clf(rf_2021, test_2021, threshold=threshold)

Training time: f0:00:22.383738
Accuracy: 94.91%,	Precision: 39.42%	Recall: 55.70%	FPR: 0.03494


Pred,benign,malware
True,Unnamed: 1_level_1,Unnamed: 2_level_1
benign,10993,398
malware,206,259


In [38]:
## Define and train the classifier (trained on years <= 2021)
rf_2022 = RandomForestClassifier(n_estimators=300, max_features="sqrt", criterion="gini",
                                   n_jobs=-2, class_weight="balanced", random_state=1)
start = datetime.now()
rf_2022.fit(train_2022[features], train_2022[label])
# The original f-string had a stray literal "f" before the placeholder.
print(f"Training time: {datetime.now() - start}")


## Test the classifier on 2022
## NOTE: this rebinds prob_2022, which the random-split cell above also assigns.

prob_2022, acc, prec, rec, fpr = test_clf(rf_2022, test_2022, threshold=threshold)

Training time: f0:00:20.916882
Accuracy: 95.13%,	Precision: 29.14%	Recall: 52.49%	FPR: 0.03650


Pred,benign,malware
True,Unnamed: 1_level_1,Unnamed: 2_level_1
benign,13543,513
malware,191,211


In [39]:
## Define and train the classifier (trained on years <= 2022; not evaluated in this cell)
rf_2023 = RandomForestClassifier(n_estimators=300, max_features="sqrt", criterion="gini",
                                   n_jobs=-2, class_weight="balanced", random_state=1)
start = datetime.now()
rf_2023.fit(train_2023[features], train_2023[label])
# The original f-string had a stray literal "f" before the placeholder.
print(f"Training time: {datetime.now() - start}")

Training time: f0:00:25.792007


In [40]:
## NEXT CELLS focus on the feature importances

In [41]:
# Per-feature importances learned by rf_2019 (same order as `features`)
importances = rf_2019.feature_importances_

# (feature, importance) pairs ranked from most to least important
feature_importances = sorted(zip(features, importances), key=lambda pair: pair[1], reverse=True)

# Show the five highest-ranked features
print("Top 5 most important features:")
for name, importance in feature_importances[:5]:
    print(f"{name}: {importance:.4f}")

Top 5 most important features:
fileCount: 0.0272
Full-Summary LOVE: 0.0212
Full-Summary HIGH: 0.0211
157: 0.0209
105: 0.0171


In [42]:
# Per-feature importances learned by rf_2020 (same order as `features`)
importances = rf_2020.feature_importances_

# (feature, importance) pairs ranked from most to least important
feature_importances = sorted(zip(features, importances), key=lambda pair: pair[1], reverse=True)

# Show the five highest-ranked features
print("Top 5 most important features:")
for name, importance in feature_importances[:5]:
    print(f"{name}: {importance:.4f}")

Top 5 most important features:
fileCount: 0.0271
Permissions topSites: 0.0245
Full-Summary THEME: 0.0196
Full-Summary WALLPAPER: 0.0185
Full-Summary FAVORITE: 0.0175


In [43]:
# Per-feature importances learned by rf_2021 (same order as `features`)
importances = rf_2021.feature_importances_

# (feature, importance) pairs ranked from most to least important
feature_importances = sorted(zip(features, importances), key=lambda pair: pair[1], reverse=True)

# Show the five highest-ranked features
print("Top 5 most important features:")
for name, importance in feature_importances[:5]:
    print(f"{name}: {importance:.4f}")

Top 5 most important features:
fileCount: 0.0241
Full-Summary WALLPAPER: 0.0223
size: 0.0178
Full-Summary FAVORITE: 0.0170
Description TAB: 0.0164


In [44]:
# Per-feature importances learned by rf_2022 (same order as `features`)
importances = rf_2022.feature_importances_

# (feature, importance) pairs ranked from most to least important
feature_importances = sorted(zip(features, importances), key=lambda pair: pair[1], reverse=True)

# Show the five highest-ranked features
print("Top 5 most important features:")
for name, importance in feature_importances[:5]:
    print(f"{name}: {importance:.4f}")

Top 5 most important features:
fileCount: 0.0213
Full-Summary WALLPAPER: 0.0196
size: 0.0170
Description NEW: 0.0146
Full-Summary THEME: 0.0126


In [45]:
# Per-feature importances learned by rf_2023 (same order as `features`)
importances = rf_2023.feature_importances_

# (feature, importance) pairs ranked from most to least important
feature_importances = sorted(zip(features, importances), key=lambda pair: pair[1], reverse=True)

# Show the five highest-ranked features
print("Top 5 most important features:")
for name, importance in feature_importances[:5]:
    print(f"{name}: {importance:.4f}")

Top 5 most important features:
fileCount: 0.0206
Full-Summary FAVORITE: 0.0154
Description NEW: 0.0151
Full-Summary TIME: 0.0131
size: 0.0117


In [47]:
## We now assess the "drifting" perspective of CATEGORIES of extensions

## Categories reported individually; every other category is pooled into a
## single 'Others' group by test_across_categories.
valid_categories = [
    "7_productivity",
    "14_fun",
    "11_web-development",
    "1_communication",
    "22_accessibility",
    "12_shopping",
    "28_photos",
    "6_news",
    "38_search-tools",
]


In [48]:
def test_across_categories(test_df, valid_categories, features, model, threshold, label):
    '''Print the accuracy of ``model`` on ``test_df``, broken down by "category".

    Every entry of ``valid_categories`` gets its own line (or a notice when it
    has no rows in ``test_df``). Rows whose category is not listed in
    ``valid_categories`` are then reported together as one 'Others' group.
    Nothing is returned; results are only printed.
    '''
    category_col = test_df["category"]

    # 1. One line per listed category
    for cat in valid_categories:
        rows = test_df[category_col == cat]
        num_samples = len(rows)

        if num_samples != 0:
            accuracy = get_accuracy(model, rows, features, label, threshold)
            print(f"Category '{cat}': # samples = {num_samples}, accuracy = {accuracy:.4f}")
        else:
            # get_accuracy is skipped when the category has no rows
            print(f"No test examples for category '{cat}'.")

    # 2. Everything outside valid_categories is scored as a single group
    remainder = test_df[~category_col.isin(valid_categories)]
    num_others = len(remainder)

    if num_others == 0:
        print("No test examples for 'others' category group.")
        return

    accuracy_others = get_accuracy(model, remainder, features, label, threshold)
    print(f"'Others' group: # samples = {num_others}, accuracy = {accuracy_others:.4f}")

In [49]:

# Split every yearly test set into its benign and its malicious rows.

benign_2019 = test_2019.loc[test_2019["status"] == "benign"]
malicious_2019 = test_2019.loc[test_2019["status"] == "malware"]

benign_2020 = test_2020.loc[test_2020["status"] == "benign"]
malicious_2020 = test_2020.loc[test_2020["status"] == "malware"]

benign_2021 = test_2021.loc[test_2021["status"] == "benign"]
malicious_2021 = test_2021.loc[test_2021["status"] == "malware"]

benign_2022 = test_2022.loc[test_2022["status"] == "benign"]
malicious_2022 = test_2022.loc[test_2022["status"] == "malware"]

In [50]:
# Per-category accuracy of rf_2019 on the 2019 test set, one class at a time.
# On a single-class subset, accuracy is the share of that class classified correctly.

print("BENIGN 2019\n")
test_across_categories(test_df=benign_2019, valid_categories=valid_categories, features=features,
                       model=rf_2019, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2019, benign_2019, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2019))  # number of benign test samples

print("\n\nMALICIOUS 2019\n")
test_across_categories(test_df=malicious_2019, valid_categories=valid_categories, features=features,
                       model=rf_2019, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2019, malicious_2019, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2019))  # number of malicious test samples

BENIGN 2019

Category '7_productivity': # samples = 2941, accuracy = 1.0000
Category '14_fun': # samples = 809, accuracy = 1.0000
Category '11_web-development': # samples = 886, accuracy = 1.0000
Category '1_communication': # samples = 727, accuracy = 1.0000
Category '22_accessibility': # samples = 651, accuracy = 0.9985
Category '12_shopping': # samples = 335, accuracy = 1.0000
Category '28_photos': # samples = 51, accuracy = 0.9804
Category '6_news': # samples = 102, accuracy = 1.0000
Category '38_search-tools': # samples = 1, accuracy = 1.0000
'Others' group: # samples = 11, accuracy = 1.0000

benign avg acc:  0.9996929689898679
total benign:  6514


MALICIOUS 2019

Category '7_productivity': # samples = 34, accuracy = 0.0000
Category '14_fun': # samples = 93, accuracy = 0.0108
Category '11_web-development': # samples = 30, accuracy = 0.0000
Category '1_communication': # samples = 18, accuracy = 0.0000
Category '22_accessibility': # samples = 11, accuracy = 0.0000
Category '12_shopp

In [51]:
# Per-category accuracy of rf_2020 on the 2020 test set, one class at a time.
print("BENIGN 2020\n")
test_across_categories(test_df=benign_2020, valid_categories=valid_categories, features=features,
                       model=rf_2020, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2020, benign_2020, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2020))  # number of benign test samples

print("\n\nMALICIOUS 2020\n")
test_across_categories(test_df=malicious_2020, valid_categories=valid_categories, features=features,
                       model=rf_2020, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2020, malicious_2020, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2020))  # number of malicious test samples

BENIGN 2020

Category '7_productivity': # samples = 4417, accuracy = 0.9986
Category '14_fun': # samples = 1011, accuracy = 0.9960
Category '11_web-development': # samples = 1048, accuracy = 0.9990
Category '1_communication': # samples = 969, accuracy = 0.9979
Category '22_accessibility': # samples = 870, accuracy = 1.0000
Category '12_shopping': # samples = 541, accuracy = 1.0000
Category '28_photos': # samples = 70, accuracy = 1.0000
Category '6_news': # samples = 164, accuracy = 1.0000
No test examples for category '38_search-tools'.
'Others' group: # samples = 3, accuracy = 1.0000

benign avg acc:  0.9985703288243704
total benign:  9093


MALICIOUS 2020

Category '7_productivity': # samples = 168, accuracy = 0.2738
Category '14_fun': # samples = 124, accuracy = 0.3065
Category '11_web-development': # samples = 16, accuracy = 0.1250
Category '1_communication': # samples = 61, accuracy = 0.1311
Category '22_accessibility': # samples = 261, accuracy = 0.6897
Category '12_shopping': # 

In [52]:
# Per-category accuracy of rf_2021 on the 2021 test set, one class at a time.
print("BENIGN 2021\n")
test_across_categories(test_df=benign_2021, valid_categories=valid_categories, features=features,
                       model=rf_2021, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2021, benign_2021, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2021))  # number of benign test samples

print("\n\nMALICIOUS 2021\n")
test_across_categories(test_df=malicious_2021, valid_categories=valid_categories, features=features,
                       model=rf_2021, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2021, malicious_2021, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2021))  # number of malicious test samples

BENIGN 2021

Category '7_productivity': # samples = 5784, accuracy = 0.9751
Category '14_fun': # samples = 1151, accuracy = 0.9053
Category '11_web-development': # samples = 1160, accuracy = 0.9836
Category '1_communication': # samples = 1027, accuracy = 0.9620
Category '22_accessibility': # samples = 1195, accuracy = 0.9674
Category '12_shopping': # samples = 771, accuracy = 0.9844
Category '28_photos': # samples = 109, accuracy = 0.7248
Category '6_news': # samples = 193, accuracy = 0.9689
No test examples for category '38_search-tools'.
'Others' group: # samples = 1, accuracy = 1.0000

benign avg acc:  0.9650601351944518
total benign:  11391


MALICIOUS 2021

Category '7_productivity': # samples = 186, accuracy = 0.3925
Category '14_fun': # samples = 127, accuracy = 0.7717
Category '11_web-development': # samples = 15, accuracy = 0.2667
Category '1_communication': # samples = 23, accuracy = 0.4783
Category '22_accessibility': # samples = 44, accuracy = 0.5000
Category '12_shopping':

In [53]:
# Per-category accuracy of rf_2022 on the 2022 test set, one class at a time.
print("BENIGN 2022\n")
test_across_categories(test_df=benign_2022, valid_categories=valid_categories, features=features,
                       model=rf_2022, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2022, benign_2022, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2022))  # number of benign test samples

print("\n\nMALICIOUS 2022\n")
test_across_categories(test_df=malicious_2022, valid_categories=valid_categories, features=features,
                       model=rf_2022, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2022, malicious_2022, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2022))  # number of malicious test samples

BENIGN 2022

Category '7_productivity': # samples = 7308, accuracy = 0.9733
Category '14_fun': # samples = 1605, accuracy = 0.8935
Category '11_web-development': # samples = 1488, accuracy = 0.9879
Category '1_communication': # samples = 1138, accuracy = 0.9587
Category '22_accessibility': # samples = 1447, accuracy = 0.9765
Category '12_shopping': # samples = 763, accuracy = 0.9817
Category '28_photos': # samples = 140, accuracy = 0.7786
Category '6_news': # samples = 163, accuracy = 0.9816
Category '38_search-tools': # samples = 3, accuracy = 1.0000
'Others' group: # samples = 1, accuracy = 1.0000

benign avg acc:  0.9635031303357997
total benign:  14056


MALICIOUS 2022

Category '7_productivity': # samples = 117, accuracy = 0.4786
Category '14_fun': # samples = 90, accuracy = 0.8333
Category '11_web-development': # samples = 15, accuracy = 0.4667
Category '1_communication': # samples = 81, accuracy = 0.2963
Category '22_accessibility': # samples = 47, accuracy = 0.3404
Category '12

In [54]:
## Let's try again but without splitting per class

In [55]:
# Per-category accuracy of rf_2019 on the whole 2019 test set (both classes together).
print("2019\n")
test_across_categories(test_df=test_2019, valid_categories=valid_categories, features=features,
                       model=rf_2019, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2019, test_2019, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2019))  # size of the full 2019 test set

2019

Category '7_productivity': # samples = 2975, accuracy = 0.9886
Category '14_fun': # samples = 902, accuracy = 0.8980
Category '11_web-development': # samples = 916, accuracy = 0.9672
Category '1_communication': # samples = 745, accuracy = 0.9758
Category '22_accessibility': # samples = 662, accuracy = 0.9819
Category '12_shopping': # samples = 339, accuracy = 0.9882
Category '28_photos': # samples = 2156, accuracy = 0.8330
Category '6_news': # samples = 104, accuracy = 1.0000
Category '38_search-tools': # samples = 764, accuracy = 0.9987
'Others' group: # samples = 12, accuracy = 0.9167

avg acc:  0.9423498694516971
total samples:  9575


In [56]:
# Per-category accuracy of rf_2020 on the whole 2020 test set (both classes together).
print("2020\n")
test_across_categories(test_df=test_2020, valid_categories=valid_categories, features=features,
                       model=rf_2020, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2020, test_2020, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2020))  # size of the full 2020 test set

2020

Category '7_productivity': # samples = 4585, accuracy = 0.9721
Category '14_fun': # samples = 1135, accuracy = 0.9207
Category '11_web-development': # samples = 1064, accuracy = 0.9859
Category '1_communication': # samples = 1030, accuracy = 0.9466
Category '22_accessibility': # samples = 1131, accuracy = 0.9284
Category '12_shopping': # samples = 549, accuracy = 0.9854
Category '28_photos': # samples = 1528, accuracy = 0.9941
Category '6_news': # samples = 166, accuracy = 0.9880
Category '38_search-tools': # samples = 40, accuracy = 0.3750
'Others' group: # samples = 4, accuracy = 0.7500

avg acc:  0.9631410256410257
total samples:  11232


In [57]:
# Per-category accuracy of rf_2021 on the whole 2021 test set (both classes together).
print("2021\n")
test_across_categories(test_df=test_2021, valid_categories=valid_categories, features=features,
                       model=rf_2021, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2021, test_2021, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2021))  # size of the full 2021 test set

2021

Category '7_productivity': # samples = 5970, accuracy = 0.9570
Category '14_fun': # samples = 1278, accuracy = 0.8920
Category '11_web-development': # samples = 1175, accuracy = 0.9745
Category '1_communication': # samples = 1050, accuracy = 0.9514
Category '22_accessibility': # samples = 1239, accuracy = 0.9508
Category '12_shopping': # samples = 800, accuracy = 0.9825
Category '28_photos': # samples = 118, accuracy = 0.7373
Category '6_news': # samples = 195, accuracy = 0.9692
Category '38_search-tools': # samples = 28, accuracy = 0.5000
'Others' group: # samples = 3, accuracy = 0.3333

avg acc:  0.949055330634278
total samples:  11856


In [58]:
# Per-category accuracy of rf_2022 on the whole 2022 test set (both classes together).
print("2022\n")
test_across_categories(test_df=test_2022, valid_categories=valid_categories, features=features,
                       model=rf_2022, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2022, test_2022, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2022))  # size of the full 2022 test set

2022

Category '7_productivity': # samples = 7425, accuracy = 0.9655
Category '14_fun': # samples = 1695, accuracy = 0.8903
Category '11_web-development': # samples = 1503, accuracy = 0.9827
Category '1_communication': # samples = 1219, accuracy = 0.9147
Category '22_accessibility': # samples = 1494, accuracy = 0.9565
Category '12_shopping': # samples = 770, accuracy = 0.9740
Category '28_photos': # samples = 148, accuracy = 0.7905
Category '6_news': # samples = 163, accuracy = 0.9816
Category '38_search-tools': # samples = 39, accuracy = 0.6667
'Others' group: # samples = 2, accuracy = 1.0000

avg acc:  0.951307234748928
total samples:  14458


In [None]:
##########################

# Let's focus on MANIFEST. The idea is doing the same things as above, only for manifest though

In [60]:
def test_manifest(test_df, features, model, threshold, label):
    '''Print the accuracy of ``model`` on ``test_df``, broken down by the "MV" column.

    MV values 2 and 3 each get their own line (or a notice when no row has
    that value). Rows with any other MV value are then reported together as
    a single 'Others' group. Nothing is returned; results are only printed.
    '''
    valid_manifests = [2,3]
    mv_col = test_df["MV"]

    # 1. One line per listed MV value
    for MV in valid_manifests:
        rows = test_df[mv_col == MV]
        num_samples = len(rows)

        if num_samples != 0:
            accuracy = get_accuracy(model, rows, features, label, threshold)
            print(f"MV '{MV}': # samples = {num_samples}, accuracy = {accuracy:.4f}")
        else:
            # get_accuracy is skipped when no row has this MV value
            print(f"No test examples for MV '{MV}'.")

    # 2. Every MV value outside valid_manifests is scored as a single group
    remainder = test_df[~mv_col.isin(valid_manifests)]
    num_others = len(remainder)

    if num_others == 0:
        print("No test examples for 'others' MVs.")
        return

    accuracy_others = get_accuracy(model, remainder, features, label, threshold)
    print(f"'Others' MVs: # samples = {num_others}, accuracy = {accuracy_others:.4f}")

In [61]:
# Per-MV accuracy of rf_2019 on the 2019 test set, one class at a time.
print("BENIGN 2019\n")
test_manifest(test_df=benign_2019, features=features, model=rf_2019, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2019, benign_2019, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2019))  # number of benign test samples

print("\n\nMALICIOUS 2019\n")
test_manifest(test_df=malicious_2019, features=features, model=rf_2019, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2019, malicious_2019, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2019))  # number of malicious test samples

BENIGN 2019

MV '2': # samples = 6514, accuracy = 0.9997
No test examples for MV '3'.
No test examples for 'others' MVs.

benign avg acc:  0.9996929689898679
total benign:  6514


MALICIOUS 2019

MV '2': # samples = 3061, accuracy = 0.8203
No test examples for MV '3'.
No test examples for 'others' MVs.

malicious avg acc:  0.8203201568114995
total malicious:  3061


In [62]:
# Per-MV accuracy of rf_2020 on the 2020 test set, one class at a time.
print("BENIGN 2020\n")
test_manifest(test_df=benign_2020, features=features, model=rf_2020, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2020, benign_2020, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2020))  # number of benign test samples

print("\n\nMALICIOUS 2020\n")
test_manifest(test_df=malicious_2020, features=features, model=rf_2020, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2020, malicious_2020, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2020))  # number of malicious test samples

BENIGN 2020

MV '2': # samples = 9093, accuracy = 0.9986
No test examples for MV '3'.
No test examples for 'others' MVs.

benign avg acc:  0.9985703288243704
total benign:  9093


MALICIOUS 2020

MV '2': # samples = 2139, accuracy = 0.8125
No test examples for MV '3'.
No test examples for 'others' MVs.

malicious avg acc:  0.812529219261337
total malicious:  2139


In [63]:
# Per-MV accuracy of rf_2021 on the 2021 test set, one class at a time.
print("BENIGN 2021\n")
test_manifest(test_df=benign_2021, features=features, model=rf_2021, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2021, benign_2021, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2021))  # number of benign test samples

print("\n\nMALICIOUS 2021\n")
test_manifest(test_df=malicious_2021, features=features, model=rf_2021, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2021, malicious_2021, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2021))  # number of malicious test samples

BENIGN 2021

MV '2': # samples = 9377, accuracy = 0.9593
MV '3': # samples = 2014, accuracy = 0.9921
No test examples for 'others' MVs.

benign avg acc:  0.9650601351944518
total benign:  11391


MALICIOUS 2021

MV '2': # samples = 407, accuracy = 0.6143
MV '3': # samples = 58, accuracy = 0.1552
No test examples for 'others' MVs.

malicious avg acc:  0.556989247311828
total malicious:  465


In [64]:
# Per-MV accuracy of rf_2022 on the 2022 test set, one class at a time.
print("BENIGN 2022\n")
test_manifest(test_df=benign_2022, features=features, model=rf_2022, threshold=threshold, label=label)
print("\nbenign avg acc: ", get_accuracy(rf_2022, benign_2022, features=features, label=label, threshold=threshold))
print("total benign: ", len(benign_2022))  # number of benign test samples

print("\n\nMALICIOUS 2022\n")
test_manifest(test_df=malicious_2022, features=features, model=rf_2022, threshold=threshold, label=label)
print("\nmalicious avg acc: ", get_accuracy(rf_2022, malicious_2022, features=features, label=label, threshold=threshold))
print("total malicious: ", len(malicious_2022))  # number of malicious test samples

BENIGN 2022

MV '2': # samples = 3841, accuracy = 0.9427
MV '3': # samples = 10215, accuracy = 0.9713
No test examples for 'others' MVs.

benign avg acc:  0.9635031303357997
total benign:  14056


MALICIOUS 2022

MV '2': # samples = 83, accuracy = 0.8313
MV '3': # samples = 319, accuracy = 0.4451
No test examples for 'others' MVs.

malicious avg acc:  0.5248756218905473
total malicious:  402


In [65]:
## And now again without separating benign and malicious



In [66]:
# Per-MV accuracy of rf_2019 on the whole 2019 test set (both classes together).
print("2019\n")
test_manifest(test_df=test_2019, features=features, model=rf_2019, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2019, test_2019, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2019))  # size of the full 2019 test set

2019

MV '2': # samples = 9575, accuracy = 0.9423
No test examples for MV '3'.
No test examples for 'others' MVs.

avg acc:  0.9423498694516971
total samples:  9575


In [67]:
# Per-MV accuracy of rf_2020 on the whole 2020 test set (both classes together).
print("2020\n")
test_manifest(test_df=test_2020, features=features, model=rf_2020, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2020, test_2020, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2020))  # size of the full 2020 test set

2020

MV '2': # samples = 11232, accuracy = 0.9631
No test examples for MV '3'.
No test examples for 'others' MVs.

avg acc:  0.9631410256410257
total samples:  11232


In [68]:
# Per-MV accuracy of rf_2021 on the whole 2021 test set (both classes together).
print("2021\n")
test_manifest(test_df=test_2021, features=features, model=rf_2021, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2021, test_2021, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2021))  # size of the full 2021 test set

2021

MV '2': # samples = 9784, accuracy = 0.9449
MV '3': # samples = 2072, accuracy = 0.9686
No test examples for 'others' MVs.

avg acc:  0.949055330634278
total samples:  11856


In [None]:
# Per-MV accuracy of rf_2022 on the whole 2022 test set (both classes together).
print("2022\n")
test_manifest(test_df=test_2022, features=features, model=rf_2022, threshold=threshold, label=label)
print("\navg acc: ", get_accuracy(rf_2022, test_2022, features=features, label=label, threshold=threshold))
print("total samples: ", len(test_2022))  # size of the full 2022 test set