In [1]:
# Attach Google Drive to the Colab VM so that files stored on the Drive can be
# accessed through the local filesystem by the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Switch the working directory to the project folder on the mounted Drive.
# NOTE(review): presumably this is also what makes the local `utils` module
# importable in the imports cell -- confirm that utils.py lives in this folder.
%cd /content/drive/MyDrive/Colab Notebooks/Malicious Macro Detection
# List the folder contents as a quick check of which artefacts are present.
%ls

/content/drive/MyDrive/Colab Notebooks/Malicious Macro Detection
 AdaBoostClassifier.joblib
 classifiers_recall_scores.joblib
 CNNClassifier.joblib
 DecisionTreeClassifier.joblib
 EDA.ipynb
 features_k_1000.joblib
 features_k_100.joblib
 features_k_10.joblib
 features_k_1200.joblib
 features_k_1500.joblib
 features_k_2000.joblib
 features_k_2500.joblib
 features_k_3000.joblib
 features_k_500.joblib
 features_k_50.joblib
 gnbClassifier.joblib
 GradientBoostingClassifier.joblib
 knnClassifier.joblib
 LSTMClassifier.joblib
'Macro Malware Detection using Machine Learning Techniques A New Approach '
 mlpClasifier.joblib
 [0m[01;34m__pycache__[0m/
 randomForestClassifier.joblib
 recall_scores.joblib
 RobertaClassifier.joblib
 svmClassifier.joblib
 test_dataset.csv
 test_loader.joblib
 test_loader.pkl
 tfidf_1000.joblib
 tfidf_100.joblib
 tfidf_10.joblib
 tfidf_1200.joblib
 tfidf_1500.joblib
 tfidf_2000.joblib
 tfidf_2500.joblib
 tfidf_3000.joblib
 tfidf_500.joblib
 tfidf_50.joblib
 train_

In [3]:
!pip install torch torchvision gensim



In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from gensim.models import Word2Vec
import numpy as np
import pandas as pd
from joblib import dump, load
from utils import CNNClassifier, CNNTrainer, TextDataset, save_loader

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

### Loading data

In [5]:
# Folder on the mounted Drive that holds the serialized project artefacts.
# Defined once so that moving the project only requires a single edit.
PROJECT_DIR = '/content/drive/MyDrive/Colab Notebooks/Malicious Macro Detection'

# Load the previously serialized train / validation / test loader objects.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load artefacts that were produced by a trusted source.
train_loader = load(f'{PROJECT_DIR}/train_loader.joblib')
validation_loader = load(f'{PROJECT_DIR}/val_loader.joblib')
test_loader = load(f'{PROJECT_DIR}/test_loader.joblib')

In [6]:
# Load the serialized word2vec model; its `vector_size` is used below as the
# embedding dimension of the classifier.
# NOTE: joblib.load unpickles the file -- only load trusted artefacts.
word2vec_path = '/content/drive/MyDrive/Colab Notebooks/Malicious Macro Detection/word2vec_model.joblib'
word2vec_model = load(word2vec_path)

### Defining CNN model

In [7]:
# The classifier's input width is taken from the word2vec model so the two
# always agree on the embedding dimension.
embed_dim = word2vec_model.vector_size

# Number of output classes of the classifier.
# NOTE(review): presumably benign vs. malicious macro -- confirm label encoding.
num_classes = 2

model = CNNClassifier(embed_dim, num_classes)

### Training

In [8]:
# Training hyper-parameters, named so they are easy to find and tune.
LEARNING_RATE = 0.0001
NUM_EPOCHS = 10

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

trainer = CNNTrainer(model, train_loader, validation_loader, test_loader, criterion, optimizer, device)

# The validation metrics get a `val_` prefix for two reasons:
#   * unpacking into `f1_score` would shadow sklearn.metrics.f1_score, which
#     is imported at the top of the notebook;
#   * distinct names keep validation results from being mistaken for the test
#     results produced by the testing cell further down.
validation_accuracy, val_conf_matrix, val_precision, val_recall, val_f1 = trainer.train(num_epochs=NUM_EPOCHS)

Epoch [1/10], Loss: 0.2864, Validation Accuracy: 96.46%, Precision: 0.9454578291493588, Recall: 0.9860614051610472, F1-score: 0.9653328416005901
Epoch [2/10], Loss: 0.1245, Validation Accuracy: 97.45%, Precision: 0.955350686912509, Recall: 0.9954793746468261, F1-score: 0.9750023060603266
Epoch [3/10], Loss: 0.0911, Validation Accuracy: 98.05%, Precision: 0.9696244477172312, Recall: 0.9920889056319457, F1-score: 0.9807280513918629
Epoch [4/10], Loss: 0.0762, Validation Accuracy: 98.05%, Precision: 0.9653411163808829, Recall: 0.9967978903748352, F1-score: 0.9808173477898249
Epoch [5/10], Loss: 0.0616, Validation Accuracy: 98.59%, Precision: 0.9753086419753086, Recall: 0.9969862497645507, F1-score: 0.9860283159463488
Epoch [6/10], Loss: 0.0528, Validation Accuracy: 98.72%, Precision: 0.9783613833918994, Recall: 0.996421171595404, F1-score: 0.9873086972751026
Epoch [7/10], Loss: 0.0482, Validation Accuracy: 98.88%, Precision: 0.9818046787968808, Recall: 0.9960444528159729, F1-score: 0.9888

### Testing

In [9]:
# Evaluate the trained model via the trainer and unpack the five returned
# metrics into the names the reporting cells below read.
# NOTE(review): the `f1_score` variable shadows the sklearn.metrics.f1_score
# function imported at the top of the notebook; renaming it would require
# updating the cells that print it.
test_metrics = trainer.test()
test_accuracy, conf_matrix, precision, recall, f1_score = test_metrics

Test Accuracy: 99.22%, Precision: 0.9880708294501398, Recall: 0.9964285714285714, F1-score: 0.9922321010762751


In [10]:
# Report the test-set metrics, one "<label>: <value>" line per metric.
test_report = (
    ("Test Accuracy", test_accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1_score),
)
for metric_label, metric_value in test_report:
    print(f"{metric_label}: {metric_value}")

Test Accuracy: 99.21919096895579
Precision: 0.9880708294501398
Recall: 0.9964285714285714
F1 Score: 0.9922321010762751


In [11]:
# Show the confusion matrix returned by the test run.
# NOTE(review): presumably rows are true labels and columns are predicted
# labels (the scikit-learn convention) -- confirm against CNNTrainer.test.
confusion_text = str(conf_matrix)
print(confusion_text)

[[5246   64]
 [  19 5301]]


In [12]:
# Persist the trained CNN next to the other project artefacts.
# Despite its name, `save_loader` is used here to store the model object.
cnn_model_path = '/content/drive/MyDrive/Colab Notebooks/Malicious Macro Detection/CNNClassifier.joblib'
save_loader(cnn_model_path, model)

CNNClassifier(
  (conv1): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
  (conv2): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  (conv3): Conv2d(1, 100, kernel_size=(5, 100), stride=(1, 1))
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=300, out_features=2, bias=True)
) saved sucessfuly
