In [1]:
import pandas as pd
import numpy as np

# Load the train/test splits: one CSV of description text and one CSV of
# labels per split.
X_train = pd.read_csv('./cvss_2022_2024_X_train.csv')
y_train = pd.read_csv('./cvss_2022_2024_y_train.csv')

X_test = pd.read_csv('./cvss_2022_2024_X_test.csv')
y_test = pd.read_csv('./cvss_2022_2024_y_test.csv')

# Print the shapes so a row-count mismatch between features and labels is
# visible immediately.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

# Class balance of each split as a fraction of its rows; dropna=False keeps
# missing labels visible as their own bucket instead of silently dropping them.
print(y_train['cvssv3_confidentiality_impact'].value_counts(dropna=False) / y_train.shape[0])
print(y_test['cvssv3_confidentiality_impact'].value_counts(dropna=False) / y_test.shape[0])

# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line after each report.
X_train.info()
y_train.info()

(80769, 1)
(20193, 1)
(80769, 1)
(20193, 1)
cvssv3_confidentiality_impact
NONE    0.333990
HIGH    0.333383
LOW     0.332628
Name: count, dtype: float64
cvssv3_confidentiality_impact
LOW     0.336156
HIGH    0.333135
NONE    0.330709
Name: count, dtype: float64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80769 entries, 0 to 80768
Data columns (total 1 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   english_description  80769 non-null  object
dtypes: object(1)
memory usage: 631.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80769 entries, 0 to 80768
Data columns (total 1 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   cvssv3_confidentiality_impact  80769 non-null  object
dtypes: object(1)
memory usage: 631.1+ KB
None


In [2]:
from sklearn.preprocessing import LabelEncoder
import pickle

# Pull the target column out of each label frame as a Series.
label_column_name = "cvssv3_confidentiality_impact"
train_labels = y_train[label_column_name]
test_labels = y_test[label_column_name]

# The encoder is fitted on the training labels only; the test labels are
# transformed with that same fitted mapping further down.
le = LabelEncoder().fit(train_labels)
print(le.classes_)

# Persist the class order so integer predictions can be mapped back to label
# names later. NOTE(review): the file is a binary pickle despite the ".txt"
# suffix, so it has to be read back with pickle.load in "rb" mode.
with open("../labels/confidentiality_impact_label.txt", "wb") as label_file:
    pickle.dump(le.classes_, label_file)

NUM_CLASSES = len(le.classes_)
print(NUM_CLASSES)

encoded_train_labels = le.transform(train_labels)
encoded_test_labels = le.transform(test_labels)

# Show the first ten labels next to their integer codes, then confirm that
# features and labels have matching lengths in both splits.
print(train_labels.head(10), encoded_train_labels[:10])
print(len(X_train), len(train_labels), len(X_test), len(test_labels))

['HIGH' 'LOW' 'NONE']
3
0     LOW
1     LOW
2    HIGH
3    HIGH
4    NONE
5     LOW
6     LOW
7     LOW
8    HIGH
9    NONE
Name: cvssv3_confidentiality_impact, dtype: object [1 1 0 0 2 1 1 1 0 2]
80769 80769 20193 20193


In [3]:
from transformers import BertTokenizerFast

# Tokenizer is loaded by checkpoint name.
# NOTE(review): the classifier in a later cell is loaded from a local
# fine-tuned directory; confirm it was trained with this same tokenizer.
tokenizer_checkpoint = 'prajjwal1/bert-small'
tokenizer = BertTokenizerFast.from_pretrained(tokenizer_checkpoint)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Both splits are tokenised with identical settings: padding enabled and
# sequences truncated to at most 128 tokens.
tokenizer_kwargs = dict(truncation=True, padding=True, max_length=128)

train_encodings = tokenizer(X_train["english_description"].tolist(), **tokenizer_kwargs)
test_encodings = tokenizer(X_test["english_description"].tolist(), **tokenizer_kwargs)

In [5]:
import torch

class CVEDataset(torch.utils.data.Dataset):
    """Dataset that pairs tokenised descriptions with their labels.

    Args:
        X: frame with an "english_description" column holding the raw text.
        encodings: mapping of field name -> per-example sequences; only
            ``.items()`` and indexing of each value are used.
        labels: human-readable labels; must provide ``.tolist()``.
        encoded_labels: integer class ids, indexable in step with ``labels``.
    """

    def __init__(self, X, encodings, labels, encoded_labels):
        self.encodings = encodings
        self.encoded_labels = encoded_labels
        # Text and string labels are stored as plain Python lists.
        self.texts = X["english_description"].tolist()
        self.labels = labels.tolist()

    def __getitem__(self, idx):
        """Return one example (or a slice of examples) as a dict.

        Every encoding field and the encoded label are converted to tensors;
        the string label and the raw description are passed through as-is.
        """
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])

        sample['text_labels'] = self.labels[idx]
        sample['encoded_labels'] = torch.tensor(self.encoded_labels[idx])
        sample['vulnerability_description'] = self.texts[idx]
        return sample

    def __len__(self):
        """Number of examples, taken from the label list."""
        return len(self.labels)


In [6]:
# Wrap each split (raw text, token encodings, string labels, integer labels)
# in a CVEDataset.
train_dataset = CVEDataset(
    X=X_train,
    encodings=train_encodings,
    labels=train_labels,
    encoded_labels=encoded_train_labels,
)
test_dataset = CVEDataset(
    X=X_test,
    encodings=test_encodings,
    labels=test_labels,
    encoded_labels=encoded_test_labels,
)

In [7]:
import torch
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch.nn.functional as F

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the fine-tuned classifier from disk and put it in evaluation mode.
model = BertForSequenceClassification.from_pretrained('../models/bert-small-vulnerability_confidentiality_impact-classification')
model.to(device)
model.eval()

# Default DataLoader settings (no shuffling), so predictions stay in dataset order.
test_loader = DataLoader(test_dataset, batch_size=16)

num_correct = 0
num_examples = 0
test_loss = 0
predicted_labels_list = []
predicted_labels_score_list = []

# Evaluation only: no_grad() stops autograd from recording the forward pass,
# which would otherwise hold activations for a backward pass that never runs.
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['encoded_labels'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        # Weight the batch loss by batch size so the division after the loop
        # gives a per-example average even when the last batch is short
        # (presumably the model returns a batch-mean loss; confirm).
        test_loss += loss.item() * input_ids.size(0)

        # One softmax per batch; torch.max returns (values, indices), i.e. the
        # winning probability and the predicted class id for each example.
        probabilities = F.softmax(outputs.logits, dim=1)
        predicted_labels_score, predicted_labels = torch.max(probabilities, dim=1)
        predicted_labels_list.extend(predicted_labels.tolist())
        predicted_labels_score_list.extend(predicted_labels_score.tolist())

        correct = torch.eq(predicted_labels, labels)
        num_correct += torch.sum(correct).item()
        num_examples += correct.shape[0]
test_loss /= len(test_loader.dataset)


print('Test Loss: {}, Test Accuracy = {}'.format(test_loss, num_correct / num_examples))
print('predicted labels:', predicted_labels_list, 'scores:', predicted_labels_score_list)

Test Loss: 0.22315704129927347, Test Accuracy = 0.9251720893378894
predicted labels: [1, 1, 2, 1, 1, 0, 1, 0, 2, 0, 2, 2, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 2, 0, 0, 2, 1, 2, 0, 0, 2, 1, 2, 2, 0, 2, 1, 1, 1, 1, 2, 1, 1, 0, 2, 1, 0, 0, 2, 2, 2, 1, 1, 0, 0, 2, 0, 0, 0, 2, 1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 2, 1, 0, 1, 0, 1, 0, 1, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 1, 1, 1, 2, 1, 2, 0, 1, 1, 0, 1, 2, 0, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 0, 2, 1, 0, 1, 1, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 1, 2, 2, 2, 2, 0, 2, 0, 0, 0, 1, 2, 2, 0, 0, 2, 0, 1, 1, 1, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 0, 1, 1, 1, 1, 2, 1, 2, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 0, 1, 2, 0, 1, 0, 0, 0, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 2, 0, 2, 2, 2, 0, 2, 1, 2, 2, 1, 1, 1, 1, 0, 2, 2, 0, 2, 1, 2, 0, 0, 2, 1, 1, 2, 0, 2, 1, 1, 0, 2, 1, 1,

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, balanced_accuracy_score

# Requires test_dataset and predicted_labels_list from the earlier cells.
# Ground-truth class ids are read straight from the dataset's label array:
# indexing test_dataset[:] would first convert every tokenised field of the
# whole test set to tensors just to read this one field.
y_true = np.asarray(test_dataset.encoded_labels)
y_pred = np.array(predicted_labels_list)

# Metrics (precision / recall / F1 are support-weighted averages over classes)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
balanced_accuracy = balanced_accuracy_score(y_true, y_pred)

# Print results
print("Accuracy: ", accuracy)
print("Balanced Accuracy: ", balanced_accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

# Rows are true classes, columns are predicted classes.
print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))


Accuracy:  0.9251720893378894
Balanced Accuracy:  0.9251388301699563
Precision:  0.9255762083886081
Recall:  0.9251720893378894
F1 Score:  0.9246057024282802

Confusion Matrix:
[[5788  484  455]
 [ 169 6517  102]
 [ 229   72 6377]]
