# Classifier

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
from transformers import AutoConfig
from torch.utils.data import DataLoader
import os
os.environ["MPLBACKEND"] = "Agg"

import matplotlib
matplotlib.use('Agg', force=True)
from matplotlib import pyplot as plt

import random
from tqdm import tqdm
import numpy as np
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score, roc_auc_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split

In [2]:
#Prepare dataset
data = []
with open("sec-desc.jsonl", 'r') as file:
    for i in file:
        data.append(json.loads(i))

In [3]:
#Fine tuining the embedding model through a classification task
class ModuleEmbedderHead(nn.Module):
    def __init__(self, embedding_model):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(embedding_model)

     #Classification head that output logits
        self.classifier = nn.Sequential(
            nn.Linear(self.encoder.config.hidden_size, 256),
            nn.ReLU(),
            nn.Linear(256, 1))

    #Encoding function
    def encoding(self, inputs):
        outputs = self.encoder(**inputs)
        #CLS Embeddings
        embedding = outputs.last_hidden_state[:, 0]
        return embedding

    #Classification function
    def classifying(self, inputs):
        return self.classifier(inputs)

    #Forward function
    def forward(self, inputs):
        embeddings = self.encoding(inputs)
        output_logits = self.classifying(embeddings)
        return output_logits

In [4]:
#Train Classifier function
def train_classifier(model, tokenizer, dataloader, data, epochs=5, lr=2e-5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    #Initialize optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    #Initialize loss
    bce= nn.BCEWithLogitsLoss()

    torch.manual_seed(42);
    for epoch in range(epochs):
        total_loss = 0

        for batch in tqdm(dataloader):
            model.train()
            safe=[]
            unsafe=[]
            for i in batch:
                safe.append(i["func_src_after"])
                unsafe.append(i["func_src_before"])
            #Tokenize the inputs
            safe_tokens = tokenizer(safe, return_tensors="pt", truncation=True, padding=True).to(device)
            unsafe_tokens = tokenizer(unsafe, return_tensors="pt", truncation=True, padding=True).to(device)

            #Foward pass to get the logits
            pos_logits = model(safe_tokens)
            neg_logits = model(unsafe_tokens)

            #Get true labels (1 if safe and 0 if unsafe)
            pos_labels = torch.ones_like(pos_logits)
            labels_neg = torch.zeros_like(neg_logits)

            #Calculate total loss
            safe_loss = bce(pos_logits, pos_labels)
            unsafe_loss = bce(neg_logits, labels_neg)
            loss = safe_loss + unsafe_loss

            #Update model
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}: Avg Loss = {total_loss / len(data)}")

    torch.save(model.state_dict(), "classifier.pt")

In [5]:
#Evaluation function
def evaluate_classifier(model, tokenizer,  data):
    model.eval()
    predictions = []
    true_labels  = []
    pred_bin = []
    device = next(model.parameters()).device

    for i in data:
       #Get safe and unsafe code
       safe = i['func_src_after']
       unsafe = i['func_src_before']

       #Tokenize safe code
       tokens_safe = tokenizer(safe, return_tensors="pt", truncation=True, padding=True).to(device)
       #Calculate probability of the code being safe according to classifier
       with torch.no_grad():
           logit = model(tokens_safe)
           prob_safe = torch.sigmoid(torch.tensor(logit).squeeze()).item()
       #Append prediction and true label
       predictions.append(prob_safe)
       true_labels.append(1)

       #Tokenize unsafe code
       tokens_unsafe = tokenizer(unsafe, return_tensors="pt", truncation=True, padding=True).to(device)
       #Calculate probability of the code being unsafe according to classifier
       with torch.no_grad():
           logit = model(tokens_unsafe)
           prob_unsafe = torch.sigmoid(torch.tensor(logit).squeeze()).item()
       #Append prediction and true label
       predictions.append(prob_unsafe)
       true_labels.append(0)

    #Binarize the prediction
    for i in predictions:
        pred_bin.append(int(i >= 0.5))
    #Calculate metrics
    auc = roc_auc_score(true_labels, predictions)
    acc = accuracy_score(true_labels, pred_bin)
    precision, recall, f1, x = precision_recall_fscore_support(true_labels, pred_bin, average="binary")

    print(f"Accuracy: {acc} | AUC: {auc} | Precision: {precision} | Recall: {recall} | F1: {f1}")

In [6]:
#Split data into train and test
train_triplets, test_triplets = train_test_split(
    data, test_size=0.2, random_state=42)
batch_size = 1

#Prepare data for train
dataloader = DataLoader(train_triplets, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)

#Define the embedding model
embedding_model = "microsoft/codebert-base"
#Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(embedding_model)
#Initialize classifier
classifier_model = ModuleEmbedderHead(embedding_model)

#Evaluate classifier before training
evaluate_classifier(classifier_model, tokenizer ,  test_triplets)
#Train
train_classifier(classifier_model, tokenizer , dataloader, train_triplets, epochs=8)
#Evaluate after training
evaluate_classifier(classifier_model, tokenizer ,  test_triplets)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

  prob_safe = torch.sigmoid(torch.tensor(logit).squeeze()).item()
  prob_unsafe = torch.sigmoid(torch.tensor(logit).squeeze()).item()
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.5 | AUC: 0.5084635416666667 | Precision: 0.0 | Recall: 0.0 | F1: 0.0


100%|██████████| 576/576 [01:10<00:00,  8.12it/s]


Epoch 1: Avg Loss = 1.3905275110155344


100%|██████████| 576/576 [01:10<00:00,  8.12it/s]


Epoch 2: Avg Loss = 1.3679442123199503


100%|██████████| 576/576 [01:11<00:00,  8.10it/s]


Epoch 3: Avg Loss = 1.3140119785805129


100%|██████████| 576/576 [01:10<00:00,  8.12it/s]


Epoch 4: Avg Loss = 1.2373567150983338


100%|██████████| 576/576 [01:10<00:00,  8.13it/s]


Epoch 5: Avg Loss = 1.1275025060620263


100%|██████████| 576/576 [01:11<00:00,  8.08it/s]


Epoch 6: Avg Loss = 1.1057780339938796


100%|██████████| 576/576 [01:11<00:00,  8.09it/s]


Epoch 7: Avg Loss = 1.0433893237883847


100%|██████████| 576/576 [01:11<00:00,  8.07it/s]


Epoch 8: Avg Loss = 1.030723959944832


  prob_safe = torch.sigmoid(torch.tensor(logit).squeeze()).item()
  prob_unsafe = torch.sigmoid(torch.tensor(logit).squeeze()).item()


Accuracy: 0.6770833333333334 | AUC: 0.7824797453703703 | Precision: 0.656441717791411 | Recall: 0.7430555555555556 | F1: 0.6970684039087948


In [7]:
from sklearn.decomposition import PCA
processed_data = []

for item in data:
    processed_data.append((item["func_src_after"], 1))
    processed_data.append((item["func_src_before"], 0))

embeddings = []
labels = []

device= next(classifier_model.parameters()).device

for code, label in processed_data:
    #Tokenize
    inputs = tokenizer(code, return_tensors="pt", truncation=True, padding=True)
    for k in inputs:
        inputs[k] = inputs[k].to(device)

    # Get embedding
    embedding = classifier_model.encoding(inputs).squeeze(0).cpu().detach().numpy()
    embeddings.append(embedding)
    labels.append(label)

embeddings = np.array(embeddings)
labels = np.array(labels)

#Apply t-SNE
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
tsne_reduced = tsne.fit_transform(embeddings)

#Apply PCA
pca = PCA(n_components=2)
pca_reduced = pca.fit_transform(embeddings)

#Create plot
plt.figure(figsize=(14, 6))



# t-SNE Plot
plt.subplot(1, 2, 1)
for label in [0, 1]:
    idx = np.where(labels == label)[0]
    if label == 1:
        color = "#77dd77"
        text = "Safe"
    else:
        color = "#ff6961"
        text = "Unsafe"
    plt.scatter(tsne_reduced[idx, 0], tsne_reduced[idx, 1], c=color, label=text, alpha=0.6)
plt.title("t-SNE of Fine-Tuned GraphCodeBERT Embeddings")
plt.xlabel("t-SNE Dim 1")
plt.ylabel("t-SNE Dim 2")
plt.grid(True)
plt.legend()

# PCA Plot
plt.subplot(1, 2, 2)
for label in [0, 1]:
    idx = np.where(labels == label)[0]
    if label == 1:
        color = "#77dd77"
        text = "Safe"
    else:
        color = "#ff6961"
        text = "Unsafe"
    plt.scatter(pca_reduced[idx, 0], pca_reduced[idx, 1], c=color, label=text, alpha=0.6)
plt.title("PCA of Fine-Tuned CodeBERT Embeddings")
plt.xlabel("PCA Dim 1")
plt.ylabel("PCA Dim 2")
plt.legend()

plt.tight_layout()
plt.savefig("tsne_pca_codebert.png")
plt.show()

In [10]:
#Load base CodeBERT
model_name = "microsoft/codebert-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval().cuda()

processed_data = []
for item in data:  # your dataset
    processed_data.append((item["func_src_after"], 1))   # Safe
    processed_data.append((item["func_src_before"], 0))  # Unsafe

embeddings = []
labels = []

for code, label in processed_data:

    #Tokenize
    tokens = tokenizer(code, return_tensors="pt", truncation=True, padding=True).to(device)
    #Forward pass and get the CLS embeddings
    output = model(**tokens)
    cls_embedding = output.last_hidden_state[:, 0].squeeze().cpu().detach().numpy()
    embeddings.append(cls_embedding)
    labels.append(label)

embeddings = np.array(embeddings)
labels = np.array(labels)

#Apply t-SNE
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
tsne_embeds = tsne.fit_transform(embeddings)

#Apply PCA
pca = PCA(n_components=2)
pca_embeds = pca.fit_transform(embeddings)

#Create plot
plt.figure(figsize=(14, 6))

# t-SNE
plt.subplot(1, 2, 1)
for label in [0, 1]:
    idx = np.where(labels == label)[0]
    if label == 1:
        color = "#77dd77"
        text = "Safe"
    else:
        color = "#ff6961"
        text = "Unsafe"
    plt.scatter(tsne_embeds[idx, 0], tsne_embeds[idx, 1], c=color, label=text, alpha=0.6)
plt.title("t-SNE of CodeBERT Embeddings")
plt.legend()
plt.grid(True)

# PCA
plt.subplot(1, 2, 2)
for label in [0, 1]:
    idx = np.where(labels == label)[0]
    if label == 1:
        color = "#77dd77"
        text = "Safe"
    else:
        color = "#ff6961"
        text = "Unsafe"
    plt.scatter(pca_embeds[idx, 0], pca_embeds[idx, 1], c=color, label=text, alpha=0.6)
plt.title("PCA of CodeBERT Embeddings")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("codebert_before_tsne_pca.png")
plt.show()
