In [1]:
# Install required packages if not already installed
!pip install --upgrade transformers datasets accelerate peft bitsandbytes
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Collecting transformers
  Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/db/5a/022ac010bedfb5119734cf9d743cf1d830cb4c604f53bb1552216f4344dc/transformers-4.55.2-py3-none-any.whl.metadata
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.55.2-py3-none-any.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.55.1
    Uninstalling transformers-4.55.1:
      Successfully uninstalled transformers-4.55.1
Successfully installed transformers-4.55.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m

In [2]:
import os
import csv
import pandas as pd
import torch
import numpy as np
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    default_data_collator,
)
import warnings
warnings.filterwarnings("ignore")

# Force NumPy to use 1.x compatibility mode
os.environ['NUMPY_EXPERIMENTAL_ARRAY_FUNCTION'] = '0'

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"PyTorch version: {torch.__version__}")
print(f"NumPy version: {np.__version__}")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/arushijain/PycharmProjects/ResearchBased/venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/arushijain/PycharmProjects/ResearchBased/venv/li

Using device: cpu
PyTorch version: 2.2.2
NumPy version: 2.2.6


In [7]:
# Configuration
DATA_DIR = "/Users/arushijain/PycharmProjects/ResearchBased/datasets"
TRAIN_CSV = f"{DATA_DIR}/train.csv"
VAL_CSV = f"{DATA_DIR}/valid.csv"
SAVE_DIR = f"{DATA_DIR}/codebert_base_finetuned"

# Create save directory if it doesn't exist
os.makedirs(SAVE_DIR, exist_ok=True)

MODEL_ID = "microsoft/codebert-base"

print(f"Data directory: {DATA_DIR}")
print(f"Save directory: {SAVE_DIR}")
print(f"Model ID: {MODEL_ID}")

Data directory: /Users/arushijain/PycharmProjects/ResearchBased/datasets
Save directory: /Users/arushijain/PycharmProjects/ResearchBased/datasets/codebert_base_finetuned
Model ID: microsoft/codebert-base


In [8]:
# Load CSVs
def load_csv(path):
    try:
        df = pd.read_csv(path, on_bad_lines="skip", quoting=csv.QUOTE_NONE, encoding="utf-8", engine="python")
        df["target"] = pd.to_numeric(df["target"], errors="coerce").fillna(0).astype(int)
        df["func"] = df["func"].fillna("").astype(str)
        return df[["func", "target"]]
    except Exception as e:
        print(f"Error loading {path}: {e}")
        return pd.DataFrame(columns=["func", "target"])

print("Loading datasets...")
train_df = load_csv(TRAIN_CSV)
val_df = load_csv(VAL_CSV)

print(f"Train dataset size: {len(train_df)}")
print(f"Validation dataset size: {len(val_df)}")

# Subsample if datasets are too large
if len(train_df) > 7000:
    train_df = train_df.sample(n=7000, random_state=42)
if len(val_df) > 1000:
    val_df = val_df.sample(n=1000, random_state=42)

print(f"After subsampling - Train: {len(train_df)}, Val: {len(val_df)}")

# Display sample data
print("\nSample training data:")
print(train_df.head())

Loading datasets...
Train dataset size: 1140131
Validation dataset size: 137484
After subsampling - Train: 7000, Val: 1000

Sample training data:
        func  target
580080             0
165278             0
708485             0
1058642            0
472563             0


In [10]:
# Load tokenizer
print("Loading tokenizer...")
try:
    tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=False)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    print("Tokenizer loaded successfully!")
    print(f"Pad token: {tok.pad_token}")
    print(f"EOS token: {tok.eos_token}")
except Exception as e:
    print(f"Error loading tokenizer: {e}")
    raise

Loading tokenizer...
Tokenizer loaded successfully!
Pad token: <pad>
EOS token: </s>


In [12]:
# Load model
print("Loading model...")
dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else torch.float16
print(f"Using dtype: {dtype}")

try:
    # Try loading with device_map first (for multi-GPU or large models)
    if torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, 
            torch_dtype=dtype, 
            trust_remote_code=False,
            device_map="auto",
            use_safetensors=True
        )
        print("Model loaded with device_map='auto'")
    else:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, 
            trust_remote_code=False,
            use_safetensors=True
        )
        model = model.to(device)
        print("Model loaded and moved to device")
    
    print("Model loaded successfully!")
    print(f"Model device: {next(model.parameters()).device}")
    
except Exception as e:
    print(f"Error loading model with device_map: {e}")
    print("Trying alternative approach...")
    
    # Fallback: try without device_map
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, 
        torch_dtype=dtype, 
        trust_remote_code=False,
        use_safetensors=True
    )
    
    if getattr(model.config, "pad_token_id", None) is None:
        model.config.pad_token_id = tok.pad_token_id
    
    # Move to device manually
    model = model.to(device)
    print("Model loaded with fallback approach!")
    print(f"Model device: {next(model.parameters()).device}")

Loading model...
Using dtype: torch.float16


If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of RobertaForCausalLM were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded and moved to device
Model loaded successfully!
Model device: cpu


In [15]:
# Training arguments
args = TrainingArguments(
    output_dir=SAVE_DIR,
    per_device_train_batch_size=1,      
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,      
    num_train_epochs=0.5,             
    learning_rate=1e-5,                
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    logging_steps=25,
    report_to="none",
    warmup_steps=50,
    weight_decay=0.01,
    gradient_checkpointing=True,       
    dataloader_pin_memory=False,
    remove_unused_columns=False,
)

print("Training arguments configured:")
print(f"Output directory: {args.output_dir}")
print(f"Batch size: {args.per_device_train_batch_size}")
print(f"Gradient accumulation steps: {args.gradient_accumulation_steps}")
print(f"Learning rate: {args.learning_rate}")
print(f"Epochs: {args.num_train_epochs}")

Training arguments configured:
Output directory: /Users/arushijain/PycharmProjects/ResearchBased/datasets/codebert_base_finetuned
Batch size: 1
Gradient accumulation steps: 8
Learning rate: 1e-05
Epochs: 0.5


In [20]:
# Build features
MAX_PROMPT = 500
MAX_FULL = 514

def encode_row(func_text: str, label_int: int):
    label = "Yes" if int(label_int) == 1 else "No"
    prompt = (
        "Below is a C function. Determine if it contains security vulnerabilities.\n\n"
        f"Function:\n{func_text}\n\n"
        "Question: Does this function contain security vulnerabilities?\n"
        "Answer:"
    )
    ans = " " + label

    tok_prompt = tok(prompt, truncation=True, max_length=MAX_PROMPT)
    tok_full = tok(prompt + ans, truncation=True, max_length=MAX_FULL, padding="max_length")

    input_ids = tok_full["input_ids"]
    attn = tok_full["attention_mask"]
    labels = list(input_ids)
    plen = len(tok_prompt["input_ids"])
    for i in range(min(plen, len(labels))):
        labels[i] = -100  # mask prompt; loss only on " Answer: Yes/No"
    
    return {
        "input_ids": torch.tensor(input_ids, dtype=torch.long),
        "attention_mask": torch.tensor(attn, dtype=torch.long),
        "labels": torch.tensor(labels, dtype=torch.long),
    }

class SimpleMapDataset(Dataset):
    def __init__(self, df):
        self.funcs = df["func"].tolist()
        self.targets = df["target"].tolist()
    
    def __len__(self):
        return len(self.funcs)
    
    def __getitem__(self, i):
        return encode_row(self.funcs[i], self.targets[i])

train_ds = SimpleMapDataset(train_df)
val_ds = SimpleMapDataset(val_df)

print("Datasets prepared successfully!")
print(f"Training samples: {len(train_ds)}")
print(f"Validation samples: {len(val_ds)}")

# Test encoding
sample = train_ds[0]
print(f"\nSample encoded data:")
print(f"Input shape: {sample['input_ids'].shape}")
print(f"Attention mask shape: {sample['attention_mask'].shape}")
print(f"Labels shape: {sample['labels'].shape}")

Datasets prepared successfully!
Training samples: 7000
Validation samples: 1000

Sample encoded data:
Input shape: torch.Size([514])
Attention mask shape: torch.Size([514])
Labels shape: torch.Size([514])


In [21]:
# Initialize trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=default_data_collator,
    tokenizer=tok,
)

print("Trainer initialized successfully!")
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

Trainer initialized successfully!
Model parameters: 124,697,433
Trainable parameters: 124,697,433


In [22]:
# Start training
print("Starting training...")
print("=" * 50)

try:
    # Train the model
    trainer.train()
    
    print("\nTraining completed successfully!")
    
    # Save the model
    print("Saving model...")
    trainer.save_model(SAVE_DIR)
    tok.save_pretrained(SAVE_DIR)
    
    print(f"Model saved to: {SAVE_DIR}")
    
except Exception as e:
    print(f"Error during training: {e}")
    # Try to save anyway
    try:
        trainer.save_model(SAVE_DIR)
        tok.save_pretrained(SAVE_DIR)
        print(f"Model saved despite error to: {SAVE_DIR}")
    except Exception as save_error:
        print(f"Could not save model: {save_error}")

Starting training...


Epoch,Training Loss,Validation Loss


Error during training: Numpy is not available
Model saved despite error to: /Users/arushijain/PycharmProjects/ResearchBased/datasets/codebert_base_finetuned


In [23]:
# Verify saved model
print("Verifying saved model...")
saved_files = os.listdir(SAVE_DIR)
print(f"Files in {SAVE_DIR}:")
for file in saved_files:
    print(f"  - {file}")

print("\nModel training and saving completed!")

Verifying saved model...
Files in /Users/arushijain/PycharmProjects/ResearchBased/datasets/codebert_base_finetuned:
  - model.safetensors
  - tokenizer_config.json
  - special_tokens_map.json
  - config.json
  - tokenizer.json
  - generation_config.json
  - merges.txt
  - training_args.bin
  - vocab.json

Model training and saving completed!


In [25]:
pip install scikit-learn pandas seaborn matplotlib

Python(18217) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting scikit-learn
  Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/74/88/0dd5be14ef19f2d80a77780be35a33aa94e8a3b3223d80bee8892a7832b4/scikit_learn-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata
  Downloading scikit_learn-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata (11 kB)
Collecting seaborn
  Obtaining dependency information for seaborn from https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl.metadata
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting matplotlib
  Obtaining dependency information for matplotlib from https://files.pythonhosted.org/packages/d1/89/5355cdfe43242cb4d1a64a67cb6831398b665ad90e9702c16247cbd8d5ab/matplotlib-3.10.5-cp310-cp310-macosx_10_12_x86_64.whl.metadata
  Downloading matplotlib-3.10.5-cp310-cp310-macosx_10_12_x86_64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Ob

In [55]:
import os
import csv
import pandas as pd
import torch
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support, 
    classification_report, confusion_matrix, roc_curve, auc,
    precision_recall_curve, average_precision_score
)
from transformers import AutoTokenizer, AutoModelForCausalLM
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Set style for better plots
plt.style.use('default')
sns.set_palette("husl")

print("Starting CodeBERT Model Evaluation...")
print("=" * 60)

# Configuration
DATA_DIR = "/Users/arushijain/PycharmProjects/ResearchBased/datasets"
MODEL_PATH = f"{DATA_DIR}/codebert_base_finetuned"
TEST_CSV = f"{DATA_DIR}/test.csv"

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Starting CodeBERT Model Evaluation...
Using device: cpu


In [56]:
def load_csv_multiline(path):
    """Load CSV with multi-line functions properly"""
    functions = []
    targets = []
    
    with open(path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()
    
    # Split by lines and process
    lines = content.split('\n')
    i = 0
    
    while i < len(lines):
        line = lines[i].strip()
        
        if line.startswith('FFmpeg,'):  # Start of a new record
            parts = line.split(',', 3)  # Split into 4 parts
            if len(parts) >= 4:
                target = int(parts[2]) if parts[2].isdigit() else 0
                func_start = parts[3].strip('"')  # Remove opening quote
                
                # Collect function text across multiple lines
                func_text = func_start
                i += 1
                
                # Continue collecting function text until we hit the end
                while i < len(lines) and not lines[i].strip().startswith('FFmpeg,'):
                    if lines[i].strip() and not lines[i].strip().startswith('FFmpeg,'):
                        func_text += '\n' + lines[i].strip()
                    i += 1
                
                # Clean up function text
                func_text = func_text.strip('"')  # Remove closing quote
                functions.append(func_text)
                targets.append(target)
            else:
                i += 1
        else:
            i += 1
    
    return pd.DataFrame({'func': functions, 'target': targets})

In [57]:
def predict_vulnerability(model, tokenizer, func_text, max_length=500):
    """Predict vulnerability using the fine-tuned CodeBERT model"""
    try:
        prompt = (
            "Below is a C function. Determine if it contains security vulnerabilities.\n\n"
            f"Function:\n{func_text}\n\n"
            "Question: Does this function contain security vulnerabilities?\n"
            "Answer:"
        )
        
        # Tokenize with shorter max length
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length, padding="max_length")
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        # Generate prediction
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=5,
                do_sample=False,
                temperature=0.0,
                pad_token_id=tokenizer.eos_token_id
            )
        
        # Decode prediction
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Extract answer
        answer = generated_text.split("Answer:")[-1].strip().lower()
        
        # Classify
        if "yes" in answer:
            return 1  # Vulnerable
        elif "no" in answer:
            return 0  # Safe
        else:
            # Fallback classification
            return 1 if "vulnerable" in answer or "security" in answer else 0
            
    except Exception as e:
        print(f"Error in prediction: {e}")
        return 0  # Default to safe if error occurs

In [61]:
# Fast evaluation with progress tracking
print("Starting fast CodeBERT evaluation...")

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
model = model.to(device)
model.eval()

# Load test data
test_df = load_csv_multiline(TEST_CSV)
print(f"Test dataset loaded: {len(test_df)} samples")

# Run predictions with better progress tracking
predictions = []
actuals = []

print("Starting predictions...")
for i, (_, row) in enumerate(test_df.iterrows()):
    try:
        pred = predict_vulnerability(model, tokenizer, row["func"])
        predictions.append(pred)
        actuals.append(row["target"])
        
        if i % 50 == 0:  # Progress every 50 samples
            print(f"   Processed {i}/{len(test_df)} samples...")
            
    except Exception as e:
        print(f"   Error on sample {i}: {e}")
        continue

print(f"Evaluation completed! Total predictions: {len(predictions)}")

# Calculate and display metrics
if len(predictions) > 0:
    predictions = np.array(predictions)
    actuals = np.array(actuals)
    
    accuracy = accuracy_score(actuals, predictions)
    precision, recall, f1, support = precision_recall_fscore_support(actuals, predictions, average=None)
    
    print(f"\nRESULTS:")
    print(f"Accuracy: {accuracy:.3f}")
    print(f"Macro F1: {np.mean(f1):.3f}")
    print(f"Precision (Safe): {precision[0]:.3f}")
    print(f"Recall (Safe): {recall[0]:.3f}")
    print(f"Precision (Vulnerable): {precision[1]:.3f}")
    print(f"Recall (Vulnerable): {recall[1]:.3f}")
    
    # Show confusion matrix
    cm = confusion_matrix(actuals, predictions)
    print(f"\nConfusion Matrix:")
    print(f"Safe: {cm[0][0]} (TN) | {cm[0][1]} (FP)")
    print(f"Vulnerable: {cm[1][0]} (FN) | {cm[1][1]} (TP)")
else:
    print("No predictions generated!")

Starting fast CodeBERT evaluation...


If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Test dataset loaded: 976 samples
Starting predictions...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


   Processed 0/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 50/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 100/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 150/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 200/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 250/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 300/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 350/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 400/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 450/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 500/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 550/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 600/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 650/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 700/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 750/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 800/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 850/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 900/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

   Processed 950/976 samples...


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

Evaluation completed! Total predictions: 976

RESULTS:
Accuracy: 0.503
Macro F1: 0.499
Precision (Safe): 0.482
Recall (Safe): 0.619
Precision (Vulnerable): 0.535
Recall (Vulnerable): 0.398

Confusion Matrix:
Safe: 287 (TN) | 177 (FP)
Vulnerable: 308 (FN) | 204 (TP)
