In [1]:
# !pip install transformers datasets huggingface_hub tensorboard==2.11
# !pip install transformers datasets scikit-learn torch
# !pip install hf_xet

In [2]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, ClassLabel
import pandas as pd
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
csv_path = "harmful_training.csv"
dataset = load_dataset("csv",data_files=csv_path)

In [4]:
label_list = list(set(dataset["train"]["label"]))
num_labels = len(label_list)

In [5]:
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaForSequenceClassification.from_pretrained("roberta-base",num_labels=num_labels)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True, max_length=256)

In [7]:
tokenized_datasets = dataset.map(tokenize, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

In [8]:
tokenized_datasets = tokenized_datasets["train"].train_test_split(test_size=0.2)
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]

In [9]:
def compute_metrics(pred):
    logits, labels = pred
    preds = np.argmax(logits, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

In [10]:
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

In [13]:
if not hasattr(np, 'bool8'):
    np.bool8 = np.bool_
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [15]:
trainer.train()

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`labels` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [18]:
from datasets import load_dataset, ClassLabel
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# 1. Load the CSV
dataset = load_dataset("csv", data_files="harmful_training.csv")  # replace with actual path

# 2. Extract unique labels and convert to ClassLabel
labels = list(set(dataset["train"]["label"]))
labels.sort()  # optional: makes mapping consistent
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}

# 3. Map string labels to integer IDs
def encode_labels(example):
    example["label"] = label2id[example["label"]]
    return example

dataset = dataset.map(encode_labels)

# 4. Tokenization
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length")

dataset = dataset.map(tokenize, batched=True)

# 5. Set format for PyTorch
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# 6. Train/test split
dataset = dataset["train"].train_test_split(test_size=0.3)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

# 7. Model setup
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id
)

# 8. Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs"
)

# 9. Evaluation metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    acc = accuracy_score(labels, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, preds, average="weighted")
    return {"accuracy": acc, "precision": prec, "recall": rec, "f1": f1}

# 10. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# 11. Train
trainer.train()

# 12. Evaluate
trainer.evaluate()


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.949579,0.862069,0.889984,0.862069,0.857304
2,No log,0.464432,0.931034,0.939655,0.931034,0.930478
3,No log,0.336184,0.931034,0.939655,0.931034,0.930478




{'eval_loss': 0.3361838757991791,
 'eval_accuracy': 0.9310344827586207,
 'eval_precision': 0.9396551724137931,
 'eval_recall': 0.9310344827586207,
 'eval_f1': 0.9304779428467583,
 'eval_runtime': 5.9405,
 'eval_samples_per_second': 4.882,
 'eval_steps_per_second': 2.525,
 'epoch': 3.0}

In [27]:
from transformers import TextClassificationPipeline

# Create pipeline for easy prediction
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, device=0 if torch.cuda.is_available() else -1)

# Test examples
test_sentences = [
    "how do i hack my institute's network",
    "How do I build a bomb ?",
    "whats my ip address",
    "youre stupid",
    "youre nice"
]

# Run predictions
preds = pipe(test_sentences)

# Show results
for sentence, pred in zip(test_sentences, preds):
    top_label = max(pred, key=lambda x: x["score"])
    print(f"Input: {sentence}")
    print(f"Prediction: {top_label['label']} ({top_label['score']:.4f})\n")


Device set to use cpu


Input: how do i hack my institute's network
Prediction: crime (0.9778)

Input: How do I build a bomb ?
Prediction: crime (0.9804)

Input: whats my ip address
Prediction: crime (0.9493)

Input: youre stupid
Prediction: insult (0.7963)

Input: youre nice
Prediction: threat (0.4433)



