In [1]:
!pip install torch

Collecting torch
  Downloading torch-2.7.1-cp311-cp311-win_amd64.whl.metadata (28 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.7.1-cp311-cp311-win_amd64.whl (216.1 MB)
   ---------------------------------------- 0.0/216.1 MB ? eta -:--:--
   ---------------------------------------- 0.8/216.1 MB 4.8 MB/s eta 0:00:46
   ---------------------------------------- 1.8/216.1 MB 4.4 MB/s eta 0:00:50
   ---------------------------------------- 2.4/216.1 MB 4.8 MB/s eta 0:00:45
   ---------------------------------------- 2.4/216.1 MB 4.8 MB/s eta 0:00:45
   ---------------------------------------- 2.6/216.1 MB 2.8 MB/s eta 0:01:17
    --------------------------------------- 2.9/216.1 MB 2.5 MB/s eta 0:01:2

In [2]:
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast
import joblib
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import joblib

# Read the labelled dataset from the CSV file.
df = pd.read_csv("BullyingMultiClase.csv")

# Build the encoder and fit it on the "label" column in one step
# (use df["labels"] instead if that is what the CSV calls the column).
le = LabelEncoder().fit(df["label"])

# Save the fitted encoder to disk. As the last expression of the cell, the
# list of written file names returned by joblib.dump is displayed.
joblib.dump(le, "label_encoder.pkl")

['label_encoder.pkl']

In [7]:
# Directory holding the saved model and tokenizer files.
MODEL_PATH = "saved_model"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Load the tokenizer and the classification model from the same directory.
tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_PATH)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_PATH)

# Move the model to the selected device and switch it to evaluation mode.
model = model.to(DEVICE).eval()

# Load the label encoder that was saved to disk.
le = joblib.load("label_encoder.pkl")

In [8]:
def predict(texts, max_length=128):
    """Classify one or more texts with the loaded sequence-classification model.

    Relies on the notebook-level ``tokenizer``, ``model``, ``DEVICE`` and
    ``le`` objects.

    Args:
        texts: A single string or an iterable of strings. A lone string is
            treated as a one-element batch.
        max_length: Token limit passed to the tokenizer; longer inputs are
            truncated. Defaults to 128, the value that used to be hard-coded.

    Returns:
        A list of ``(text, label, probs)`` tuples, one per input text, in
        input order. ``label`` is ``le.inverse_transform`` applied to the
        arg-max class index, and ``probs`` is that text's row of softmax
        probabilities as a NumPy array (presumably ordered like
        ``le.classes_`` -- confirm the model was trained with this encoding).
        An empty input yields an empty list.
    """
    if isinstance(texts, str):
        # The tokenizer would treat a bare string as one text, but zip()
        # below would then pair it character by character.
        texts = [texts]
    else:
        # Materialise generators/iterators: the inputs are read twice, once
        # by the tokenizer and once when building the result.
        texts = list(texts)

    # Nothing to classify; skip the tokenizer and model entirely.
    if not texts:
        return []

    # Tokenization
    encodings = tokenizer(
        texts,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt",
    )
    input_ids = encodings["input_ids"].to(DEVICE)
    attention_mask = encodings["attention_mask"].to(DEVICE)

    # Inference (no gradients needed)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()

    # Prediction: most probable class index per row, mapped back to its label
    predictions = np.argmax(probs, axis=1)
    labels = le.inverse_transform(predictions)
    return list(zip(texts, labels, probs))

In [21]:
# Example inputs to run through the classifier.
texts = [
    "You're such a stupid person because of your religion.",
    "You're a good friend.",
    "You are not allowed to rape women",
    "You should rape women everyday"
]

results = predict(texts)

# Show results
for text, label, prob in results:
    # Convert the NumPy scalars to rounded Python floats so the dict prints
    # as plain numbers instead of np.float32(...) wrappers (NumPy >= 2 repr).
    prob_by_class = {str(name): round(float(p), 3) for name, p in zip(le.classes_, prob)}
    print(f"\nTexto: {text}")
    print(f"Predicción: {label}")
    print(f"Probabilidades: {prob_by_class}")




Texto: You're such a stupid person because of your religion.
Predicción: religion
Probabilidades: {'ethnicity/race': np.float32(0.003), 'gender/sexual': np.float32(0.002), 'not_cyberbullying': np.float32(0.114), 'religion': np.float32(0.881)}

Texto: You're a good friend.
Predicción: not_cyberbullying
Probabilidades: {'ethnicity/race': np.float32(0.0), 'gender/sexual': np.float32(0.001), 'not_cyberbullying': np.float32(0.999), 'religion': np.float32(0.0)}

Texto: You are not allowed to rape women
Predicción: gender/sexual
Probabilidades: {'ethnicity/race': np.float32(0.0), 'gender/sexual': np.float32(0.998), 'not_cyberbullying': np.float32(0.002), 'religion': np.float32(0.0)}

Texto: You should rape women everyday
Predicción: gender/sexual
Probabilidades: {'ethnicity/race': np.float32(0.0), 'gender/sexual': np.float32(0.994), 'not_cyberbullying': np.float32(0.005), 'religion': np.float32(0.0)}


In [None]:
import pandas as pd

