In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
import torch

In [2]:
# Hugging Face Hub checkpoint IDs. Each one is named once so that a tokenizer
# and its model are always loaded from the same checkpoint and cannot drift
# apart when one of the strings is edited.
CAMELBERT_CHECKPOINT = "CAMeL-Lab/bert-base-arabic-camelbert-mix-sentiment"
ARABERT_CHECKPOINT = "kathaem/aubmindlab-arabertv02-base-sentence-transformer-xnli-ar"
ARAT5_CHECKPOINT = "Noanihio/arat5v2-darja-sentiment"

# 1️⃣ CAMeLBERT sentiment (BERT-style)
camelbert_tokenizer = AutoTokenizer.from_pretrained(CAMELBERT_CHECKPOINT)
camelbert_model = AutoModelForSequenceClassification.from_pretrained(CAMELBERT_CHECKPOINT)

# 2️⃣ AraBERT sentence-transformer (BERT-style)
# WARNING: loading this checkpoint with a sequence-classification head reports
# that 'classifier.weight' and 'classifier.bias' are newly initialized, i.e. the
# head is random. Its predictions are not meaningful until the model is
# fine-tuned on a sentiment task; treat its output as a placeholder only.
arabert_tokenizer = AutoTokenizer.from_pretrained(ARABERT_CHECKPOINT)
arabert_model = AutoModelForSequenceClassification.from_pretrained(ARABERT_CHECKPOINT)

# 3️⃣ AraT5v2 sentiment (T5-style, text-to-text: the label is generated as text)
arat5_tokenizer = AutoTokenizer.from_pretrained(ARAT5_CHECKPOINT)
arat5_model = AutoModelForSeq2SeqLM.from_pretrained(ARAT5_CHECKPOINT)

print("✅ All 3 models loaded successfully")

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/637 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/541M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at kathaem/aubmindlab-arabertv02-base-sentence-transformer-xnli-ar and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.47G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/541M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

✅ All 3 models loaded successfully


In [3]:
# Three short Arabic reviews used as a smoke test for every model.
# An approximate English gloss is given above each entry; judging by the
# wording they are meant to be positive, neutral and negative respectively.
test_reviews = [
    # "This product is really excellent!"
    "هذا المنتج ممتاز جدًا!",
    # "The service is ordinary, nothing special."
    "الخدمة عادية، لا شيء مميز",
    # "I am not happy with the service; it is extremely bad."
    "أنا غير سعيد بالخدمة، سيئة للغاية"
]

In [4]:
def predict_bert_sentiment(model, tokenizer, texts):
    """Return the arg-max class index a classification model assigns to each text.

    Each text is tokenized and pushed through the model on its own, with
    gradient tracking disabled.

    Args:
        model: Callable accepting the tokenizer output as keyword arguments and
            returning an object with a ``logits`` attribute. It is put into
            evaluation mode via ``model.eval()`` as a side effect.
        tokenizer: Callable turning one string into the model's keyword inputs.
        texts: Iterable of strings to classify.

    Returns:
        A list of plain ``int`` class indices, one per input text, in input
        order. These are raw indices into the model's output layer: what each
        index means (positive / negative / ...) is defined by the model, not
        by this function.
    """
    model.eval()
    class_ids = []
    # A single no-grad context covers the whole loop; nothing here needs gradients.
    with torch.no_grad():
        for sentence in texts:
            encoded = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
            logits = model(**encoded).logits
            probabilities = torch.softmax(logits, dim=-1)
            # .item() converts the one-element index tensor to a Python int.
            class_ids.append(probabilities.argmax(dim=-1).item())
    return class_ids

In [5]:
def predict_t5_sentiment(model, tokenizer, texts):
    """Score each text as -1 / 0 / 1 using a text-to-text (seq2seq) model.

    The model generates its answer as text; that text is decoded, normalised
    (surrounding whitespace stripped, lower-cased) and looked up in a fixed
    table of English, Arabic and numeric label spellings.

    Args:
        model: Object exposing ``eval()`` and ``generate(**inputs)``. It is put
            into evaluation mode as a side effect.
        tokenizer: Callable turning one string into the model's keyword inputs,
            and exposing ``decode(ids, skip_special_tokens=...)``.
        texts: Iterable of strings to classify.

    Returns:
        A list with one int per input text, in input order:
        -1 for negative, 0 for neutral, 1 for positive.

    Warns:
        UserWarning: when the generated text matches no known label. The text
            is still scored 1 (the historical fallback) so existing callers
            keep working, but the fallback is no longer silent.
    """
    import warnings  # local import: keeps this cell self-contained

    # Accepted spellings of each label, already in normalised form.
    label_to_score = {
        "negative": -1, "سلبية": -1, "-1": -1,
        "neutral": 0, "محايدة": 0, "0": 0,
        "positive": 1, "إيجابية": 1, "1": 1,
    }

    model.eval()
    predictions = []
    for text in texts:
        # NOTE(review): truncation=True only takes effect if the tokenizer
        # defines a maximum length; confirm, or pass max_length explicitly.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        output_ids = model.generate(**inputs)
        pred_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Generated text may differ in case or carry stray whitespace; without
        # normalising, e.g. "Negative" would fall through to the positive default.
        score = label_to_score.get(pred_text.strip().lower())
        if score is None:
            warnings.warn(
                f"Unrecognised sentiment output {pred_text!r}; defaulting to 1 (positive)",
                stacklevel=2,
            )
            score = 1
        predictions.append(score)
    return predictions

In [6]:
# Run all three models over the same reviews.
camelbert_preds = predict_bert_sentiment(camelbert_model, camelbert_tokenizer, test_reviews)
arabert_preds = predict_bert_sentiment(arabert_model, arabert_tokenizer, test_reviews)
arat5_preds    = predict_t5_sentiment(arat5_model, arat5_tokenizer, test_reviews)

# The raw numbers are NOT comparable across models: the BERT-style helpers
# return class indices whose meaning comes from each model's own config
# (id2label), while the T5 helper returns -1 / 0 / 1. Print the label name
# next to every number so the rows can actually be compared.
T5_LABEL_NAMES = {-1: "negative", 0: "neutral", 1: "positive"}
camelbert_names = camelbert_model.config.id2label
arabert_names = arabert_model.config.id2label

for review, camelbert_pred, arabert_pred, arat5_pred in zip(
    test_reviews, camelbert_preds, arabert_preds, arat5_preds
):
    camelbert_name = camelbert_names.get(camelbert_pred, "unknown")
    arabert_name = arabert_names.get(arabert_pred, "unknown")
    arat5_name = T5_LABEL_NAMES.get(arat5_pred, "unknown")

    print(f"Review: {review}")
    print(f"CAMeLBERT Prediction: {camelbert_pred} ({camelbert_name})")
    print(f"AraBERT Prediction: {arabert_pred} ({arabert_name})")
    print(f"AraT5v2 Prediction: {arat5_pred} ({arat5_name})")
    print("-"*50)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Review: هذا المنتج ممتاز جدًا!
CAMeLBERT Prediction: 0
AraBERT Prediction: 1
AraT5v2 Prediction: 1
--------------------------------------------------
Review: الخدمة عادية، لا شيء مميز
CAMeLBERT Prediction: 2
AraBERT Prediction: 1
AraT5v2 Prediction: 0
--------------------------------------------------
Review: أنا غير سعيد بالخدمة، سيئة للغاية
CAMeLBERT Prediction: 1
AraBERT Prediction: 1
AraT5v2 Prediction: 0
--------------------------------------------------
