In [4]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce MX150


In [2]:
import torch
print("Is GPU available?", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0))

Is GPU available? True
GPU Name: NVIDIA GeForce MX150


In [3]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    
    # --- MX150 SAFE SETTINGS ---
    per_device_train_batch_size=2,   # Very small batch for 4GB RAM
    gradient_accumulation_steps=4,   # This makes the "real" batch size 8
    fp16=True,                       # Reduces memory usage
    max_steps=1000,                  # Start small to test
    # ---------------------------
    
    learning_rate=2e-5,
    weight_decay=0.01,
)

In [7]:
from datasets import load_dataset

# Define the file paths (ensure these files are in the same folder as your notebook)
data_files = {
    "train": "train_simple.csv",
    "validation": "val_simple.csv",
    "test": "test_simple.csv"
}

# Load the dataset
raw_datasets = load_dataset("csv", data_files=data_files)

# Print the structure to see columns and row counts
print(raw_datasets)

DatasetDict({
    train: Dataset({
        features: ['tweet_id', 'text', 'Violenc', 'Hate', 'Vulgar', 'HateSpeech'],
        num_rows: 3528
    })
    validation: Dataset({
        features: ['tweet_id', 'text', 'Violenc', 'Hate', 'Vulgar', 'HateSpeech'],
        num_rows: 706
    })
    test: Dataset({
        features: ['tweet_id', 'text', 'Violenc', 'Hate', 'Vulgar', 'HateSpeech'],
        num_rows: 2822
    })
})


In [4]:
print("Example row:", raw_datasets["train"][0])
print("Column names:", raw_datasets["train"].column_names)

Example row: {'tweet_id': 1222188705437429762, 'text': 'در جریانید که حق طلاق توی ایران با مرده دیگه؟؟؟', 'Violenc': 0, 'Hate': 0, 'Vulgar': 0, 'HateSpeech': 0}
Column names: ['tweet_id', 'text', 'Violenc', 'Hate', 'Vulgar', 'HateSpeech']


In [8]:
# Rename 'HateSpeech' to 'label' for the trainer
def prepare_labels(batch):
    batch["label"] = batch["HateSpeech"]
    return batch

dataset = raw_datasets.map(prepare_labels)

Map:   0%|          | 0/3528 [00:00<?, ? examples/s]

Map:   0%|          | 0/706 [00:00<?, ? examples/s]

Map:   0%|          | 0/2822 [00:00<?, ? examples/s]

In [9]:
from transformers import AutoTokenizer

model_checkpoint = "HooshvareLab/bert-fa-zwnj-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/292 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

Map:   0%|          | 0/3528 [00:00<?, ? examples/s]

Map:   0%|          | 0/706 [00:00<?, ? examples/s]

Map:   0%|          | 0/2822 [00:00<?, ? examples/s]

In [10]:
from transformers import AutoModelForSequenceClassification
import torch

# We have 2 classes: 0 (Normal) and 1 (HateSpeech)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

# Move model to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Model loaded on: {device}")

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/473M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at HooshvareLab/bert-fa-zwnj-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on: cuda


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/473M [00:00<?, ?B/s]

Error while downloading from https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/refs%2Fpr%2F1/model.safetensors: HTTPSConnectionPool(host='cas-bridge.xethub.hf.co', port=443): Read timed out.
Trying to resume download...
'(ReadTimeoutError("HTTPSConnectionPool(host='cas-bridge.xethub.hf.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 55c260a1-3d5f-437e-9ca4-b3ad00be3ba9)')' thrown while requesting GET https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/refs%2Fpr%2F1/model.safetensors
Retrying in 1s [Retry 1/5].


model.safetensors:   9%|8         | 41.9M/473M [00:00<?, ?B/s]

Error while downloading from https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/refs%2Fpr%2F1/model.safetensors: HTTPSConnectionPool(host='cas-bridge.xethub.hf.co', port=443): Read timed out.
Trying to resume download...
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 452324c1-a34d-460f-8bd6-0fbf3337a52b)')' thrown while requesting GET https://huggingface.co/HooshvareLab/bert-fa-zwnj-base/resolve/refs%2Fpr%2F1/model.safetensors
Retrying in 1s [Retry 1/5].


model.safetensors:  93%|#########3| 440M/473M [00:00<?, ?B/s]

In [11]:
import numpy as np
import evaluate

metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script: 0.00B [00:00, ?B/s]

In [13]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./bert-hate-speech-results",
    logging_dir="./logs",
    
    # --- GPU MEMORY OPTIMIZATION ---
    fp16=True,                          
    per_device_train_batch_size=4,      
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,      
    gradient_checkpointing=True,        
    # -------------------------------

    # CHANGE: 'evaluation_strategy' becomes 'eval_strategy'
    eval_strategy="epoch",  
    save_strategy="epoch",
    
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    report_to="none" 
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [14]:
trainer.train()

Epoch,Training Loss,Validation Loss,F1
1,No log,0.486618,0.731861
2,No log,0.47093,0.750392
3,0.455700,0.555631,0.747182


TrainOutput(global_step=663, training_loss=0.39524363176855026, metrics={'train_runtime': 2053.2548, 'train_samples_per_second': 5.155, 'train_steps_per_second': 0.323, 'total_flos': 696191852482560.0, 'train_loss': 0.39524363176855026, 'epoch': 3.0})

In [15]:
results = trainer.evaluate(tokenized_datasets["test"])
print("Test Results:", results)

Test Results: {'eval_loss': 0.47786349058151245, 'eval_f1': 0.7526273241713823, 'eval_runtime': 123.552, 'eval_samples_per_second': 22.841, 'eval_steps_per_second': 5.714, 'epoch': 3.0}


In [17]:
from sklearn.metrics import classification_report
import numpy as np

# 1. Get predictions on the test set
test_predictions = trainer.predict(tokenized_datasets["test"])
preds = np.argmax(test_predictions.predictions, axis=-1)
actual_labels = test_predictions.label_ids

# 2. Print the detailed report
# Label 0 = Normal, Label 1 = HateSpeech
print(classification_report(actual_labels, preds, target_names=["Normal", "HateSpeech"]))

              precision    recall  f1-score   support

      Normal       0.80      0.82      0.81      1563
  HateSpeech       0.77      0.74      0.75      1259

    accuracy                           0.78      2822
   macro avg       0.78      0.78      0.78      2822
weighted avg       0.78      0.78      0.78      2822



In [2]:
# Create a folder for your saved model
model_save_path = "./parsbert-hate-speech-model"

# Save the model and the tokenizer
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"Model successfully saved to: {model_save_path}")

NameError: name 'trainer' is not defined

# without training classification report 

In [6]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import classification_report
import numpy as np
from tqdm import tqdm

# 1. Use the RAW ParsBERT checkpoint from Hugging Face
model_checkpoint = "HooshvareLab/bert-fa-zwnj-base" 

# 2. Load Tokenizer and Model
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# When we set num_labels=2, Hugging Face adds a RANDOM 2-class head on top
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

# Move to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# 3. Load the Test Dataset
dataset = load_dataset("csv", data_files={"test": "test_simple.csv"})["test"]

# 4. Inference Loop
all_preds = []
all_labels = []

print("Running RAW model inference (Expect random results)...")
for item in tqdm(dataset):
    text = item['text']
    label = item['HateSpeech']
    
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128).to(device)
    
    with torch.no_grad():
        logits = model(**inputs).logits
    
    prediction = torch.argmax(logits, dim=-1).item()
    
    all_preds.append(prediction)
    all_labels.append(label)

# 5. Print the Report
print("\n" + "="*55)
print(f"REPORT FOR RAW MODEL: {model_checkpoint}")
print("="*55)
print(classification_report(all_labels, all_preds, target_names=["Normal", "HateSpeech"]))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at HooshvareLab/bert-fa-zwnj-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Running RAW model inference (Expect random results)...


100%|██████████████████████████████████████████████████████████████████████████████| 2822/2822 [02:30<00:00, 18.70it/s]


REPORT FOR RAW MODEL: HooshvareLab/bert-fa-zwnj-base
              precision    recall  f1-score   support

      Normal       0.56      0.28      0.38      1563
  HateSpeech       0.45      0.72      0.55      1259

    accuracy                           0.48      2822
   macro avg       0.50      0.50      0.46      2822
weighted avg       0.51      0.48      0.45      2822






# AS an other model we consider DistilGPT-2

In [18]:
from transformers import AutoTokenizer

model_checkpoint = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# CRITICAL FIX FOR GPT-2:
# GPT-2 does not have a pad token. we use the EOS token instead.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/3528 [00:00<?, ? examples/s]

Map:   0%|          | 0/706 [00:00<?, ? examples/s]

Map:   0%|          | 0/2822 [00:00<?, ? examples/s]

In [19]:
from transformers import AutoModelForSequenceClassification
import torch

# Load DistilGPT-2 for classification
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

# FIX: Tell the model which ID is the padding token
model.config.pad_token_id = model.config.eos_token_id

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForSequenceClassification(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=2, bias=False)
)

In [20]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./distilgpt2-hate-speech",
    eval_strategy="epoch",
    save_strategy="epoch",
    
    # GPU Optimizations
    fp16=True,
    per_device_train_batch_size=8,      # You can increase to 8 for DistilGPT!
    gradient_accumulation_steps=2,
    
    learning_rate=5e-5,                 # GPT models often need a slightly higher LR
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics, # Use the same function as before
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,F1
1,No log,0.687172,0.651835
2,No log,0.666838,0.598916
3,0.699400,0.666499,0.534591


TrainOutput(global_step=663, training_loss=0.6882548727780625, metrics={'train_runtime': 933.5562, 'train_samples_per_second': 11.337, 'train_steps_per_second': 0.71, 'total_flos': 345708086427648.0, 'train_loss': 0.6882548727780625, 'epoch': 3.0})

In [21]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# 1. Use the trainer to predict labels for the test set
test_output = trainer.predict(tokenized_datasets["test"])

# 2. Extract the predictions (logits) and true labels
# 'test_output.predictions' are the raw scores, we use argmax to get 0 or 1
test_preds = np.argmax(test_output.predictions, axis=-1)
test_labels = test_output.label_ids

# 3. Print the report
print("Classification Report for ParsBERT on Test Dataset:")
print("-" * 60)
print(classification_report(test_labels, test_preds, target_names=["Normal", "HateSpeech"]))

Classification Report for ParsBERT on Test Dataset:
------------------------------------------------------------
              precision    recall  f1-score   support

      Normal       0.62      0.62      0.62      1563
  HateSpeech       0.53      0.53      0.53      1259

    accuracy                           0.58      2822
   macro avg       0.57      0.57      0.57      2822
weighted avg       0.58      0.58      0.58      2822



In [23]:
# import seaborn as sns
# import matplotlib.pyplot as plt

# # Create the matrix
# cm = confusion_matrix(test_labels, test_preds)

# # Plot it
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
#             xticklabels=["Normal", "HateSpeech"], 
#             yticklabels=["Normal", "HateSpeech"])
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('Confusion Matrix: ParsBERT Prediction')
# plt.show()

In [24]:
# Save weights
model.save_pretrained("./my_saved_DistilGPT-2_model")

# Save tokenizer (CRITICAL: this keeps your Persian vocabulary settings)
tokenizer.save_pretrained("./my_saved_DistilGPT-2_model")

('./my_saved_DistilGPT-2_model\\tokenizer_config.json',
 './my_saved_DistilGPT-2_model\\special_tokens_map.json',
 './my_saved_DistilGPT-2_model\\vocab.json',
 './my_saved_DistilGPT-2_model\\merges.txt',
 './my_saved_DistilGPT-2_model\\added_tokens.json',
 './my_saved_DistilGPT-2_model\\tokenizer.json')

# raw model of DistilGPT-2

In [9]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import classification_report
import numpy as np
from tqdm import tqdm

# 1. استفاده از نسخه خام و انگلیسی DistilGPT-2
model_checkpoint = "distilgpt2" 

# 2. بارگذاری توکنایزر و تنظیم توکن Pad (حیاتی برای GPT-2)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
tokenizer.pad_token = tokenizer.eos_token

# 3. بارگذاری مدل با لایه طبقه‌بندی تصادفی (Newly Initialized)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
model.config.pad_token_id = tokenizer.pad_token_id

# انتقال به GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# 4. بارگذاری مجموعه داده تست
dataset = load_dataset("csv", data_files={"test": "test_simple.csv"})["test"]

all_preds = []
all_labels = []

print("Running RAW DistilGPT-2 inference (English model on Persian text)...")
for item in tqdm(dataset):
    text = item['text']
    label = item['HateSpeech']
    
    # توکنایز کردن (GPT-2 از سمت چپ یا راست پدینگ می‌کند، پیش‌فرض Trainer معمولا راست است)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128).to(device)
    
    with torch.no_grad():
        logits = model(**inputs).logits
    
    prediction = torch.argmax(logits, dim=-1).item()
    
    all_preds.append(prediction)
    all_labels.append(label)

# 5. چاپ گزارش عملکرد
print("\n" + "="*55)
print(f"REPORT FOR RAW MODEL: {model_checkpoint}")
print("="*55)
print(classification_report(all_labels, all_preds, target_names=["Normal", "HateSpeech"]))

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Running RAW DistilGPT-2 inference (English model on Persian text)...


100%|██████████████████████████████████████████████████████████████████████████████| 2822/2822 [01:18<00:00, 36.09it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



REPORT FOR RAW MODEL: distilgpt2
              precision    recall  f1-score   support

      Normal       0.00      0.00      0.00      1563
  HateSpeech       0.45      1.00      0.62      1259

    accuracy                           0.45      2822
   macro avg       0.22      0.50      0.31      2822
weighted avg       0.20      0.45      0.28      2822



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [2]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import classification_report
import numpy as np
from tqdm import tqdm

model_checkpoint = "distilgpt2"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"   # important for GPT-2

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)
model.config.pad_token_id = tokenizer.pad_token_id

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

dataset = load_dataset("csv", data_files={"test": "test_simple.csv"})["test"]

# (Optional) shuffle dataset order
dataset = dataset.shuffle(seed=42)

def collate_fn(batch):
    texts = [x["text"] for x in batch]
    labels = torch.tensor([x["HateSpeech"] for x in batch], dtype=torch.long)
    enc = tokenizer(
        texts,
        truncation=True,
        padding=True,        # dynamic padding per batch
        max_length=128,
        return_tensors="pt"
    )
    enc["labels"] = labels
    return enc

batch_size = 32
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

all_preds, all_labels = [], []

print("Running batched inference...")
for batch in tqdm(loader):
    labels = batch.pop("labels")
    batch = {k: v.to(device) for k, v in batch.items()}
    labels = labels.to(device)

    with torch.no_grad():
        logits = model(**batch).logits

    preds = torch.argmax(logits, dim=-1)

    all_preds.extend(preds.cpu().numpy().tolist())
    all_labels.extend(labels.cpu().numpy().tolist())

print("Pred counts:", np.bincount(np.array(all_preds)))
print(classification_report(all_labels, all_preds, target_names=["Normal", "HateSpeech"], zero_division=0))


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Running batched inference...


100%|██████████████████████████████████████████████████████████████████████████████████| 89/89 [01:03<00:00,  1.40it/s]

Pred counts: [ 691 2131]
              precision    recall  f1-score   support

      Normal       0.63      0.28      0.39      1563
  HateSpeech       0.47      0.80      0.59      1259

    accuracy                           0.51      2822
   macro avg       0.55      0.54      0.49      2822
weighted avg       0.56      0.51      0.48      2822




