In [1]:
import transformers
# Show the installed transformers release. The training cell further down picks
# its TrainingArguments keyword names for this specific version (4.57.0).
print(transformers.__version__)

4.57.0


In [2]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import classification_report

In [3]:
# 1. Load data
# Both splits are read from CSV files that live next to the notebook directory.
train_df = pd.read_csv("../data/train.csv")
test_df = pd.read_csv("../data/test.csv")

# Build one input string per row: "<Title> <Description>".
# Both columns are cast to str first so the join never sees non-string values.
train_df['text'] = train_df['Title'].astype(str).str.cat(
    train_df['Description'].astype(str), sep=" "
)
test_df['text'] = test_df['Title'].astype(str).str.cat(
    test_df['Description'].astype(str), sep=" "
)

# Plain Python lists of texts and labels.
# Labels go from 1..4 in the CSV; shift them to 0..3 before converting to lists.
X_train = train_df['text'].tolist()
y_train = (train_df['Class Index'] - 1).tolist()
X_test = test_df['text'].tolist()
y_test = (test_df['Class Index'] - 1).tolist()

# 2. Tokenizer
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

# Each split is tokenized in a single call: sequences longer than 128 tokens
# are truncated and shorter ones are padded.
train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length=128)

class NewsDataset(Dataset):
    """Map-style dataset that pairs tokenizer output with class labels.

    Args:
        encodings: Mapping from field name to a per-example sequence of
            values (anything exposing ``.items()`` whose values can be
            indexed by example position).
        labels: Sequence of labels, one per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, including ``"labels"``."""
        sample = {}
        for field, rows in self.encodings.items():
            sample[field] = torch.tensor(rows[idx])
        sample["labels"] = torch.tensor(self.labels[idx])
        return sample

# Wrap each split's encodings and labels so they can be indexed per example.
train_dataset = NewsDataset(encodings=train_encodings, labels=y_train)
test_dataset = NewsDataset(encodings=test_encodings, labels=y_test)

In [None]:
# 3. Model
# BERT encoder with a freshly initialised 4-way classification head
# (one output per news category).
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)

# 4. Training setup
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",          # <-- use this on 4.57.0
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    report_to="none",               # optional: silence wandb if not configured
    seed=42                         # reproducibility
)

# NOTE(review): eval_dataset is the test split, so the per-epoch validation
# loss is computed on the same data as the final report below. Consider a
# held-out validation split if these numbers are ever used for model selection.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # `tokenizer=` is deprecated on this transformers version and emits a
    # FutureWarning; `processing_class=` is the supported replacement.
    processing_class=tokenizer
)

# 5. Train
trainer.train()

# 6. Evaluate
# predict() returns the raw logits; argmax over the last axis gives the
# predicted class index (0..3) for each test example.
preds = trainer.predict(test_dataset)
y_pred = preds.predictions.argmax(-1)

print(classification_report(y_test, y_pred, target_names=["World","Sports","Business","Sci/Tech"]))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss
