# 📰 News Topic Classifier Using BERT
Fine-tune a BERT model on the AG News dataset and deploy it with Gradio.

In [None]:
# ✅ Step 0: Install Required Libraries
!pip install transformers datasets scikit-learn gradio --quiet

In [None]:
# 📥 Step 1: Load and Preprocess the Dataset
from datasets import load_dataset
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split

# Fetch the AG News dataset and the uncased BERT tokenizer.
dataset = load_dataset("ag_news")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize_function(example):
    """Tokenize the ``text`` field of ``example``.

    Every sequence is truncated and padded to exactly 128 tokens, so all
    encoded rows share the same length.
    """
    return tokenizer(
        example["text"],
        max_length=128,
        truncation=True,
        padding="max_length",
    )


# Tokenize in batches, expose the target column under the name "labels",
# then restrict the torch-formatted view to the three listed columns.
tokenized_dataset = dataset.map(tokenize_function, batched=True).rename_column("label", "labels")
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

In [None]:
# 🧠 Step 2: Fine-Tune BERT Model
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
import torch

# BERT with a sequence-classification head sized for the 4 AG News labels.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)

training_args = TrainingArguments(
    output_dir="./results",
    # NOTE: recent transformers releases rename this argument to `eval_strategy`;
    # switch to the new name if the installed version rejects the old one.
    evaluation_strategy="epoch",
    # load_best_model_at_end requires checkpoints to be saved on the same
    # schedule as evaluation; the default save strategy ("steps") does not
    # match "epoch" and makes TrainingArguments raise a ValueError.
    save_strategy="epoch",
    logging_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=1,
    load_best_model_at_end=True,
)

# NOTE(review): the test split doubles as the evaluation set, so the
# best-checkpoint selection sees test data; consider carving a validation
# split out of the training data instead.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer,
)

# Runs the fine-tuning loop configured above.
trainer.train()

In [None]:
# 📊 Step 3: Evaluate the Model
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

def compute_metrics(eval_pred):
    """Score predictions with accuracy and weighted F1.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        A dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest score on the last axis.
    predicted = np.argmax(scores, axis=-1)
    return {
        "accuracy": accuracy_score(references, predicted),
        "f1": f1_score(references, predicted, average="weighted"),
    }


# Attach the metric function to the existing trainer, then evaluate and
# show the resulting metrics dict.
trainer.compute_metrics = compute_metrics
eval_results = trainer.evaluate()
print(eval_results)

In [None]:
# 🌐 Step 4: Deploy with Gradio
import gradio as gr

# Human-readable class names, indexed by the integer label id.
label_names = dataset["train"].features["label"].names


def predict_news_topic(text):
    """Classify a piece of news text and return the predicted topic name.

    Args:
        text: The headline (or other news text) to classify.

    Returns:
        The entry of ``label_names`` at the index of the highest logit.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # The tokenizer builds CPU tensors, while training may have left the model
    # on an accelerator; move the inputs to wherever the model lives to avoid
    # a device-mismatch error.
    inputs = inputs.to(model.device)
    # Make sure dropout is disabled for inference.
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
    pred = torch.argmax(outputs.logits, dim=1).item()
    return label_names[pred]

# Single-textbox UI: the entered text is passed to predict_news_topic and the
# string it returns is displayed as the output.
demo = gr.Interface(
    title="News Topic Classifier",
    description="Enter a news headline to classify it into one of four categories: World, Sports, Business, or Sci/Tech.",
    fn=predict_news_topic,
    inputs=gr.Textbox(placeholder="Enter news headline here...", lines=2),
    outputs="text",
)

# Start serving the interface.
demo.launch()