# Task 1: News Topic Classifier Using BERT
Fine-tune a transformer model (BERT) to classify news headlines into topic categories.


In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install ipywidgets


In [None]:
import os
# Silence the huggingface_hub warning about caching without symlink support.
# Both variables are set before the Hugging Face libraries are imported below.
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Intended to keep transformers from loading TensorFlow.
# NOTE(review): confirm the installed transformers version honours this variable.
os.environ["TRANSFORMERS_NO_TF"] = "1"  

import torch
from datasets import load_dataset
from transformers import BertTokenizerFast, BertForSequenceClassification, TrainingArguments, Trainer
import evaluate
import numpy as np


In [29]:

# 1. Load Dataset
# Fetch the "ag_news" dataset; later cells use its "train" and "test" splits
# and its "text" and "label" columns.
dataset = load_dataset("ag_news")


In [30]:
# 2. Load Tokenizer
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")


def tokenize(batch):
    """Tokenize the "text" column of a batch of examples.

    Sequences are truncated to the model's maximum length but deliberately
    NOT padded here. Padding every example to the maximum length at map time
    stores and processes mostly padding tokens. Instead, the Trainer (which
    is given the tokenizer) pads each batch to its own longest sequence when
    the batch is collated.

    Args:
        batch: Mapping with a "text" key holding a list of strings.

    Returns:
        The tokenizer output for the batch (variable-length sequences).
    """
    return tokenizer(batch["text"], truncation=True)


dataset = dataset.map(tokenize, batched=True)


In [None]:
# 3. Prepare dataset for PyTorch
# The model's forward pass expects the target column to be called "labels".
dataset = dataset.rename_column("label", "labels")
# Return torch tensors and expose only the listed columns.
dataset.set_format(
    "torch",
    columns=["input_ids", "attention_mask", "labels"],
)

train_dataset, test_dataset = dataset["train"], dataset["test"]


In [None]:
# 4. Load Model
# Pretrained BERT encoder with a classification head sized for 4 labels.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=4,
)


In [None]:
# 5. Metrics
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")


def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        Dict with the keys "accuracy" and "f1".
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted = np.argmax(logits, axis=-1)
    accuracy_result = accuracy.compute(predictions=predicted, references=labels)
    f1_result = f1.compute(predictions=predicted, references=labels, average="weighted")
    return {"accuracy": accuracy_result["accuracy"], "f1": f1_result["f1"]}


In [None]:
# 6. Training
import inspect


def _supported_name(func, preferred, legacy):
    """Return `preferred` if `func` declares a parameter of that name, else `legacy`."""
    return preferred if preferred in inspect.signature(func).parameters else legacy


# Newer transformers releases renamed two keywords used here
# (`evaluation_strategy` -> `eval_strategy`, `tokenizer` -> `processing_class`)
# and deprecate or reject the old spellings. Pick whichever name the installed
# version accepts so this cell runs on both old and new releases.
eval_strategy_key = _supported_name(
    TrainingArguments.__init__, "eval_strategy", "evaluation_strategy"
)
tokenizer_key = _supported_name(Trainer.__init__, "processing_class", "tokenizer")

training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    push_to_hub=False,
    # Evaluate once per epoch, matching the per-epoch checkpoint saves above.
    **{eval_strategy_key: "epoch"},
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # NOTE: the test split doubles as the per-epoch evaluation set.
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    **{tokenizer_key: tokenizer},
)

trainer.train()


In [None]:
# 7. Evaluate
# Run evaluation on the Trainer's eval_dataset (the test split) and print the
# returned metrics.
results = trainer.evaluate()
print(results)


In [None]:
# 8. Deploy with Gradio
import gradio as gr

def predict_news(text):
    """Classify a single news text into one of the four topic names.

    Args:
        text: The headline or article text to classify.

    Returns:
        One of "World", "Sports", "Business" or "Sci/Tech".
    """
    labels = ["World", "Sports", "Business", "Sci/Tech"]
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # After training the model may live on a GPU while the tokenizer returns
    # CPU tensors. Move the inputs to the model's device to avoid a
    # device-mismatch error.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
    # Inference only: disable dropout and skip gradient bookkeeping.
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_index = logits.argmax(dim=1).item()
    return labels[predicted_index]

# Text box in, predicted topic label out.
demo = gr.Interface(fn=predict_news, inputs="text", outputs="label")
# NOTE: share=True asks Gradio for a publicly reachable link in addition to
# the local server.
demo.launch(share=True)