In [1]:
!pip install gradio transformers torch scikit-learn



In [2]:
# Attach Google Drive to the Colab filesystem; later cells read the saved
# model artifacts from underneath this mount point.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [3]:
import os
import pickle

import gradio as gr
import torch
from google.colab import drive
from transformers import BertForSequenceClassification, BertTokenizerFast

# Run on the GPU when Colab provides one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Idempotent: when Drive is already mounted this call only prints a notice,
# so the cell still works when executed on its own.
drive.mount('/content/drive')

# Root folder on Drive that holds every saved model and label encoder.
base_path = "/content/drive/MyDrive/bert_models"

level1_path = os.path.join(base_path, "bert_level1_model")
level1_encoder_path = os.path.join(base_path, "level1_encoder.pkl")

# Fail fast with a readable message instead of letting from_pretrained()
# raise a less obvious error for a path that does not exist.
missing_artifacts = [
    path for path in (level1_path, level1_encoder_path) if not os.path.exists(path)
]
if missing_artifacts:
    raise FileNotFoundError(
        "Level-1 artifacts not found (is Google Drive mounted?): "
        + ", ".join(missing_artifacts)
    )

# Level-1 (coarse) classifier and its tokenizer are loaded from the same folder.
model = BertForSequenceClassification.from_pretrained(level1_path).to(device)
model.eval()  # inference only; the prediction function never trains this model
tokenizer = BertTokenizerFast.from_pretrained(level1_path)

# NOTE(security): pickle.load can execute arbitrary code from the file.
# Only load encoders that you produced yourself.
with open(level1_encoder_path, "rb") as f:
    # Object exposing inverse_transform(); maps class indices back to labels.
    le = pickle.load(f)

# Level-2 ("fine") classifiers and their label encoders, keyed by the
# Level-1 group name they refine.
fine_tuned_models = {}
fine_label_encoders = {}

groups = ['sports', 'autos', 'computers', 'science', 'religion', 'politics', 'sales']

# Groups whose artifacts were not found; reported once after the loop so a
# misnamed folder does not silently disable Level-2 predictions.
skipped_groups = []

for group in groups:
    fine_model_path = os.path.join(base_path, f"bert_fine_{group}")
    encoder_path = os.path.join(base_path, f"fine_encoder_{group}.pkl")

    # Loading is best-effort: a group is registered only when BOTH its model
    # folder and its encoder file are present.
    if not (os.path.exists(fine_model_path) and os.path.exists(encoder_path)):
        skipped_groups.append(group)
        continue

    fine_model = BertForSequenceClassification.from_pretrained(fine_model_path).to(device)
    fine_model.eval()  # inference only

    # NOTE(security): pickle.load can execute arbitrary code from the file.
    # Only load encoders that you produced yourself.
    with open(encoder_path, "rb") as f:
        fine_encoder = pickle.load(f)

    fine_tuned_models[group] = fine_model
    fine_label_encoders[group] = fine_encoder

if skipped_groups:
    print("No fine-grained model found for:", ", ".join(skipped_groups))

def _predict_label(classifier, encoder, inputs):
    """Run ``classifier`` on ``inputs`` and decode its top-scoring class with ``encoder``."""
    classifier.eval()
    with torch.no_grad():
        logits = classifier(**inputs).logits
    # A single document is passed in, so dim=1 argmax yields exactly one index.
    top_idx = torch.argmax(logits, dim=1).item()
    return encoder.inverse_transform([top_idx])[0]


def hierarchical_predict(text, level_choice):
    """Classify ``text`` at Level-1, optionally refining it with a Level-2 model.

    Relies on the module-level ``tokenizer``, ``model``, ``le``, ``device``,
    ``fine_tuned_models`` and ``fine_label_encoders`` objects.

    Args:
        text: Document to classify.
        level_choice: ``"Level-1 Only"`` or ``"Level-2 (Detailed)"``.

    Returns:
        A human-readable string. This is always a string, never ``None``:
        empty text and a missing or unknown ``level_choice`` produce a prompt
        message instead of a prediction.
    """
    # Guard clauses run before any model work so bad input is cheap to reject.
    if not text or not text.strip():
        return "Please enter some text to classify."
    if level_choice not in ("Level-1 Only", "Level-2 (Detailed)"):
        return "Please select a prediction type."

    # The same encoded inputs are fed to both levels.
    # NOTE(review): assumes the fine models share the Level-1 tokenizer's
    # vocabulary. Confirm against the training code.
    inputs = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=256,
        return_tensors="pt"
    ).to(device)

    pred_group_label = _predict_label(model, le, inputs)

    if level_choice == "Level-1 Only":
        return f"Level-1 Category: {pred_group_label}"

    # Level-2 requested: fall back to the Level-1 label when no fine-grained
    # model was loaded for the predicted group.
    if pred_group_label not in fine_tuned_models:
        return f" Fine-grained prediction: {pred_group_label}"

    fine_pred_label = _predict_label(
        fine_tuned_models[pred_group_label],
        fine_label_encoders[pred_group_label],
        inputs,
    )
    return f"Level-1 : {pred_group_label}\nLevel-2 : {fine_pred_label}"

# Web UI: a text box for the document plus a radio button choosing how deep
# the prediction should go; the result is shown as plain text.
iface = gr.Interface(
    fn=hierarchical_predict,
    inputs=[
        gr.Textbox(lines=8, placeholder="Paste your document here..."),
        gr.Radio(
            ["Level-1 Only", "Level-2 (Detailed)"],
            # Pre-select an option so the callback never receives None when
            # the user submits without touching the radio buttons.
            value="Level-1 Only",
            label="Prediction Type",
        ),
    ],
    outputs="text",
    title="Hierarchical Document Classifier",
    description="Choose Level-1 for top-level category only, or Level-2 for detailed prediction."
)

# share=True publishes the app on a temporary public gradio.live URL;
# anyone with the link can submit text to the models while it is running.
iface.launch(share=True)


Using device: cuda
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://55ef6e70877078616d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


