<a href="https://colab.research.google.com/github/divyasri-jegan-11/IntentDetectAI/blob/main/intent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install necessary libraries

In [None]:
!pip install -q transformers datasets evaluate

Load the CLINC150 intent dataset and fine-tune a multilingual BERT model on it

In [None]:
from datasets import load_dataset

# Load the CLINC150 intent-classification dataset (`clinc_oos`).
# NOTE(review): per the dataset card, the "plus" config appears to be an
# English-only variant with additional out-of-scope training examples, not a
# multilingual version — confirm before describing the data as multilingual.
dataset = load_dataset("clinc_oos", name="plus")

# Intent names (list position == class id) and the resulting number of classes
train_features = dataset["train"].features
labels = train_features["intent"].names
num_labels = len(labels)


In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")


def tokenize(example):
    """Tokenize the ``text`` field of an example (or a batch of examples).

    Sequences longer than the model maximum are truncated. Padding is
    intentionally NOT applied here: padding every short query to the model's
    maximum length wastes most of the compute on pad tokens. Because the
    tokenizer is handed to the ``Trainer``, its default collator pads each
    batch to the longest sequence in that batch instead.

    Args:
        example: Mapping with a ``"text"`` entry (a string, or a list of
            strings when called through ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output (``input_ids``, ``attention_mask``, ...).
    """
    return tokenizer(example["text"], truncation=True)

# Tokenize every split. The result is bound to its own name so the raw
# `dataset` stays untouched and this cell can be re-run on its own: rebinding
# `dataset` would make a second run fail, because the "intent" column would
# already have been renamed.
tokenized_dataset = dataset.map(tokenize, batched=True)

# The model takes its targets through a `labels` argument, so expose the
# intent ids under that column name and return only the needed columns as tensors.
tokenized_dataset = tokenized_dataset.rename_column("intent", "labels")
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# Split
train_ds = tokenized_dataset["train"]
val_ds = tokenized_dataset["validation"]
test_ds = tokenized_dataset["test"]


In [None]:
from transformers import AutoModelForSequenceClassification

# Store the class-id <-> intent-name mapping in the model config. Without it
# the saved/pushed model only knows generic "LABEL_n" names, so anything that
# loads it later cannot report a readable intent.
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-multilingual-cased",
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)


In [None]:
from transformers import TrainingArguments

import inspect

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases, and the old keyword is rejected by recent versions. The install
# cell does not pin a version, so pick whichever name this installation accepts.
_eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",  # must match the evaluation strategy for load_best_model_at_end
    per_device_train_batch_size=8,
    num_train_epochs=2,
    save_total_limit=1,
    load_best_model_at_end=True,
    report_to="none",  # Disable wandb
    **{_eval_strategy_arg: "epoch"},
)


In [None]:
from transformers import Trainer, TrainingArguments

import inspect

# Newer transformers releases receive the tokenizer through `processing_class`
# and deprecate the older `tokenizer=` keyword. Use whichever keyword the
# installed Trainer accepts so the cell runs on both old and new versions.
_tokenizer_arg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    **{_tokenizer_arg: tokenizer},
)

trainer.train()


Evaluate the accuracy of the fine-tuned model on the test set

In [None]:
import evaluate

# Score the fine-tuned model on the held-out test split.
metric = evaluate.load("accuracy")
preds = trainer.predict(test_ds)

# The predicted class for each example is the index of its largest logit
predicted_ids = preds.predictions.argmax(axis=-1)
accuracy = metric.compute(
    predictions=predicted_ids,
    references=preds.label_ids,
)

print("Test Accuracy:", accuracy)


Log in to the Hugging Face Hub and push the fine-tuned model and tokenizer to it, so they can be loaded from a Hugging Face Space or any other application

In [None]:
from huggingface_hub import notebook_login
# Authenticate this notebook session with the Hugging Face Hub; the
# push_to_hub calls in the next cell need a logged-in account to upload to.
notebook_login()

In [None]:
# Upload the fine-tuned weights and the tokenizer files to the same Hub repo,
# so both can later be loaded from one repository id.
hub_repo = "IntentDetectAI"
model.push_to_hub(hub_repo)
tokenizer.push_to_hub(hub_repo)


Build a simple web application for the classifier with Streamlit

In [None]:
!pip install streamlit torch

In [None]:
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# NOTE(review): Streamlit only renders a UI when this code is launched with
# `streamlit run <script>.py`; executed as a plain notebook cell it will not
# show the app. Consider saving this cell to a script (e.g. app.py).

MODEL_ID = "divyasani11/IntentDetectAI"


@st.cache_resource
def load_classifier(model_id: str = MODEL_ID):
    """Load the tokenizer and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every user interaction; without
    caching, the model would be re-loaded for every submitted query.

    Args:
        model_id: Hugging Face Hub repository id to load from.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )


st.title("🧠 IntentDetectAI")
st.write("Multilingual Intent Classifier")

tokenizer, model = load_classifier()

text = st.text_input("Enter your query:")
# Skip empty and whitespace-only input: there is nothing to classify
if text and text.strip():
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**inputs).logits
    prediction = torch.argmax(logits, dim=-1).item()
    # Show the intent name stored in the model config, not just the raw class
    # index; fall back to the index if the config has no entry for it.
    intent = model.config.id2label.get(prediction, str(prediction))
    st.write(f"Predicted intent: {intent} (class id {prediction})")
