In [1]:
!pip install transformers datasets torch pandas tqdm emoji

Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvid

The Libraries

In [2]:
import json
import re
import pandas as pd
from tqdm import tqdm
import emoji

from transformers import pipeline

Preprocessing Function

Cleaning the text:

lowercase

remove emojis and special characters (only letters, digits, whitespace and `.,!?` are kept)

remove extra spaces

In [14]:
def clean_text(text):
    """Normalize a raw message for classification.

    Steps, in order: lowercase the text, strip emojis, delete every character
    that is not a lowercase letter, digit, whitespace or one of ``.,!?``,
    then squeeze whitespace runs into single spaces and trim both ends.
    """
    lowered = text.lower()
    emoji_free = emoji.replace_emoji(lowered, replace='')
    # Characters outside the whitelist are deleted outright, not replaced by a space.
    whitelisted = re.sub(r'[^a-z0-9\s.,!?]', '', emoji_free)
    return re.sub(r'\s+', ' ', whitelisted).strip()
def preprocess_conversation(messages, last_n=4):
    """Build the classifier input from the last ``last_n`` *user* messages.

    Args:
        messages: Iterable of message dicts. Every dict must have a ``'sender'``
            key; dicts whose sender is ``'user'`` must also have ``'text'``.
        last_n: How many of the most recent user messages to keep. Zero or a
            negative value keeps nothing.

    Returns:
        The kept messages, each passed through ``clean_text`` and joined by a
        single space. An empty string when there is nothing to keep.
    """
    user_msgs = [m['text'] for m in messages if m['sender'] == 'user']
    # A bare ``user_msgs[-last_n:]`` misbehaves for last_n == 0 (``[-0:]`` is
    # ``[0:]``, i.e. *all* messages) and for negatives (slices from the front).
    truncated = user_msgs[-last_n:] if last_n > 0 else []
    return " ".join(clean_text(m) for m in truncated)

Intent Classifier (Zero-Shot)

In [15]:
# Candidate intents for zero-shot classification. Each entry is a short intent
# name followed by a parenthetical description; the whole string (description
# included) is handed to the classifier as the candidate label.
intent_labels = [
    "Book Appointment (user wants to schedule a visit, meeting, or appointment)",
    "Product Inquiry (user is asking about availability or details of a product or service)",
    "Pricing Negotiation (user is bargaining or asking for a better price)",
    "Support Request (user needs help or reports an issue)",
    "Follow-Up (user is checking on a previous request or waiting for an update)"
]


# Zero-shot classification pipeline using the facebook/bart-large-mnli model.
# Built once at module level and reused by every predict_intent call.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def predict_intent(conversation):
    """Predict the intent of a conversation and build a short rationale.

    Args:
        conversation: Text to classify (for example the output of
            ``preprocess_conversation``).

    Returns:
        A ``(predicted_intent, rationale)`` tuple. ``predicted_intent`` is the
        first label returned by the classifier, or ``"Unknown"`` when there is
        no text to classify. ``rationale`` is a human-readable sentence.
    """
    # The text can be empty when a conversation has no user messages, or when
    # cleaning removed everything (e.g. emoji-only messages). Classifying an
    # empty string is meaningless and the pipeline may reject it outright, so
    # report that explicitly instead of aborting the whole batch.
    if not conversation or not conversation.strip():
        return "Unknown", "No user text was available to classify, so no intent was predicted."

    result = classifier(conversation, intent_labels)
    predicted_intent = result['labels'][0]
    confidence = result['scores'][0]

    # Extract last user message as part of rationale.
    # NOTE: when the text contains no "user:" / "agent:" markers, both splits
    # are no-ops and this is simply the whole (stripped) conversation text.
    last_user_message = conversation.split("user:")[-1].split("agent:")[0].strip()
    rationale = f"The conversation ends with: '{last_user_message}'. Model suggests '{predicted_intent}' with confidence {confidence:.2f}."

    return predicted_intent, rationale

Device set to use cpu


Main processing function

In [6]:
def process_conversations(input_file, output_json, output_csv):
    """Classify every conversation in a JSON file and save the predictions.

    Args:
        input_file: Path to a JSON file holding a list of conversations, each
            with ``"conversation_id"`` and ``"messages"`` keys.
        output_json: Path the predictions are written to as indented JSON.
        output_csv: Path the predictions are written to as CSV (no index column).

    Returns:
        List of dicts with ``conversation_id``, ``predicted_intent`` and
        ``rationale`` keys, one per conversation, in input order.
    """
    # Explicit UTF-8: chat logs routinely contain emojis and other non-ASCII
    # text, and the platform default encoding is not UTF-8 everywhere.
    with open(input_file, 'r', encoding='utf-8') as f:
        conversations = json.load(f)

    results = []

    for conv in tqdm(conversations, desc="Processing conversations"):
        conv_id = conv["conversation_id"]
        context = preprocess_conversation(conv["messages"])
        intent, rationale = predict_intent(context)

        results.append({
            "conversation_id": conv_id,
            "predicted_intent": intent,
            "rationale": rationale
        })

    # Save JSON
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    # Save CSV. Naming the columns keeps the header row even when the input
    # held no conversations (an unnamed empty frame writes no header at all).
    df = pd.DataFrame(results, columns=["conversation_id", "predicted_intent", "rationale"])
    df.to_csv(output_csv, index=False)

    print(f"✅ Saved {len(results)} predictions!")
    return results

In [16]:
from google.colab import files

print("📤 Please upload the input JSON file...")
uploaded = files.upload()

# The returned mapping can be empty (e.g. the upload dialog was cancelled);
# fail with a clear message instead of an IndexError on the lookup below.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose the input JSON file.")

# Use the first uploaded file as the input (keys are the uploaded filenames)
input_file = next(iter(uploaded))

# Output filenames
output_json = "predicted_intents.json"
output_csv = "predicted_intents.csv"

# Run processing
results = process_conversations(input_file, output_json, output_csv)

# Download outputs
print("📥 Downloading results...")
files.download(output_json)
files.download(output_csv)


📤 Please upload the input JSON file...


Saving _MConverter.eu_conv.json to _MConverter.eu_conv (4).json


Processing conversations: 100%|██████████| 3/3 [00:15<00:00,  5.22s/it]

✅ Saved 3 predictions!
📥 Downloading results...





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>