In [7]:
# Import the JSON dataset file

import json
import pandas as pd
# Load the raw transcript dataset. JSON text is expected to be UTF-8, so the
# encoding is passed explicitly instead of relying on the platform's locale
# default (which is not UTF-8 on every system and can garble or reject
# non-ASCII characters in the transcripts).
with open("Conversational_Transcript_Dataset.json", "r", encoding="utf-8") as f:
    conversations = json.load(f)

In [8]:
# Build a summary of the entire dataset, with each row holding one full conversation.

# Flatten the loaded JSON into one record per conversation: the identifying
# fields, the original list of turns, and all turn texts joined into one string.
rows = []

for convo_list in conversations.values():
    for convo in convo_list:
        full_text = [turn["text"] for turn in convo["conversation"]]
        record = {
            "transcript_id": convo["transcript_id"],
            "intent": convo["intent"],
            "conversation": convo["conversation"],
            "conversation_text": " ".join(full_text),
        }
        rows.append(record)

df = pd.DataFrame(rows)
print("Total conversations:", len(df))
print(df)

Total conversations: 5037
            transcript_id                                         intent  \
0     6794-8660-4606-3216                         Delivery Investigation   
1     7034-5430-2980-5483         Escalation - Repeated Service Failures   
2     1846-5500-2990-8975                      Fraud Alert Investigation   
3     1616-8531-3291-5075                      Fraud Alert Investigation   
4     7441-4348-3458-2384                          Account Access Issues   
...                   ...                                            ...   
5032  3383-1417-7709-1518                          Service Interruptions   
5033  5831-6877-4895-6254          Business Event - Major Policy Changes   
5034  5395-2366-5729-1208                          Service Interruptions   
5035  2285-8643-9678-2201  Multiple Issues - Returns & Account Inquiries   
5036  5013-5906-1491-6772    Multiple Issues - Technical, Plan & Payment   

                                           conversation  \
0 

In [9]:
#Task-1

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Number of highest-scoring terms kept per intent as candidate "causes".
TOP_K_KEYWORDS = 5

# Vectorize the joined conversation text and fit a Naive Bayes classifier that
# predicts the intent label from TF-IDF features.
vectorizer = TfidfVectorizer(stop_words="english", max_features=3000)
X = vectorizer.fit_transform(df["conversation_text"])
y = df["intent"]
model = MultinomialNB()
model.fit(X, y)
print("Model Trained Check.")

# Map each intent to its top terms, ranked by the per-class feature log
# probability learned by the model.
feature_names = np.array(vectorizer.get_feature_names_out())
intent_keywords = {}
for i, intent in enumerate(model.classes_):
    # argsort() is ascending, so the tail slice holds the top terms from
    # weakest to strongest. Reverse it so the list is strongest-first:
    # answer_why_query quotes causes[:3] in its explanation, and without the
    # reversal that would cite ranks 5, 4 and 3 and skip the two strongest terms.
    top_idx = model.feature_log_prob_[i].argsort()[-TOP_K_KEYWORDS:][::-1]
    intent_keywords[intent] = feature_names[top_idx].tolist()

def answer_why_query(intent_name):
    """Explain an intent using its learned keywords and supporting customer quotes.

    Reads the module-level ``df`` (one row per conversation) and
    ``intent_keywords`` (intent label -> list of keyword strings).

    Args:
        intent_name: Intent label to look up in ``df["intent"]``.

    Returns:
        A dict with keys ``intent``, ``main_causes``, ``explanation`` and
        ``evidence``. ``evidence`` has one entry for each of the first five
        matching conversations, holding the transcript id and the customer
        turns whose text contains at least one keyword. When no conversation
        has this intent, a fixed "not found" dict with empty lists is returned.
    """
    matching = df.loc[df["intent"] == intent_name]
    if matching.empty:
        return {"intent": intent_name,
                "explanation": "No conversations found for this outcome.",
                "main_causes": [],
                "evidence": []}

    causes = intent_keywords.get(intent_name, [])

    def _mentions_cause(turn):
        # True as soon as any keyword occurs as a substring of the turn text.
        for cause in causes:
            if cause in turn["text"].lower():
                return True
        return False

    evidence = []
    sample = matching.head(5)
    for transcript_id, turns in zip(sample["transcript_id"], sample["conversation"]):
        spans = []
        for turn in turns:
            # Only customer turns count as evidence; agent turns are skipped.
            if turn["speaker"].lower() != "customer":
                continue
            if _mentions_cause(turn):
                spans.append(turn["text"])
        evidence.append({"transcript_id": transcript_id,
                         "evidence_spans": spans})

    quoted_causes = ", ".join(causes[:3])
    explanation = f"The outcome '{intent_name}' is driven by  customer issues such as {quoted_causes}, learned from historical conversations."

    # Key order is kept as-is because callers serialize this dict with json.dumps.
    return {"intent": intent_name,
            "main_causes": causes,
            "explanation": explanation,
            "evidence": evidence}

# Each entry is (query id, question text, intent label passed to answer_why_query).
queries = [
    ("Q1", "Why do delivery investigation cases occur?", "Delivery Investigation"),
    ("Q2", "What causes customers to raise delivery investigations?", "Delivery Investigation"),
    ("Q3", "Why are fraud alert investigations triggered?", "Fraud Alert Investigation"),
    ("Q4", "What conversational patterns lead to fraud alerts?", "Fraud Alert Investigation"),
    ("Q5", "Why do service interruption complaints occur?", "Service Interruptions"),
    ("Q6", "What issues result in service interruptions?", "Service Interruptions"),
    ("Q7", "Why do repeated service failures escalate?", "Escalation - Repeated Service Failures"),
    ("Q8", "What causes repeated service issues to escalate?", "Escalation - Repeated Service Failures"),
    ("Q9", "Why do customers complain about deliveries?", "Delivery Investigation"),
    ("Q10", "What customer problems commonly lead to escalations?", "Escalation - Repeated Service Failures"),
]

# Answer every query and keep the structured answer as a JSON string so it
# fits in a single CSV column.
results = []

for qid, query, intent in queries:
    output = answer_why_query(intent)
    serialized_output = json.dumps(output)
    results.append(
        {
            "Query_Id": qid,
            "Query": query,
            "Query_Category": "Task-1",
            "System_Output": serialized_output,
        }
    )

results_df = pd.DataFrame(results)
results_df.to_csv("task1_queries_outputs.csv", index=False)
print("Csv file saved.")


Model Trained Check.
Csv file saved.


In [10]:
#Task 2

# Intent resolved by the most recent query. answer_query_with_context reads it
# to resolve follow-up questions that name no intent, and updates it after
# every answered query. None means no query has been resolved yet.
last_intent = None
def detect_intent_from_query(query):
    """Map a free-text query to an intent label by keyword lookup.

    Matching is case-insensitive and substring-based (so "repeated" also
    matches "repeatedly"). Rules are tried in order and the first hit wins.

    Args:
        query: The user's question as a string.

    Returns:
        The matching intent label, or None when no keyword occurs in the query.
    """
    # Ordered (keywords, intent label) rules; earlier rules take priority.
    keyword_rules = (
        (("delivery",), "Delivery Investigation"),
        (("fraud",), "Fraud Alert Investigation"),
        (("interruption",), "Service Interruptions"),
        (("escalation", "repeated", "failure"), "Escalation - Repeated Service Failures"),
    )

    lowered = query.lower()
    for keywords, intent_label in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return intent_label
    return None

def answer_query_with_context(query):
    """Answer a query, falling back to the previous intent for follow-ups.

    The intent is first detected from the query text. If none is found, the
    intent stored in the module-level ``last_intent`` is reused. After a
    query is answered, ``last_intent`` is updated to the intent that was used.

    Args:
        query: The user's question as a string.

    Returns:
        The dict produced by ``answer_why_query`` with two extra keys:
        ``context_used`` (True when the intent came from ``last_intent``) and
        ``resolved_intent``. If no intent is detected and there is no previous
        intent, an error dict is returned instead. It also carries
        ``context_used`` (False) and ``resolved_intent`` (None), so callers
        can read both keys on every response without a KeyError.
    """
    global last_intent
    detected_intent = detect_intent_from_query(query)
    context_used = detected_intent is None

    if context_used and last_intent is None:
        # Nothing detected and nothing to fall back on. Keep the same keys as
        # a successful response so the result can be handled uniformly.
        return {"query": query,
                "error": "No previous context available",
                "context_used": False,
                "resolved_intent": None}

    intent = last_intent if context_used else detected_intent
    output = answer_why_query(intent)
    # Remember the intent only after the answer was produced successfully.
    last_intent = intent
    output["context_used"] = context_used
    output["resolved_intent"] = intent
    return output

# Start the scripted dialogue with no remembered intent.
last_intent = None

# Scripted multi-turn dialogue. The group comments describe the intended
# topic of each stretch; the intent actually used per turn is whatever
# answer_query_with_context resolves and is printed by the loop below.
dialogue = [
    # Delivery-related opening and follow-ups
    "Why do delivery investigation cases occur?",
    "What usually goes wrong during delivery?",
    "Is this mostly due to delays or missing packages?",
    "Do customers complain multiple times about this?",
    "Does this lead to escalation if unresolved?",
    # Explicit switch to fraud alerts
    "Why are fraud alert investigations triggered?",
    "What actions raise suspicion from the system?",
    "Do customers realize fraud alerts before contacting support?",
    "Are repeated fraud alerts common?",
    # Follow-ups that name no intent
    "What conversational patterns appear before this?",
    "Do agents follow a fixed process here?",
    # Service interruption questions
    "Why do service interruptions occur?",
    "Are these mostly technical failures?",
    "Do customers report outages repeatedly?",
    "Does repeated downtime frustrate customers?",
    # Escalation questions
    "Why do repeated service failures escalate?",
    "What pushes customers to demand escalation?",
    "Does poor resolution history contribute to this?",
]

# Replay the dialogue in order and report how each turn was resolved.
for turn, user_query in enumerate(dialogue, start=1):
    response = answer_query_with_context(user_query)
    print(f"Turn {turn}")
    print(f"User Query : {user_query}")
    for label, key in (("Context Used", "context_used"),
                       ("Resolved Intent", "resolved_intent")):
        print(f"{label} : {response[key]}")
    print("-" * 80)



Turn 1
User Query : Why do delivery investigation cases occur?
Context Used : False
Resolved Intent : Delivery Investigation
--------------------------------------------------------------------------------
Turn 2
User Query : What usually goes wrong during delivery?
Context Used : False
Resolved Intent : Delivery Investigation
--------------------------------------------------------------------------------
Turn 3
User Query : Is this mostly due to delays or missing packages?
Context Used : True
Resolved Intent : Delivery Investigation
--------------------------------------------------------------------------------
Turn 4
User Query : Do customers complain multiple times about this?
Context Used : True
Resolved Intent : Delivery Investigation
--------------------------------------------------------------------------------
Turn 5
User Query : Does this lead to escalation if unresolved?
Context Used : False
Resolved Intent : Escalation - Repeated Service Failures
-------------------------