# LIBRARIES

In [2]:
import random
from pathlib import Path

import pandas as pd
from faker import Faker

# PRODUCTS & TEMPLATES 

In [3]:
# Product catalogue, declared one product line at a time.
# Each entry is (product line, plan names); the flat `products` list below
# keeps the plans in exactly this declaration order.
_catalogue = (
    ("Mobile", (
        "Prepaid Basic 5GB",
        "Prepaid Plus 20GB",
        "Prepaid Unlimited",
        "Postpaid Silver",
        "Postpaid Gold",
        "Postpaid Platinum",
    )),
    ("Home Broadband", (
        "ZENDFiber Home 100 Mbps",
        "ZENDFiber Home 300 Mbps",
        "ZENDFiber Home 1 Gbps",
    )),
    ("Office Broadband", (
        "ZENDOffice Net 200",
        "ZENDOffice Net 500",
        "ZENDOffice Net 1G",
    )),
    ("Business", (
        "ZENDBiz Connect 100",
        "ZENDBiz Connect 500",
        "ZENDBiz Connect 1G",
        "ZENDEnterprise Ultra",
        "ZENDEnterprise Dedicated",
    )),
    ("Cloud", (
        "ZENDCloud VM Basic",
        "ZENDCloud VM Pro",
        "ZENDCloud VM Enterprise",
        "ZENDStorage 1TB",
        "ZENDStorage 10TB",
        "ZENDArchive Storage",
    )),
    ("IoT", (
        "ZENDSmart Traffic",
        "ZENDSmart Lighting",
        "ZENDSmart Parking",
        "ZENDIndustrial Sensor",
        "ZENDFleet IoT",
    )),
)

# Flat list of plan names; one of these fills the {product} placeholder.
products = [plan for _line, plans in _catalogue for plan in plans]

# Markets that can fill the {country} placeholder of a template.
countries = [
    "India",
    "USA",
    "Singapore",
    "Thailand",
]

# Customer segment labels (the same two strings assign_user_type returns).
user_types = [
    "New User",
    "Existing User",
]

# Lead-in phrases; one is picked at random and placed in front of the
# formatted template when a query text is assembled.
openers = [
    "Hi team,",
    "Hello support,",
    "Good morning,",
    "I wanted to check,",
    "Can you please confirm,",
    "I need assistance regarding,",
    # Plain ASCII apostrophe on purpose: this literal previously held a
    # mis-decoded UTF-8 right single quote (mojibake), which ended up
    # verbatim in the generated dataset text.
    "Hope you're doing well,",
    "I have a concern about,",
    "Kindly help me with,",
    "I would like to know,"
]

# Closing clauses; one is picked at random and appended after the formatted
# template. Every clause already carries its own trailing period.
contexts = [
    "for my home connection.", "for enterprise usage.",
    "after my recent upgrade.", "since last week.",
    "during peak hours.", "for my office network.",
    "after installation.", "for international usage.",
    "under my current subscription.", "as part of my annual plan.",
]

# Query templates, declared per intent as (pattern, sentiment) pairs.
# Patterns may contain the {product} and/or {country} placeholders.
_templates_by_intent = (
    ("Technical", (
        ("internet speed is extremely slow on {product} in {country}", "Frustrated"),
        ("my {product} connection keeps disconnecting in {country}", "Frustrated"),
        ("installation time for {product} in {country}", "Informational"),
        ("network performance has been excellent on {product}", "Satisfied"),
    )),
    ("Billing", (
        ("billing charges for {product} seem incorrect in {country}", "Frustrated"),
        ("explain billing cycle for {product}", "Informational"),
        ("billing experience has been smooth so far", "Satisfied"),
    )),
    ("Refund", (
        ("refund not processed for {product}", "Frustrated"),
        ("refund eligibility for {product}", "Informational"),
        ("refund was credited quickly", "Satisfied"),
    )),
    ("Complaint", (
        ("very unhappy with {product} service", "Frustrated"),
        ("how to raise complaint for {product}", "Informational"),
        ("support resolved my issue professionally", "Satisfied"),
    )),
    ("Product Inquiry", (
        ("price of {product} in {country}", "Informational"),
        ("features included in {product}", "Informational"),
        ("{product} offers great value for money", "Satisfied"),
        ("cost of {product} is too high", "Frustrated"),
    )),
)

# Flat list of (pattern, intent, sentiment) triples in declaration order.
templates = [
    (pattern, intent_label, mood)
    for intent_label, variants in _templates_by_intent
    for pattern, mood in variants
]




# USER TYPE ASSIGNMENT (BY INTENT)

In [4]:
def assign_user_type(intent: str, rng: "random.Random | None" = None) -> str:
    """Draw a user-type label whose odds depend on the query intent.

    Intents "Technical", "Billing", "Complaint" and "Refund" yield
    "Existing User" with weight 0.8 and "New User" with weight 0.2.
    Any other intent yields "New User" with weight 0.7 and
    "Existing User" with weight 0.3.

    Args:
        intent: Intent label of the query.
        rng: Optional ``random.Random`` instance to draw from. When omitted,
            the module-level ``random`` generator is used, exactly as before,
            so existing single-argument calls behave the same.

    Returns:
        Either "Existing User" or "New User".
    """
    # The `random` module exposes the same `choices` API as a Random instance.
    chooser = random if rng is None else rng

    if intent in ("Technical", "Billing", "Complaint", "Refund"):
        population = ["Existing User", "New User"]
        weights = [0.8, 0.2]
    else:
        population = ["New User", "Existing User"]
        weights = [0.7, 0.3]

    # choices() returns a list of length k (default 1); unwrap the single draw.
    return chooser.choices(population, weights=weights)[0]

# DATA GENERATION

In [5]:
# Generation parameters.
TARGET_ROWS = 20000                # number of unique rows to collect
MAX_ATTEMPTS = TARGET_ROWS * 100   # cap on draws so the loop can never spin forever
RANDOM_SEED = 42                   # fixed seed so a fresh-kernel run reproduces the data

random.seed(RANDOM_SEED)

# A dict is used as an insertion-ordered set: its keys are the unique rows.
# Unlike set(), its iteration order does not depend on string hash
# randomisation, so list(rows) is the same on every run with the fixed seed.
rows = {}
attempts = 0

while len(rows) < TARGET_ROWS:

    # Duplicates are discarded, so if TARGET_ROWS ever exceeded the number of
    # distinct combinations the loop would never finish; fail loudly instead.
    if attempts >= MAX_ATTEMPTS:
        raise RuntimeError(
            f"Only {len(rows)} unique rows after {attempts} attempts; "
            f"cannot reach {TARGET_ROWS}."
        )
    attempts += 1

    template, intent, sentiment = random.choice(templates)

    opener = random.choice(openers)
    context = random.choice(contexts)

    product = random.choice(products)
    country = random.choice(countries)

    # str.format ignores unused keyword arguments, so templates that lack
    # {product} or {country} are formatted without error.
    text = f"{opener} {template.format(product=product, country=country)} {context}"

    user_type = assign_user_type(intent)

    # Key = (text, intent, sentiment, user_type); the value is unused.
    rows[(
        text.strip(),
        intent,
        sentiment,
        user_type
    )] = None

print("Generated Rows:", len(rows))

Generated Rows: 20000


In [7]:
# Turn the collected unique rows into a DataFrame, one column per tuple field.
df = pd.DataFrame(
    list(rows),
    columns=["text", "intent", "sentiment", "user_type"]
)

output_path = Path("data/zends_customer_query_dataset.csv")

# to_csv does not create missing directories, so make sure the target folder
# exists; exist_ok keeps re-running this cell harmless.
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the positional index out of the file.
df.to_csv(output_path, index=False)

print("Dataset generated successfully.")

Dataset generated successfully.


In [8]:
# Show the first rows of the generated DataFrame as a quick visual check.
df.head()

Unnamed: 0,text,intent,sentiment,user_type
0,"I have a concern about, cost of Prepaid Plus 2...",Product Inquiry,Frustrated,New User
1,"Good morning, billing charges for ZENDEnterpri...",Billing,Frustrated,Existing User
2,"Hi team, ZENDFleet IoT offers great value for ...",Product Inquiry,Satisfied,Existing User
3,"I wanted to check, cost of Postpaid Silver is ...",Product Inquiry,Frustrated,New User
4,"Hi team, features included in ZENDBiz Connect ...",Product Inquiry,Informational,New User
