GENERACIÓN DE DATOS SINTÉTICOS PARA ANÁLISIS DE ENCUESTAS DE CALL CENTER PARA LA CALIBRACIÓN DE MODELOS

In [7]:
import csv
import itertools
import random
from datetime import datetime, timedelta
from string import Formatter

import pandas as pd

# Reproducibility: seed the global RNG once, before any random draw below,
# so the sequence of random numbers is repeatable from run to run.
SEED = 42
random.seed(SEED)

# Supported contact channels
canales = ["email", "chatbot", "phone"]

# SLA limit per channel, in minutes
sla_limits = {
    "chatbot": 10,    # 10 minutes
    "email": 1440,    # 24 hours
    "phone": 30       # 30 minutes
}

# Resolution groups (10 groups)
grupos = ["Grupo A", "Grupo B", "Grupo C", "Grupo D", "Grupo E",
          "Grupo F", "Grupo G", "Grupo H", "Grupo I", "Grupo J"]

# Issue classification catalog; the per-sentiment weight vectors used by
# generar_issue_classification follow this exact order.
issue_classification_catalog = [
    "Card Issues", "Balance Inquiry", "Transaction Error", "Loan Inquiry",
    "Payment Issues", "Account Setup", "Login Problems", "Fees Inquiry",
    "Fraud Report", "General Inquiry"
]

# Hand-written example comments: 10 per (channel, sentiment) combination.
# Each comment explicitly mentions its channel.
# NOTE: every list in this dict is reassigned further down by the loop that
# calls generar_comentarios(...), so these examples only define the
# channel -> sentiment key structure; their text never reaches the dataset.
comentarios = {
    "email": {
        "positive": [
            "The email support team responded quickly and solved my issue.",
            "I got a helpful email reply within an hour, great service!",
            "Email support was very professional, they fixed my login issue.",
            "The email team provided a detailed response, I’m satisfied!",
            "I received a prompt email reply that resolved my query.",
            "Email support was fantastic, they handled my issue perfectly.",
            "The email response was clear and addressed all my concerns.",
            "I’m impressed with how fast email support fixed my problem.",
            "The email team was courteous and resolved my payment issue.",
            "Email support exceeded my expectations with their quick reply."
        ],
        "negative": [
            "Email support took days to reply, this is unacceptable!",
            "The email team completely ignored my urgent request.",
            "I got a vague email response that didn’t help at all.",
            "Email support was rude and unprofessional in their reply.",
            "The email team took too long to fix my account issue.",
            "Email support didn’t even read my message properly.",
            "The email response was late and full of errors.",
            "Email support was slow and didn’t resolve my issue.",
            "The email team sent a generic reply that was useless.",
            "Email support failed to address my complaint properly."
        ],
        "neutral": [
            "Email support replied, but the response was not very detailed.",
            "The email team was okay, nothing special in their reply.",
            "I got an email response that was decent, but not great.",
            "Email support was average, they did the minimum required.",
            "The email reply was fine, but it took a bit of time.",
            "Email support was neutral, they met basic expectations.",
            "The email team’s response was fair, nothing impressive.",
            "Email support was alright, but could have been better.",
            "I received an email reply that was standard, nothing more.",
            "The email response was okay, but lacked detail."
        ]
    },
    "chatbot": {
        "positive": [
            "The chatbot resolved my issue instantly, I’m impressed!",
            "I love how the chatbot helped me check my balance quickly.",
            "The chatbot was amazing, it guided me through my transfer.",
            "Chatbot support was excellent, it answered all my questions.",
            "The chatbot made paying my bill so easy and fast.",
            "I’m happy with how the chatbot handled my account inquiry.",
            "The chatbot was quick to assist with my card issue.",
            "Chatbot support was fantastic, it saved me so much time.",
            "The chatbot explained my fees clearly, great experience!",
            "The chatbot assisted me with my loan query perfectly."
        ],
        "negative": [
            "The chatbot gave me wrong info about my account balance.",
            "The chatbot couldn’t understand my simple question.",
            "I tried the chatbot, but it kept crashing mid-session.",
            "The chatbot failed to process my payment request properly.",
            "The chatbot gave irrelevant answers to my inquiry.",
            "The chatbot was useless for my card replacement issue.",
            "The chatbot misunderstood my loan question completely.",
            "The chatbot crashed when I tried to check my fees.",
            "The chatbot gave me outdated info about my account.",
            "The chatbot couldn’t handle my transfer request at all."
        ],
        "neutral": [
            "The chatbot answered my question, but it took a while.",
            "Chatbot support was average, nothing too impressive.",
            "The chatbot helped me, but it wasn’t very quick.",
            "The chatbot was fine, it did what I needed eventually.",
            "Chatbot support was okay, but could be faster.",
            "The chatbot’s response was neutral, not bad or good.",
            "The chatbot assisted me, but it took a few tries.",
            "Chatbot support was decent, met basic expectations.",
            "The chatbot handled my query, but it was a bit slow.",
            "The chatbot was alright, nothing special to note."
        ]
    },
    "phone": {
        "positive": [
            "The phone agent was very helpful and resolved my issue fast.",
            "Phone support was excellent, the agent was so polite!",
            "I had a great experience with the phone support team today.",
            "The phone representative explained everything clearly, I’m happy!",
            "Phone support fixed my issue in minutes, fantastic service!",
            "The phone agent was patient and resolved my payment problem.",
            "I love how efficient the phone support was with my query.",
            "The phone team was professional and handled my request well.",
            "Phone support made me feel valued, they did a great job!",
            "The phone agent assisted me with my account setup perfectly."
        ],
        "negative": [
            "The phone agent was rude and didn’t solve my issue.",
            "I waited on hold for an hour with phone support, terrible!",
            "The phone representative was unhelpful and impatient.",
            "Phone support gave me conflicting info about my account.",
            "The phone agent hung up on me in the middle of the call.",
            "Phone support was awful, they didn’t know what to do.",
            "The phone team ignored my complaint completely.",
            "Phone support was slow and didn’t fix my card issue.",
            "The phone agent didn’t listen to my problem at all.",
            "Phone support was unprofessional and wasted my time."
        ],
        "neutral": [
            "Phone support was okay, nothing special to mention.",
            "The phone agent helped me, but it took some time.",
            "Phone support was average, they got the job done.",
            "The phone team was fine, nothing impressive though.",
            "Phone support was standard, met my basic needs.",
            "The phone agent was alright, no complaints really.",
            "Phone support was neutral, nothing stood out.",
            "The phone team solved my issue, but it was slow.",
            "Phone support was decent, did what was expected.",
            "The phone agent was okay, nothing remarkable."
        ]
    }
}

# Función para generar 500 comentarios únicos por canal y sentimiento
def generar_comentarios(canal, sentimiento, num_comentarios=500):
    # Plantillas base para generar comentarios
    positive_templates = [
        f"The {canal} {{action}} my {{issue}} {{adverb}}, {{expression}}!",
        f"I’m {{emotion}} with how the {canal} {{action}} my {{issue}}.",
        f"The {canal} was {{adjective}}, it {{action}} my {{issue}} {{adverb}}.",
        f"{canal.capitalize()} support {{action}} my {{issue}} {{adverb}}, great job!",
        f"I love how the {canal} {{action}} my {{issue}} so {{adverb}}."
    ]
    negative_templates = [
        f"The {canal} {{failed_action}} my {{issue}}, {{expression}}!",
        f"{canal.capitalize()} support was {{adjective}} and didn’t {{action}} my {{issue}}.",
        f"The {canal} {{failed_action}} my {{issue}} {{adverb}}, so {{adjective}}.",
        f"I waited {{time}} for the {canal}, but they {{failed_action}} my {{issue}}.",
        f"The {canal} was {{adjective}}, they {{failed_action}} my {{issue}} {{adverb}}."
    ]
    neutral_templates = [
        f"The {canal} {{action}} my {{issue}}, but it {{modifier}}.",
        f"{canal.capitalize()} support was {{adjective}}, nothing {{expression}}.",
        f"The {canal} {{action}} my {{issue}}, but it was {{modifier}}.",
        f"{canal.capitalize()} support was {{adjective}}, they {{action}} my {{issue}}.",
        f"The {canal} was {{adjective}}, {{action}} my {{issue}} {{modifier}}."
    ]

    # Listas de palabras para completar las plantillas
    actions = ["resolved", "fixed", "handled", "addressed", "answered"]
    failed_actions = ["couldn’t handle", "failed to resolve", "ignored", "didn’t fix", "messed up"]
    issues = ["balance inquiry", "payment issue", "login problem", "card issue", "loan query"]
    adverbs = ["quickly", "efficiently", "perfectly", "smoothly", "instantly"]
    expressions = ["I’m impressed", "great service", "I’m happy", "well done", "so frustrating", "terrible", "unacceptable"]
    emotions = ["happy", "satisfied", "impressed", "pleased", "glad"]
    adjectives = ["helpful", "professional", "excellent", "amazing", "useless", "rude", "unhelpful", "average", "decent", "fine"]
    times = ["forever", "too long", "an hour", "days", "way too long"]
    modifiers = ["took a while", "wasn’t quick", "was okay", "nothing special", "met expectations"]

    # Seleccionar las plantillas según el sentimiento
    if sentimiento == "positive":
        templates = positive_templates
    elif sentimiento == "negative":
        templates = negative_templates
    else:
        templates = neutral_templates

    # Generar comentarios únicos
    generated_comments = set()
    while len(generated_comments) < num_comentarios:
        template = random.choice(templates)
        comment = template.format(
            action=random.choice(actions) if sentimiento != "negative" else random.choice(failed_actions),
            failed_action=random.choice(failed_actions),
            issue=random.choice(issues),
            adverb=random.choice(adverbs) if sentimiento == "positive" else random.choice(["completely", "terribly", "poorly"]),
            expression=random.choice(expressions),
            emotion=random.choice(emotions),
            adjective=random.choice(adjectives),
            time=random.choice(times),
            modifier=random.choice(modifiers) if sentimiento == "neutral" else ""
        )
        generated_comments.add(comment)

    return list(generated_comments)

# Replace every list in `comentarios` with 500 generated comments
for canal in ("email", "chatbot", "phone"):
    for sentimiento in ("positive", "negative", "neutral"):
        comentarios[canal][sentimiento] = generar_comentarios(
            canal, sentimiento, num_comentarios=500
        )

# Sanity check: report how many comments each category ended up with
for canal, por_sentimiento in comentarios.items():
    for sentimiento, lista in por_sentimiento.items():
        print(f"Canal: {canal}, Sentimiento: {sentimiento}, Número de comentarios: {len(lista)}")

# Draw a random date between 2022-01-01 and 2025-02-28
def generar_fecha_aleatoria():
    """Return a random ``datetime`` (at midnight) from 2022-01-01 through 2025-02-28.

    Both end dates can be drawn, because ``random.randint`` includes its
    upper bound.
    """
    primer_dia = datetime(2022, 1, 1)
    ultimo_dia = datetime(2025, 2, 28)
    offset_dias = random.randint(0, (ultimo_dia - primer_dia).days)
    return primer_dia + timedelta(days=offset_dias)

# Pick a sentiment label with month-dependent odds
def get_seasonal_feeling(date):
    """Return "positive", "negative" or "neutral" for the given date.

    July and December draw with weights 50% positive / 40% negative /
    10% neutral; every other month uses 65% / 25% / 10%.

    Args:
        date: Any object exposing a ``month`` attribute.
    """
    labels = ["positive", "negative", "neutral"]
    peak_complaint_months = [7, 12]
    if date.month in peak_complaint_months:
        odds = [0.5, 0.4, 0.1]
    else:
        odds = [0.65, 0.25, 0.1]
    (chosen,) = random.choices(labels, weights=odds)
    return chosen

# Map a sentiment label to a 1-5 sentiment score
def generar_sentiment_rate(sentimiento):
    """Return a 1-5 score: 4 or 5 for positive, 1 or 2 for negative, else 3.

    No random number is consumed when the fixed value 3 is returned.
    """
    score_range = {"positive": (4, 5), "negative": (1, 2)}.get(sentimiento)
    if score_range is None:
        return 3
    lowest, highest = score_range
    return random.randint(lowest, highest)

# Start of the week (Sunday) for a "YYYY-MM-DD" date string
def get_start_of_week(date):
    """Return the Sunday on or before ``date`` as a "YYYY-MM-DD" string.

    Args:
        date: Date string in "%Y-%m-%d" format.
    """
    parsed = datetime.strptime(date, "%Y-%m-%d")
    # isoweekday(): Monday=1 ... Sunday=7, so modulo 7 is the distance back to Sunday.
    days_since_sunday = parsed.isoweekday() % 7
    sunday = parsed - timedelta(days=days_since_sunday)
    return sunday.strftime("%Y-%m-%d")

# First day of the month for a "YYYY-MM-DD" date string
def get_start_of_month(date):
    """Return the first day of ``date``'s month as a "YYYY-MM-DD" string.

    Args:
        date: Date string in "%Y-%m-%d" format.
    """
    parsed = datetime.strptime(date, "%Y-%m-%d")
    # Hard-code the day component instead of building a second datetime.
    return parsed.strftime("%Y-%m-01")

# Draw a resolution time (minutes) that tends to grow with the number of groups
def generar_tiempo_respuesta(canal, sentimiento, num_grupos):
    """Return a random resolution time in minutes, rounded to 2 decimals.

    The time is drawn uniformly between the base minimum for the
    channel/sentiment and an upper bound. With 80% probability the base
    maximum is stretched by ``1 + u * num_grupos`` (``u`` uniform in
    [0.1, 0.3]); the stretched maximum is then capped per channel.

    Args:
        canal: "chatbot", "email" or "phone".
        sentimiento: "positive", "neutral" or "negative".
        num_grupos: Number of groups that handled the case.
    """
    # (min, max) base minutes per channel and sentiment
    base_ranges = {
        "chatbot": {"positive": (1, 5), "neutral": (5, 15), "negative": (10, 30)},
        "email": {"positive": (144, 720), "neutral": (720, 2880), "negative": (2880, 14400)},  # 0.1 to 10 days
        "phone": {"positive": (5, 15), "neutral": (15, 30), "negative": (30, 60)}
    }
    # Hard upper limit per channel, in minutes (email: 10 days)
    hard_caps = {"chatbot": 60, "email": 14400, "phone": 180}

    lower, upper = base_ranges[canal][sentimiento]

    factor = 1
    if random.random() < 0.8:  # 80% of cases: group count stretches the maximum
        factor = 1 + (random.uniform(0.1, 0.3) * num_grupos)

    capped_upper = min(upper * factor, hard_caps[canal])
    return round(random.uniform(lower, capped_upper), 2)

# Split average handle time (AHT) into talk / hold / wrap-up components
def generar_aht_components(canal, resolution_time_min):
    """Return ``(aht, talk_time, hold_time, wrap_up_time)``, each rounded to 2 decimals.

    For "email" the AHT is the resolution time capped at 45 minutes and the
    three components are 0. For any other channel the AHT is capped at 15
    minutes and split into talk time (60-80%), hold time (10-20%) and
    wrap-up time (the remainder).
    """
    if canal != "email":
        total = min(resolution_time_min, 15)  # cap at 15 minutes
        talk = total * random.uniform(0.6, 0.8)
        hold = total * random.uniform(0.1, 0.2)
        wrap_up = total - talk - hold
        return round(total, 2), round(talk, 2), round(hold, 2), round(wrap_up, 2)

    # Email: cap at 45 minutes, no component breakdown
    return round(min(resolution_time_min, 45), 2), 0, 0, 0

# Draw a CSAT rating consistent with the sentiment
def generar_csat(sentimiento):
    """Return a random CSAT rating: 4-5 for positive, 1-2 for negative, 3-4 otherwise."""
    rating_ranges = {"positive": (4, 5), "negative": (1, 2)}
    lowest, highest = rating_ranges.get(sentimiento, (3, 4))
    return random.randint(lowest, highest)

# Flag whether a case met its channel's SLA (1 = yes, 0 = no)
def dentro_sla(canal, tiempo_respuesta):
    """Return 1 when ``tiempo_respuesta`` is at or below ``sla_limits[canal]``, else 0."""
    limite = sla_limits[canal]
    if tiempo_respuesta <= limite:
        return 1
    return 0

# Draw the first-contact-resolution flag (1 = yes, 0 = no)
def generar_fcr(sentimiento):
    """Return 1 or 0, with the odds of 1 depending on the sentiment.

    Probability of 1: 90% for positive, 20% for negative, 60% otherwise.
    """
    odds_by_sentiment = {
        "positive": [0.9, 0.1],
        "negative": [0.2, 0.8],
    }
    odds = odds_by_sentiment.get(sentimiento, [0.6, 0.4])
    (flag,) = random.choices([1, 0], weights=odds)
    return flag

# Build the sequence of groups that touched a case, plus its counts
def generar_group_history(sentimiento, canal):
    """Return ``(history_str, unique_groups, total_groups)`` for one case.

    The number of groups is drawn first (1-3 for positive, 1-5 for negative,
    1-4 otherwise, with the weights below); each group is then picked
    independently from ``grupos``, so the same group may appear more than
    once. ``canal`` is accepted but not used.

    Returns:
        history_str: Comma-joined group names in visiting order.
        unique_groups: Number of distinct groups in the history.
        total_groups: Length of the history, repeats included.
    """
    if sentimiento == "positive":
        sizes, odds = [1, 2, 3], [0.7, 0.2, 0.1]
    elif sentimiento == "negative":
        sizes, odds = [1, 2, 3, 4, 5], [0.1, 0.2, 0.3, 0.3, 0.1]
    else:
        sizes, odds = [1, 2, 3, 4], [0.4, 0.3, 0.2, 0.1]

    hop_count = random.choices(sizes, weights=odds)[0]
    visited = [random.choice(grupos) for _ in range(hop_count)]

    return ",".join(visited), len(set(visited)), len(visited)

# Group that receives the CSAT rating
def generar_csat_rated_group(group_history, fcr):
    """Return the last group named in the comma-separated ``group_history``.

    ``fcr`` is accepted but not used.
    """
    _, _, ultimo_grupo = group_history.rpartition(",")
    return ultimo_grupo

# Draw an issue category with sentiment-dependent weights
def generar_issue_classification(sentimiento, canal):
    """Return one entry of ``issue_classification_catalog`` chosen at random.

    Each weight vector lines up position by position with the catalog.
    ``canal`` is accepted but not used.

    Raises:
        KeyError: If ``sentimiento`` is not "positive", "neutral" or "negative".
    """
    pesos_por_sentimiento = {
        "positive": [0.15, 0.20, 0.05, 0.20, 0.15, 0.10, 0.05, 0.05, 0.00, 0.05],
        "neutral": [0.15, 0.20, 0.10, 0.15, 0.10, 0.10, 0.10, 0.05, 0.02, 0.03],
        "negative": [0.15, 0.05, 0.20, 0.05, 0.15, 0.05, 0.15, 0.05, 0.10, 0.05]
    }
    pesos = pesos_por_sentimiento[sentimiento]
    (elegido,) = random.choices(issue_classification_catalog, weights=pesos)
    return elegido

# Label a CSAT rating as PROMOTER or DETRACTOR
def get_classification(csat_rating):
    """Return "PROMOTER" for ratings of 4 or more, otherwise "DETRACTOR"."""
    if csat_rating >= 4:
        return "PROMOTER"
    return "DETRACTOR"

# Build the synthetic dataset: one dict per case.
# The order of the random draws below is kept fixed on purpose; reordering
# them would change which values a given RNG state produces.
total_datos = 15000
case_id = 15000  # first case number; incremented once per generated row
datos = []

for _ in range(total_datos):
    # --- random draws ---
    fecha = generar_fecha_aleatoria()
    sentimiento = get_seasonal_feeling(fecha)
    canal = random.choice(canales)
    sentiment_rate = generar_sentiment_rate(sentimiento)
    group_history, unique_groups, total_groups = generar_group_history(sentimiento, canal)
    tiempo_respuesta = generar_tiempo_respuesta(canal, sentimiento, total_groups)
    aht, talk_time, hold_time, wrap_up_time = generar_aht_components(canal, tiempo_respuesta)
    csat = generar_csat(sentimiento)
    fcr = generar_fcr(sentimiento)
    issue_classification = generar_issue_classification(sentimiento, canal)
    # Comment matching both the channel and the sentiment
    comentario = random.choice(comentarios[canal][sentimiento])

    # --- values derived deterministically from the draws above ---
    fecha_str = fecha.strftime("%Y-%m-%d")
    start_of_week = get_start_of_week(fecha_str)
    start_of_month = get_start_of_month(fecha_str)
    classification = get_classification(csat)
    within_sla = dentro_sla(canal, tiempo_respuesta)
    csat_rated_group = generar_csat_rated_group(group_history, fcr)

    registro = {
        "case_id": f"CASE-{case_id}",
        "date": fecha_str,
        "start_of_week": start_of_week,
        "start_of_month": start_of_month,
        "translated_comments": comentario,
        "sentiment": sentimiento,
        "sentiment_rate": sentiment_rate,
        "channel": canal,
        "resolution_time_min": tiempo_respuesta,
        "aht": aht,
        "talk_time": talk_time,
        "hold_time": hold_time,
        "wrap_up_time": wrap_up_time,
        "csat_rating_received": csat,
        "classification": classification,
        "resolved_in_sla": within_sla,
        "first_touch_resolution": fcr,
        "group_name_history": group_history,
        "groups": unique_groups,
        "total_groups": total_groups,
        "csat_rated_group_name": csat_rated_group,
        "issue_classification": issue_classification
    }
    datos.append(registro)
    case_id += 1

# Shuffle so rows are not ordered by case number
random.shuffle(datos)



Canal: email, Sentimiento: positive, Número de comentarios: 500
Canal: email, Sentimiento: negative, Número de comentarios: 500
Canal: email, Sentimiento: neutral, Número de comentarios: 500
Canal: chatbot, Sentimiento: positive, Número de comentarios: 500
Canal: chatbot, Sentimiento: negative, Número de comentarios: 500
Canal: chatbot, Sentimiento: neutral, Número de comentarios: 500
Canal: phone, Sentimiento: positive, Número de comentarios: 500
Canal: phone, Sentimiento: negative, Número de comentarios: 500
Canal: phone, Sentimiento: neutral, Número de comentarios: 500


In [8]:
# Build the DataFrame from the generated records
df = pd.DataFrame(datos)

# Five slowest cases, to inspect the maximum resolution_time_min
sorted_df = df.sort_values(by='resolution_time_min', ascending=False).head(5)

# Export to CSV. The path lives in a single variable so the confirmation
# message always names the file that was actually written (the message used
# to say 'call_center_data_adjusted.csv' while the data went to
# 'call_center_data.csv').
output_csv = 'call_center_data.csv'
df.to_csv(output_csv, index=False, sep=';', quotechar='"')
print(f"Datos generados y guardados en '{output_csv}'.")
sorted_df

Datos generados y guardados en 'call_center_data_adjusted.csv'.


Unnamed: 0,case_id,date,start_of_week,start_of_month,translated_comments,sentiment,sentiment_rate,channel,resolution_time_min,aht,...,wrap_up_time,csat_rating_received,classification,resolved_in_sla,first_touch_resolution,group_name_history,groups,total_groups,csat_rated_group_name,issue_classification
9454,CASE-20714,2022-05-25,2022-05-22,2022-05-01,"The email was professional, they couldn’t hand...",negative,2,email,14399.18,45.0,...,0.0,2,DETRACTOR,0,0,Grupo F,1,1,Grupo F,Balance Inquiry
6665,CASE-18005,2024-12-15,2024-12-15,2024-12-01,"The email was excellent, they didn’t fix my ba...",negative,2,email,14387.42,45.0,...,0.0,2,DETRACTOR,0,0,"Grupo A,Grupo I",2,2,Grupo I,Login Problems
12324,CASE-29233,2023-10-11,2023-10-08,2023-10-01,"The email ignored my login problem poorly, so ...",negative,1,email,14377.44,45.0,...,0.0,2,DETRACTOR,0,1,"Grupo J,Grupo H,Grupo F,Grupo J,Grupo G",4,5,Grupo G,Login Problems
4754,CASE-16724,2022-10-20,2022-10-16,2022-10-01,"The email messed up my card issue completely, ...",negative,1,email,14374.31,45.0,...,0.0,2,DETRACTOR,0,0,"Grupo D,Grupo A",2,2,Grupo A,Payment Issues
9459,CASE-23875,2024-07-25,2024-07-21,2024-07-01,"The email didn’t fix my login problem, great s...",negative,1,email,14367.88,45.0,...,0.0,1,DETRACTOR,0,0,"Grupo G,Grupo H",2,2,Grupo H,Payment Issues
