# Proyecto de Automatización

In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [28]:
df = pd.read_csv("customer_support_tickets.csv")
df.head()

Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0


In [29]:
df["Ticket Status"].value_counts()

Ticket Status
Pending Customer Response    2881
Open                         2819
Closed                       2769
Name: count, dtype: int64

In [30]:
def map_next_action(status):
    if status == "Open":
        return "responder"
    elif status == "Pending Customer Response":
        return "esperar"
    elif status == "Closed":
        return "cerrar"
    else:
        return "otros"  
    
df["Next Action"] = df["Ticket Status"].apply(map_next_action)

In [31]:
df.isnull().sum()

Ticket ID                          0
Customer Name                      0
Customer Email                     0
Customer Age                       0
Customer Gender                    0
Product Purchased                  0
Date of Purchase                   0
Ticket Type                        0
Ticket Subject                     0
Ticket Description                 0
Ticket Status                      0
Resolution                      5700
Ticket Priority                    0
Ticket Channel                     0
First Response Time             2819
Time to Resolution              5700
Customer Satisfaction Rating    5700
Next Action                        0
dtype: int64

In [32]:
df = df.drop(columns=["Ticket Status", "Resolution", "Customer Name", "Customer Email", "Customer Satisfaction Rating", "Customer Gender", "Time to Resolution", "Ticket Description", "First Response Time"])
df.head()

Unnamed: 0,Ticket ID,Customer Age,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Priority,Ticket Channel,Next Action
0,1,32,GoPro Hero,2021-03-22,Technical issue,Product setup,Critical,Social media,esperar
1,2,42,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,Critical,Chat,esperar
2,3,48,Dell XPS,2020-07-14,Technical issue,Network problem,Low,Social media,cerrar
3,4,27,Microsoft Office,2020-11-13,Billing inquiry,Account access,Low,Social media,cerrar
4,5,67,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,Low,Email,cerrar


In [33]:
def datetime_to_timestamp(df, col_name, new_col_name):
    # Convierte la columna a formato datetime, valores inválidos se vuelven NaT
    df[col_name] = pd.to_datetime(df[col_name], errors="coerce")
    
    # Crea una máscara booleana: True donde hay fechas válidas, False donde hay NaTs
    mask = df[col_name].notna()
    
    # Inicializa la nueva columna con valores NaN
    df[new_col_name] = np.nan
    
    # Convierte solo las fechas válidas a timestamp (segundos desde 1970)
    df.loc[mask, new_col_name] = df.loc[mask, col_name].astype("int64") / 1e9
    
    # Elimina la columna original de datetime
    df = df.drop(columns=[col_name])
    
    # Retorna el dataframe modificado
    return df

df = datetime_to_timestamp(df, "Date of Purchase", "Purchase Timestamp")
df.head()

Unnamed: 0,Ticket ID,Customer Age,Product Purchased,Ticket Type,Ticket Subject,Ticket Priority,Ticket Channel,Next Action,Purchase Timestamp
0,1,32,GoPro Hero,Technical issue,Product setup,Critical,Social media,esperar,1616371000.0
1,2,42,LG Smart TV,Technical issue,Peripheral compatibility,Critical,Chat,esperar,1621642000.0
2,3,48,Dell XPS,Technical issue,Network problem,Low,Social media,cerrar,1594685000.0
3,4,27,Microsoft Office,Billing inquiry,Account access,Low,Social media,cerrar,1605226000.0
4,5,67,Autodesk AutoCAD,Billing inquiry,Data loss,Low,Email,cerrar,1580774000.0


In [34]:
df.isna().sum()

Ticket ID             0
Customer Age          0
Product Purchased     0
Ticket Type           0
Ticket Subject        0
Ticket Priority       0
Ticket Channel        0
Next Action           0
Purchase Timestamp    0
dtype: int64

In [35]:
X = df.drop(columns=["Next Action"])
y = df["Next Action"]

In [36]:
categorical_cols = [
    "Product Purchased",
    "Ticket Type",
    "Ticket Priority",
    "Ticket Channel",
    "Ticket Subject"
]

numerical_cols = [
    "Customer Age",
    "Purchase Timestamp"
]

In [37]:
X_encoded = pd.get_dummies(X, columns=categorical_cols)

X_temp, X_test, y_temp, y_test = train_test_split(X_encoded, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42)

In [38]:
X_encoded

Unnamed: 0,Ticket ID,Customer Age,Purchase Timestamp,Product Purchased_Adobe Photoshop,Product Purchased_Amazon Echo,Product Purchased_Amazon Kindle,Product Purchased_Apple AirPods,Product Purchased_Asus ROG,Product Purchased_Autodesk AutoCAD,Product Purchased_Bose QuietComfort,...,Ticket Subject_Hardware issue,Ticket Subject_Installation support,Ticket Subject_Network problem,Ticket Subject_Payment issue,Ticket Subject_Peripheral compatibility,Ticket Subject_Product compatibility,Ticket Subject_Product recommendation,Ticket Subject_Product setup,Ticket Subject_Refund request,Ticket Subject_Software bug
0,1,32,1.616371e+09,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,2,42,1.621642e+09,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
2,3,48,1.594685e+09,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
3,4,27,1.605226e+09,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,5,67,1.580774e+09,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8464,8465,22,1.638922e+09,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
8465,8466,27,1.582330e+09,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
8466,8467,57,1.629158e+09,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
8467,8468,54,1.634342e+09,False,False,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False


## Parte 1: Selección de siguiente acción

In [39]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report


In [40]:

dt = DecisionTreeClassifier(random_state=42)

param_grid = {
    "max_depth": [3, 5, 7, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
    "criterion": ["gini", "entropy"]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring="f1_macro",
    cv=cv,
    n_jobs=-1,           
    verbose=1
)

grid_search.fit(X_train, y_train)

print("Mejores hiperparámetros:")
print(grid_search.best_params_)

y_val_pred = grid_search.best_estimator_.predict(X_val)
print("\nReporte de validación:")
print(classification_report(y_val, y_val_pred))

Fitting 5 folds for each of 90 candidates, totalling 450 fits
Mejores hiperparámetros:
{'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10}

Reporte de validación:
              precision    recall  f1-score   support

      cerrar       0.33      0.33      0.33       554
     esperar       0.35      0.33      0.34       576
   responder       0.33      0.35      0.34       564

    accuracy                           0.34      1694
   macro avg       0.34      0.34      0.34      1694
weighted avg       0.34      0.34      0.34      1694



In [41]:

y_test_pred = grid_search.best_estimator_.predict(X_test)

print("\nReporte final en el test set:")
print(classification_report(y_test, y_test_pred))


Reporte final en el test set:
              precision    recall  f1-score   support

      cerrar       0.30      0.31      0.31       554
     esperar       0.35      0.37      0.36       576
   responder       0.34      0.32      0.33       564

    accuracy                           0.33      1694
   macro avg       0.33      0.33      0.33      1694
weighted avg       0.33      0.33      0.33      1694



In [None]:
# Redes neuronales

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
import random

In [43]:
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True 
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [44]:
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)
y_test_enc = le.transform(y_test)

In [45]:
X_train_tensor = torch.tensor(X_train.values.astype(np.float32))
X_val_tensor = torch.tensor(X_val.values.astype(np.float32))
X_test_tensor = torch.tensor(X_test.values.astype(np.float32))
y_train_tensor = torch.tensor(y_train_enc, dtype=torch.long)
y_val_tensor = torch.tensor(y_val_enc, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_enc, dtype=torch.long)


In [46]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

In [47]:
class SimpleNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.5),

            nn.Linear(512, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.1),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),

            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        return self.net(x)

input_dim = X_train.shape[1]
output_dim = len(le.classes_)
model = SimpleNN(input_dim, output_dim)


In [48]:

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 100
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")


Epoch 1/100, Loss: 182.3232
Epoch 2/100, Loss: 179.0913
Epoch 3/100, Loss: 178.1289
Epoch 4/100, Loss: 176.7670
Epoch 5/100, Loss: 176.9007
Epoch 6/100, Loss: 176.8054
Epoch 7/100, Loss: 176.4394
Epoch 8/100, Loss: 176.2972
Epoch 9/100, Loss: 176.7193
Epoch 10/100, Loss: 175.9847
Epoch 11/100, Loss: 176.1068
Epoch 12/100, Loss: 175.8917
Epoch 13/100, Loss: 175.8799
Epoch 14/100, Loss: 175.3976
Epoch 15/100, Loss: 175.8635
Epoch 16/100, Loss: 175.7299
Epoch 17/100, Loss: 175.7452
Epoch 18/100, Loss: 175.6832
Epoch 19/100, Loss: 175.6672
Epoch 20/100, Loss: 175.3071
Epoch 21/100, Loss: 175.8044
Epoch 22/100, Loss: 175.7312
Epoch 23/100, Loss: 175.6151
Epoch 24/100, Loss: 175.5520
Epoch 25/100, Loss: 175.5430
Epoch 26/100, Loss: 176.0224
Epoch 27/100, Loss: 175.5141
Epoch 28/100, Loss: 175.7479
Epoch 29/100, Loss: 175.6431
Epoch 30/100, Loss: 175.4552
Epoch 31/100, Loss: 176.0435
Epoch 32/100, Loss: 175.4876
Epoch 33/100, Loss: 175.5096
Epoch 34/100, Loss: 175.4810
Epoch 35/100, Loss: 175

In [49]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.numpy())
        all_labels.extend(y_batch.numpy())

print("\nReporte en test:")
print(classification_report(all_labels, all_preds, zero_division=0))



Reporte en test:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       554
           1       0.34      1.00      0.51       576
           2       0.00      0.00      0.00       564

    accuracy                           0.34      1694
   macro avg       0.11      0.33      0.17      1694
weighted avg       0.12      0.34      0.17      1694



In [50]:
le.classes_

array(['cerrar', 'esperar', 'responder'], dtype=object)