In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import load_npz
import pickle
import joblib
import time
import json

# sklearn imports
from sklearn.preprocessing import LabelEncoder
import imblearn
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import ParameterSampler
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

# classifier imports
import xgboost as xgb
from sklearn.naive_bayes import MultinomialNB

seed = 1

## Load models and test data

In [None]:
# Test-set inputs: the ground-truth labels plus one TF-IDF matrix per classifier.

# Ground-truth labels. The sheet's first column is read as the index and then
# turned back into a regular column by reset_index().
load_path = "../data/clean/test_labels.xlsx"
df_labels = pd.read_excel(load_path, index_col=0).reset_index()

# TF-IDF matrices, loaded in the order the target names are listed:
# one for the top-level cluster model, then one per layer-2 cluster model.
(
    tfidf_cluster,
    tfidf_authorization,
    tfidf_order,
    tfidf_product,
    tfidf_queries_website,
    tfidf_warranty,
) = (
    load_npz(npz_path)
    for npz_path in (
        "../data/interim/tfidfs/tfidf_350k_1percent_422words_test.npz",
        "../data/interim/tfidfs/tfidf_authorization_1percent_401words_test.npz",
        "../data/interim/tfidfs/tfidf_order related and payments_1percent_439words_test.npz",
        "../data/interim/tfidfs/tfidf_product queries_1percent_351words_test.npz",
        "../data/interim/tfidfs/tfidf_queries regarding website_1percent_274words_test.npz",
        "../data/interim/tfidfs/tfidf_warranty_1percent_433words_test.npz",
    )
)

In [None]:
# # load val data

# # labels
# load_path = "../data/clean/validation_labels.xlsx"
# df_labels = pd.read_excel(load_path, index_col=0).reset_index()

# # tfidfs
# tfidf_cluster = load_npz("../data/interim/tfidfs/tfidf_350k_1percent_422words_val.npz")
# tfidf_authorization = load_npz(
#     "../data/interim/tfidfs/tfidf_authorization_1percent_401words_val.npz"
# )
# tfidf_order = load_npz(
#     "../data/interim/tfidfs/tfidf_order related and payments_1percent_439words_val.npz"
# )
# tfidf_product = load_npz(
#     "../data/interim/tfidfs/tfidf_product queries_1percent_351words_val.npz"
# )
# tfidf_queries_website = load_npz(
#     "../data/interim/tfidfs/tfidf_queries regarding website_1percent_274words_val.npz"
# )
# tfidf_warranty = load_npz(
#     "../data/interim/tfidfs/tfidf_warranty_1percent_433words_val.npz"
# )

In [None]:
# Load the fitted classifiers and their label encoders from disk.
model_load_path = "../data/results/models/"

# model_warranty = joblib.load(model_load_path + "xgb_warranty_jlib")
# le_warranty = joblib.load(model_load_path + "lab_encoder_warranty_jlib")


def _load_pickle(file_name):
    """Return the object unpickled from ``model_load_path + file_name``.

    NOTE(security): unpickling can execute arbitrary code, so only point this
    at files produced by a trusted training run.
    """
    with open(model_load_path + file_name, "rb") as handle:
        return pickle.load(handle)


# models: one top-level cluster classifier, then one classifier per cluster
model_cluster = _load_pickle("xgb_cluster_42_Noover_pkl")
model_authorization = _load_pickle("xgb_authorization_pkl")
model_order = _load_pickle("xgb_order_pkl")
model_product = _load_pickle("xgb_product_pkl")
model_queries_website = _load_pickle("xgb_queries_website_pkl")
model_warranty = _load_pickle("xgb_warranty_pkl")

# label encoders: used below to translate between class ids and label strings
le_cluster = _load_pickle("lab_encoder_cluster_pkl")
le_authorization = _load_pickle("lab_encoder_authorization_pkl")
le_order = _load_pickle("lab_encoder_order_pkl")
le_product = _load_pickle("lab_encoder_product_pkl")
le_queries_website = _load_pickle("lab_encoder_queries_website_pkl")
le_warranty = _load_pickle("lab_encoder_warranty_pkl")

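Note: the models above are loaded with `pickle`; see scikit-learn's notes on the security and maintainability limitations of pickled models: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations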


## Some preprocessing

In [None]:
# Main DataFrame used through the rest of the notebook: the ground-truth labels
# loaded above. Later cells add a "cluster_label" column and rewrite "label" in
# place. Displayed here for inspection only.
df_labels

Unnamed: 0,index,label
0,0,product availability and stock
1,1,bad customer service
2,2,product details inquiry
3,3,schedule repair
4,4,product details inquiry
...,...,...
47411,47411,delivery tracking
47412,47412,reschedule delivery
47413,47413,product availability and stock
47414,47414,cancel order


In [None]:
# cluster labels
# Maps each top-level cluster name to the fine-grained labels it contains.
# NOTE: entry order matters. The reverse lookup built at the end of this cell
# keeps the LAST cluster that lists a label, and two labels are listed twice:
#   - "account cancellation": "authorization" and "order related and payments"
#     -> resolves to "order related and payments"
#   - "incomplete installation": "warranty" and "queries regarding website"
#     -> resolves to "queries regarding website"
# NOTE(review): confirm these duplicates are intentional.
clusters = {
    "authorization": [
        "account cancellation",  # also listed under "order related and payments"
        "account security",
        "login issues",
        "forgot my password",
        "software update",
    ],
    "order related and payments": [
        "best buy credit card",
        "payment failed",
        "billing or charge disputes",
        "cancel order",
        "unauthorized charge or payment",
        "refund request",
        "fraud concerns",
        "return request",
        "cancellation of a plan subscription or membership",
        "account cancellation",  # duplicate of the "authorization" entry; this one wins
        "change or update order",
        "schedule order pickup",
        "change shipping time",
        "delivery tracking",
        "refund status",
        "change payment method",
        "payment method",
        "change shipping address",
        "delivery or parts of delivery items missing",
        "renewal of a plan subscription or membership",
        "reschedule delivery",
        "reschedule order pickup",
        "rewards or discounts",
        "schedule delivery",
        "trade in inquiry",
        "delivery delays",
    ],
    "warranty": [
        "check warranty coverage",
        "damaged product",
        "warranty claim",
        "reschedule repair",
        "device damaged",
        "incomplete installation",  # also listed under "queries regarding website"
        "lost or forgot items",
        "reschedule installation",
        "schedule repair",
        "screen issues",
        "software error",
        "software installation",
        "schedule installation",
        "troubleshooting",
        "performance issues",
        "defective product",
    ],
    "queries regarding website": [
        "employment or career inquiries",
        "website or app complaints",
        "incomplete installation",  # duplicate of the "warranty" entry; this one wins
        "miscellaneous inquiries",
        "network or connectivity issues",
        "customer feedback",
        "bad customer service",
    ],
    "product queries": [
        # trailing numbers are presumably per-label sample counts — TODO confirm
        "price match",  # 6759
        "product availability and stock",  # 37972
        "product compatibility",  # 10897
        "product details inquiry",  # 42698
        "transfer call to the right department or store",  # 5869
    ],
}

# other dict
# Per cluster, the labels that the relabeling step at the end of this cell
# collapses into the single label "other". An empty list means nothing in that
# cluster is collapsed.
other_dict = {
    "warranty": [
        "screen issues",
        "device damaged",
        "check warranty coverage",
        "lost or forgot items",
        "reschedule installation",
        "performance issues",
    ],
    "order related and payments": [
        "change shipping time",
        "best buy credit card",
        "payment failed",
        "account cancellation",
        "reschedule order pickup",
    ],
    "product queries": [],
    "queries regarding website": [
        "website or app complaints",
        "incomplete installation",
        "network or connectivity issues",
    ],
    "authorization": [],
}

# create a reverse lookup dict,
# i.e. keys are labels and values are cluster labels
# (a label listed under several clusters resolves to the last cluster listing it)
cluster_lookup_dict = {
    label: cluster_label
    for cluster_label, labels in clusters.items()
    for label in labels
}

# Keep one untouched copy of the labels so this cell can be re-run safely:
# without it, a second run would look up the already-collapsed "other" label
# in cluster_lookup_dict and raise KeyError.
if "label_raw" not in df_labels.columns:
    df_labels["label_raw"] = df_labels["label"]

# Fail with an explicit message if the data holds a label no cluster lists.
unknown_labels = set(df_labels["label_raw"]) - set(cluster_lookup_dict)
if unknown_labels:
    raise KeyError(
        f"labels not assigned to any cluster: {sorted(unknown_labels, key=str)}"
    )

df_labels["cluster_label"] = df_labels["label_raw"].map(cluster_lookup_dict)

# change labels to "other"
# A label is collapsed only if it is in other_dict for the cluster it resolved to.
df_labels["label"] = [
    "other" if label in other_dict[cluster] else label
    for label, cluster in zip(df_labels["label_raw"], df_labels["cluster_label"])
]

In [None]:
cluster_lookup_dict

{'account cancellation': 'order related and payments',
 'account security': 'authorization',
 'login issues': 'authorization',
 'forgot my password': 'authorization',
 'software update': 'authorization',
 'best buy credit card': 'order related and payments',
 'payment failed': 'order related and payments',
 'billing or charge disputes': 'order related and payments',
 'cancel order': 'order related and payments',
 'unauthorized charge or payment': 'order related and payments',
 'refund request': 'order related and payments',
 'fraud concerns': 'order related and payments',
 'return request': 'order related and payments',
 'cancellation of a plan subscription or membership': 'order related and payments',
 'change or update order': 'order related and payments',
 'schedule order pickup': 'order related and payments',
 'change shipping time': 'order related and payments',
 'delivery tracking': 'order related and payments',
 'refund status': 'order related and payments',
 'change payment met

In [None]:
df_labels["label"].value_counts()

label
product details inquiry                              5337
product availability and stock                       4746
schedule repair                                      4423
change or update order                               3037
defective product                                    2409
other                                                1996
schedule installation                                1827
troubleshooting                                      1781
renewal of a plan subscription or membership         1550
product compatibility                                1362
return request                                       1329
damaged product                                      1133
schedule order pickup                                1075
cancel order                                          982
software error                                        938
miscellaneous inquiries                               918
reschedule delivery                                   852
price ma

## Cluster model inference

In [None]:
# Features and encoded targets for the top-level (cluster) classifier.
X = tfidf_cluster.toarray()  # dense copy of the sparse TF-IDF matrix
# NOTE(review): y is not referenced again in the visible cells; the metrics
# below read df_labels["cluster_label"] directly.
y = le_cluster.transform(df_labels["cluster_label"].values)

In [None]:
# run inference
# Layer 1: predict a cluster for every row of X. The result is still encoded;
# the next cell decodes it with le_cluster.
y_pred_cluster = model_cluster.predict(X)

In [None]:
# prepare for the next layer of classifiers
# Decode the predictions back to cluster names. reset_index() turns the unnamed
# Series into a DataFrame with columns "index" and 0, so later cells read the
# predicted cluster names as y_pred_cluster[0].
# NOTE: this rebinds y_pred_cluster from encoded predictions to that DataFrame,
# so the cell is not idempotent — re-run the inference cell before re-running it.
y_pred_cluster = pd.Series(le_cluster.inverse_transform(y_pred_cluster)).reset_index()

In [None]:
# Layer-1 quality: micro-averaged F1 and the per-cluster report.
cluster_true = df_labels["cluster_label"].values
cluster_pred = y_pred_cluster[0].values

micro_f1_score = f1_score(cluster_true, cluster_pred, average="micro")
print(micro_f1_score)

clf_report = classification_report(cluster_true, cluster_pred)
print(clf_report)

0.7615151003880547
                            precision    recall  f1-score   support

             authorization       0.55      0.68      0.61      1074
order related and payments       0.86      0.79      0.82     15866
           product queries       0.71      0.74      0.73     13024
 queries regarding website       0.47      0.60      0.53      1938
                  warranty       0.78      0.77      0.77     15514

                  accuracy                           0.76     47416
                 macro avg       0.67      0.72      0.69     47416
              weighted avg       0.77      0.76      0.76     47416



In [None]:
y_pred_cluster[0].value_counts()

0
warranty                      15295
order related and payments    14714
product queries               13637
queries regarding website      2452
authorization                  1318
Name: count, dtype: int64

## Layer 2 inferences

#### Cluster: Authorization

In [None]:
# Route rows to the "authorization" model: keep only the observations that the
# cluster classifier assigned to this cluster.
cluster_name = "authorization"

# Row labels survive the filter, so df_test.index is used later to look the
# same rows up in df_labels (assumes the TF-IDF rows are in df_labels order —
# TODO confirm).
df_test = pd.DataFrame.sparse.from_spmatrix(tfidf_authorization).loc[
    y_pred_cluster[0] == cluster_name
]

# Dense feature matrix for the layer-2 classifier.
X = df_test.to_numpy()

In [None]:
# run inference
# Layer 2: predict a label for the rows routed to "authorization". The result
# is still encoded; the next cell decodes it with le_authorization.
y_pred_authorization = model_authorization.predict(X)

In [None]:
# prepare for the next layer of classifiers
# Decode the predictions to label strings and re-attach df_test's row labels so
# they can be matched against df_labels and concatenated with the other clusters.
# NOTE: this rebinds y_pred_authorization from encoded predictions to decoded
# strings, so re-run the inference cell before re-running this one.
y_pred_authorization = pd.Series(
    le_authorization.inverse_transform(y_pred_authorization), index=df_test.index
)

In [None]:
y_pred_authorization.value_counts()

login issues          505
forgot my password    479
account security      288
software update        46
Name: count, dtype: int64

In [None]:
# print f1-score and classification report for this classifier
# Ground truth for exactly the rows routed to this cluster. Rows the cluster
# model misrouted keep their real label, which this classifier may never predict.
y_true = df_labels.loc[y_pred_authorization.index, "label"].values
y_pred = y_pred_authorization.values

micro_f1_score = f1_score(y_true, y_pred, average="micro")
print(micro_f1_score)

# zero_division=0 scores undefined precision/recall as 0, which is also the
# default fallback, but without emitting a warning for each affected metric.
clf_report = classification_report(y_true, y_pred, zero_division=0)
print(clf_report)

0.37556904400606983
                                                precision    recall  f1-score   support

                              account security       0.24      0.55      0.34       127
                          bad customer service       0.00      0.00      0.00         5
                    billing or charge disputes       0.00      0.00      0.00         9
                        change or update order       0.00      0.00      0.00        89
                         change payment method       0.00      0.00      0.00         6
                       change shipping address       0.00      0.00      0.00         2
                             customer feedback       0.00      0.00      0.00         1
                               damaged product       0.00      0.00      0.00         1
                             defective product       0.00      0.00      0.00         5
   delivery or parts of delivery items missing       0.00      0.00      0.00         1
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Cluster: Order related and payments

In [None]:
# Route rows to the "order related and payments" model: keep only the
# observations that the cluster classifier assigned to this cluster.
cluster_name = "order related and payments"

# Row labels survive the filter, so df_test.index is used later to look the
# same rows up in df_labels (assumes the TF-IDF rows are in df_labels order —
# TODO confirm).
df_test = pd.DataFrame.sparse.from_spmatrix(tfidf_order).loc[
    y_pred_cluster[0] == cluster_name
]

# Dense feature matrix for the layer-2 classifier.
X = df_test.to_numpy()

In [None]:
# run inference
# Layer 2: predict a label for the rows routed to "order related and payments".
# The result is still encoded; the next cell decodes it with le_order.
y_pred_order = model_order.predict(X)

In [None]:
# prepare for the next layer of classifiers
# Decode the predictions to label strings and re-attach df_test's row labels so
# they can be matched against df_labels and concatenated with the other clusters.
# NOTE: this rebinds y_pred_order from encoded predictions to decoded strings,
# so re-run the inference cell before re-running this one.
y_pred_order = pd.Series(le_order.inverse_transform(y_pred_order), index=df_test.index)

In [None]:
y_pred_order.value_counts()

change or update order                               3189
renewal of a plan subscription or membership         1720
return request                                       1489
schedule order pickup                                1126
cancellation of a plan subscription or membership     833
cancel order                                          800
delivery tracking                                     728
reschedule delivery                                   706
rewards or discounts                                  655
unauthorized charge or payment                        533
refund request                                        514
trade in inquiry                                      384
change payment method                                 376
schedule delivery                                     286
payment method                                        269
change shipping address                               242
billing or charge disputes                            214
fraud concerns

In [None]:
# print f1-score and classification report for this classifier
# Ground truth for exactly the rows routed to this cluster. Rows the cluster
# model misrouted keep their real label, which this classifier may never predict.
y_true = df_labels.loc[y_pred_order.index, "label"].values
y_pred = y_pred_order.values

micro_f1_score = f1_score(y_true, y_pred, average="micro")
print(micro_f1_score)

# zero_division=0 scores undefined precision/recall as 0, which is also the
# default fallback, but without emitting a warning for each affected metric.
clf_report = classification_report(y_true, y_pred, zero_division=0)
print(clf_report)

0.5302433056952562


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                                   precision    recall  f1-score   support

                                 account security       0.00      0.00      0.00        47
                             bad customer service       0.00      0.00      0.00        41
                       billing or charge disputes       0.43      0.28      0.34       328
                                     cancel order       0.60      0.53      0.56       915
cancellation of a plan subscription or membership       0.65      0.70      0.67       776
                           change or update order       0.41      0.64      0.50      2043
                            change payment method       0.47      0.41      0.44       423
                          change shipping address       0.64      0.52      0.57       299
                                customer feedback       0.00      0.00      0.00        32
                                  damaged product       0.00      0.00      0.00       25

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Cluster: Product queries

In [None]:
# Route rows to the "product queries" model: keep only the observations that
# the cluster classifier assigned to this cluster.
cluster_name = "product queries"

# Row labels survive the filter, so df_test.index is used later to look the
# same rows up in df_labels (assumes the TF-IDF rows are in df_labels order —
# TODO confirm).
df_test = pd.DataFrame.sparse.from_spmatrix(tfidf_product).loc[
    y_pred_cluster[0] == cluster_name
]

# Dense feature matrix for the layer-2 classifier.
X = df_test.to_numpy()

In [None]:
# run inference
# Layer 2: predict a label for the rows routed to "product queries". The result
# is still encoded; the next cell decodes it with le_product.
y_pred_product = model_product.predict(X)

In [None]:
# prepare for the next layer of classifiers
# Decode the predictions to label strings and re-attach df_test's row labels so
# they can be matched against df_labels and concatenated with the other clusters.
# NOTE: this rebinds y_pred_product from encoded predictions to decoded strings,
# so re-run the inference cell before re-running this one.
y_pred_product = pd.Series(
    le_product.inverse_transform(y_pred_product), index=df_test.index
)

In [None]:
y_pred_product.value_counts()

product details inquiry                           6617
product availability and stock                    5063
price match                                        821
transfer call to the right department or store     578
product compatibility                              543
wrong_cluster                                       15
Name: count, dtype: int64

In [None]:
# print f1-score and classification report for this classifier
# Ground truth for exactly the rows routed to this cluster. Rows the cluster
# model misrouted keep their real label, which this classifier may never predict.
y_true = df_labels.loc[y_pred_product.index, "label"].values
y_pred = y_pred_product.values

micro_f1_score = f1_score(y_true, y_pred, average="micro")
print(micro_f1_score)

# zero_division=0 scores undefined precision/recall as 0, which is also the
# default fallback, but without emitting a warning for each affected metric.
clf_report = classification_report(y_true, y_pred, zero_division=0)
print(clf_report)

0.5411014152672875


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                                   precision    recall  f1-score   support

                                 account security       0.00      0.00      0.00        17
                             bad customer service       0.00      0.00      0.00        75
                       billing or charge disputes       0.00      0.00      0.00        26
                                     cancel order       0.00      0.00      0.00        13
cancellation of a plan subscription or membership       0.00      0.00      0.00         9
                           change or update order       0.00      0.00      0.00       565
                            change payment method       0.00      0.00      0.00        32
                          change shipping address       0.00      0.00      0.00        42
                                customer feedback       0.00      0.00      0.00        32
                                  damaged product       0.00      0.00      0.00       18

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Cluster: Queries regarding website

In [None]:
# Route rows to the "queries regarding website" model: keep only the
# observations that the cluster classifier assigned to this cluster.
cluster_name = "queries regarding website"

# Row labels survive the filter, so df_test.index is used later to look the
# same rows up in df_labels (assumes the TF-IDF rows are in df_labels order —
# TODO confirm).
df_test = pd.DataFrame.sparse.from_spmatrix(tfidf_queries_website).loc[
    y_pred_cluster[0] == cluster_name
]

# Dense feature matrix for the layer-2 classifier.
X = df_test.to_numpy()

In [None]:
# run inference
# Layer 2: predict a label for the rows routed to "queries regarding website".
# The result is still encoded; the next cell decodes it with le_queries_website.
y_pred_queries_website = model_queries_website.predict(X)

In [None]:
# prepare for the next layer of classifiers
# Decode the predictions to label strings and re-attach df_test's row labels so
# they can be matched against df_labels and concatenated with the other clusters.
# NOTE: this rebinds y_pred_queries_website from encoded predictions to decoded
# strings, so re-run the inference cell before re-running this one.
y_pred_queries_website = pd.Series(
    le_queries_website.inverse_transform(y_pred_queries_website), index=df_test.index
)

In [None]:
y_pred_queries_website.value_counts()

miscellaneous inquiries           1612
bad customer service               356
employment or career inquiries     333
other                               82
customer feedback                   60
wrong_cluster                        9
Name: count, dtype: int64

In [None]:
# print f1-score and classification report for this classifier
# Ground truth for exactly the rows routed to this cluster. Rows the cluster
# model misrouted keep their real label, which this classifier may never predict.
y_true = df_labels.loc[y_pred_queries_website.index, "label"].values
y_pred = y_pred_queries_website.values

micro_f1_score = f1_score(y_true, y_pred, average="micro")
print(micro_f1_score)

# zero_division=0 scores undefined precision/recall as 0, which is also the
# default fallback, but without emitting a warning for each affected metric.
clf_report = classification_report(y_true, y_pred, zero_division=0)
print(clf_report)

0.3556280587275693
                                                   precision    recall  f1-score   support

                                 account security       0.00      0.00      0.00        10
                             bad customer service       0.22      0.42      0.29       192
                       billing or charge disputes       0.00      0.00      0.00         8
                                     cancel order       0.00      0.00      0.00         3
cancellation of a plan subscription or membership       0.00      0.00      0.00         2
                           change or update order       0.00      0.00      0.00        59
                            change payment method       0.00      0.00      0.00         6
                          change shipping address       0.00      0.00      0.00         9
                                customer feedback       0.18      0.22      0.20        49
                                  damaged product       0.00      0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Cluster: Warranty

In [None]:
# Route rows to the "warranty" model: keep only the observations that the
# cluster classifier assigned to this cluster.
cluster_name = "warranty"

# Row labels survive the filter, so df_test.index is used later to look the
# same rows up in df_labels (assumes the TF-IDF rows are in df_labels order —
# TODO confirm).
df_test = pd.DataFrame.sparse.from_spmatrix(tfidf_warranty).loc[
    y_pred_cluster[0] == cluster_name
]

# Dense feature matrix for the layer-2 classifier.
X = df_test.to_numpy()

In [None]:
# run inference
# Layer 2: predict a label for the rows routed to "warranty". The result is
# still encoded; the next cell decodes it with le_warranty.
y_pred_warranty = model_warranty.predict(X)

In [None]:
# prepare for the next layer of classifiers
# Decode the predictions to label strings and re-attach df_test's row labels so
# they can be matched against df_labels and concatenated with the other clusters.
# NOTE: this rebinds y_pred_warranty from encoded predictions to decoded strings,
# so re-run the inference cell before re-running this one.
y_pred_warranty = pd.Series(
    le_warranty.inverse_transform(y_pred_warranty), index=df_test.index
)

In [None]:
y_pred_warranty.value_counts()

schedule repair          6776
defective product        2154
schedule installation    2056
troubleshooting          1114
other                     926
software error            733
damaged product           409
reschedule repair         381
software installation     336
warranty claim            294
wrong_cluster             116
Name: count, dtype: int64

In [None]:
# print f1-score and classification report for this classifier
# Ground truth for exactly the rows routed to this cluster. Rows the cluster
# model misrouted keep their real label, which this classifier may never predict.
y_true = df_labels.loc[y_pred_warranty.index, "label"].values
y_pred = y_pred_warranty.values

micro_f1_score = f1_score(y_true, y_pred, average="micro")
print(micro_f1_score)

# zero_division=0 scores undefined precision/recall as 0, which is also the
# default fallback, but without emitting a warning for each affected metric.
clf_report = classification_report(y_true, y_pred, zero_division=0)
print(clf_report)

0.45786204642039885


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                                   precision    recall  f1-score   support

                                 account security       0.00      0.00      0.00        11
                             bad customer service       0.00      0.00      0.00        33
                       billing or charge disputes       0.00      0.00      0.00         4
                                     cancel order       0.00      0.00      0.00        51
cancellation of a plan subscription or membership       0.00      0.00      0.00         9
                           change or update order       0.00      0.00      0.00       281
                            change payment method       0.00      0.00      0.00         5
                          change shipping address       0.00      0.00      0.00        22
                                customer feedback       0.00      0.00      0.00        14
                                  damaged product       0.48      0.29      0.36       67

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Final metrics

In [None]:
y_pred_warranty

3              schedule repair
4              schedule repair
9               software error
10           defective product
15             schedule repair
                 ...          
47403          troubleshooting
47408    schedule installation
47410        defective product
47412           software error
47415          schedule repair
Length: 15295, dtype: object

In [None]:
# collate predictions
# Each per-cluster Series carries the original row labels, so concatenating and
# sorting by index restores the original row order.
y_preds = pd.concat(
    [
        y_pred_authorization,
        y_pred_order,
        y_pred_product,
        y_pred_queries_website,
        y_pred_warranty,
    ]
).sort_index()

# The final metrics compare y_preds with df_labels positionally, so every row
# must have received exactly one layer-2 prediction. Fail loudly otherwise.
assert y_preds.index.equals(df_labels.index), (
    "collated predictions do not line up with df_labels: "
    f"{len(y_preds)} predictions for {len(df_labels)} rows"
)

In [None]:
y_preds

0        product availability and stock
1               miscellaneous inquiries
2               product details inquiry
3                       schedule repair
4                       schedule repair
                      ...              
47411                      cancel order
47412                    software error
47413    product availability and stock
47414                      cancel order
47415                   schedule repair
Length: 47416, dtype: object

In [None]:
# print overall f1-score and classification report
# End-to-end quality of the two-layer pipeline. The two arrays are compared
# positionally, which relies on y_preds being sorted back into df_labels order.
y_true = df_labels["label"].values
y_pred = y_preds.values

micro_f1_score = f1_score(y_true, y_pred, average="micro")
print(micro_f1_score)

# zero_division=0 scores undefined precision/recall as 0, which is also the
# default fallback, but without emitting a warning for each affected metric.
clf_report = classification_report(y_true, y_pred, zero_division=0)
print(clf_report)

0.496688881390248


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                                                   precision    recall  f1-score   support

                                 account security       0.24      0.33      0.28       212
                             bad customer service       0.22      0.23      0.23       346
                       billing or charge disputes       0.43      0.25      0.32       375
                                     cancel order       0.60      0.49      0.54       982
cancellation of a plan subscription or membership       0.65      0.68      0.66       796
                           change or update order       0.41      0.43      0.42      3037
                            change payment method       0.47      0.37      0.41       472
                          change shipping address       0.64      0.41      0.50       374
                                customer feedback       0.18      0.09      0.12       128
                                  damaged product       0.48      0.17      0.26      113

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# How many rows ended up with the catch-all predictions "other" or
# "wrong_cluster"? Shown as part of the count of every final predicted label.
y_preds.value_counts()

schedule repair                                      6776
product details inquiry                              6617
product availability and stock                       5063
change or update order                               3189
defective product                                    2154
schedule installation                                2056
renewal of a plan subscription or membership         1720
miscellaneous inquiries                              1612
return request                                       1489
other                                                1170
schedule order pickup                                1126
troubleshooting                                      1114
cancellation of a plan subscription or membership     833
price match                                           821
cancel order                                          800
software error                                        733
delivery tracking                                     728
reschedule del