# Imports

In [1]:
from models.dataset import Dataset
import h2o
from models.nlpmodel import NLPModel
import joblib

# Data Loading

In [2]:
# Build the project's Dataset wrapper from the customer-support-tickets CSV path.
# `sentiment` is the object every later cell in this section operates on.
sentiment = Dataset("data/customer_support_tickets.csv")

In [3]:
# Preview the first rows to sanity-check that the CSV was read as expected.
sentiment.head()

Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0


In [4]:
# List the column names so the text and target columns used below can be checked.
sentiment.columns()

['Ticket ID',
 'Customer Name',
 'Customer Email',
 'Customer Age',
 'Customer Gender',
 'Product Purchased',
 'Date of Purchase',
 'Ticket Type',
 'Ticket Subject',
 'Ticket Description',
 'Ticket Status',
 'Resolution',
 'Ticket Priority',
 'Ticket Channel',
 'First Response Time',
 'Time to Resolution',
 'Customer Satisfaction Rating']

In [5]:
# Set the dtype of "Ticket Priority" to "str"; this column is later passed to
# NLPModel as the target.
# NOTE(review): presumably this mutates `sentiment` in place — confirm against Dataset.set_dtype.
sentiment.set_dtype("Ticket Priority", "str")

In [6]:
# Show the per-column dtypes to verify the dataset after the set_dtype call.
sentiment.dtypes()

Unnamed: 0,column,dtype
Ticket ID,Ticket ID,int64
Customer Name,Customer Name,object
Customer Email,Customer Email,object
Customer Age,Customer Age,int64
Customer Gender,Customer Gender,object
Product Purchased,Product Purchased,object
Date of Purchase,Date of Purchase,object
Ticket Type,Ticket Type,object
Ticket Subject,Ticket Subject,object
Ticket Description,Ticket Description,object


In [11]:
# Serialize the Dataset object to disk with joblib so it can be reloaded later.
# NOTE(review): this cell's execution count (11) is higher than the training cells
# further down (9, 10), so it was run out of file order — confirm the notebook
# still works under Restart & Run All.
# NOTE(review): the target directory is assumed to exist already; joblib.dump
# does not appear to create it — confirm.
joblib.dump(sentiment, "persistance/sentiment.joblib")


['persistance/sentiment.joblib']

In [43]:
# Report the (rows, columns) shape of the `microsoft` object.
# NOTE(review): `microsoft` is not defined in any cell visible above, and this
# cell's execution count (43) is far out of sequence — it looks like leftover
# kernel state. On a fresh kernel this would raise NameError unless `microsoft`
# is created elsewhere; define it explicitly before this cell or remove the cell.
microsoft.get_shape()

Rows: 1511
Columns: 6


(1511, 6)

# Sentiment Analysis: Predicting Ticket Priority from Ticket Text

In [9]:
# Configure the text model: the two free-text ticket columns are the inputs and
# "Ticket Priority" is the label to predict.
# NOTE(review): per the recorded output, constructing NLPModel connects to an H2O
# instance at localhost:54321 and parses the data — confirm the cluster is
# started somewhere reproducible (h2o is imported above but never called here).
nlp_model = NLPModel(
    dataset=sentiment,
    text_columns=["Ticket Description", "Ticket Subject"],
    target_column="Ticket Priority",  # target may be numerical or categorical
    max_models=10,  # presumably the cap on models AutoML may train — confirm
    max_runtime_secs=600  # 10-minute training budget so the search is not cut short
)

Checking whether there is an H2O instance running at http://localhost:54321. connected.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,1 hour 15 mins
H2O_cluster_timezone:,America/Vancouver
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.6
H2O_cluster_version_age:,4 months and 20 days
H2O_cluster_name:,H2O_from_python_saiubc_6mpmu1
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.852 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [10]:
# Train the configured model, then print the leaderboard of candidate models.
# NOTE(review): the recorded leaderboard shows mean_per_class_error of roughly
# 0.74-0.75 for every model; if there are four priority classes that is close
# to chance — confirm before relying on the predictions below.
nlp_model.train()
print(nlp_model.leaderboard())

AutoML progress: |
16:56:58.384: AutoML: XGBoost is not available; skipping it.

███████████████████████████████████████████████████████████████| (done) 100%
✅ Training complete.
model_id                                                    mean_per_class_error    logloss      rmse       mse
GBM_grid_1_AutoML_11_20250322_165658_model_1                            0.736133    1.39639  0.750289  0.562934
GBM_2_AutoML_11_20250322_165658                                         0.743117    1.39444  0.750171  0.562757
XRT_1_AutoML_11_20250322_165658                                         0.747008    1.3969   0.750483  0.563224
DRF_1_AutoML_11_20250322_165658                                         0.747756    1.39671  0.75033   0.562996
GBM_1_AutoML_11_20250322_165658                                         0.74907     1.40181  0.751146  0.56422
StackedEnsemble_BestOfFamily_1_AutoML_11_20250322_165658                0.75        1.38612  0.74991   0.562365
DeepLearning_1_AutoML_11_20250322_1656

In [26]:
# Hand-written example ticket texts used as ad-hoc inputs for the prediction
# cell that follows (one short, one medium, one long multi-sentence example).
sample_texts = [
    "I'm very unhappy with the product setup, nothing works.",
    "I really I'm using the original charger that came with my product, but it's not charging properly.",
    "I'm unable to access my Netflix account. It keeps displaying an 'Invalid Credentials' error, even though I'm using the correct login information. How can I regain access to my account? Solution 1 I'm unable to find the option to perform the desired action in the user guide. Could you please guide me through the steps?"
]

In [27]:
# Run the trained model on the sample texts and print the result; the recorded
# output shows one priority label per input text.
predictions = nlp_model.predict(sample_texts)
print("Predicted priorities:", predictions)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
gbm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
Predicted priorities: ['Medium', 'Low', 'High']





In [28]:
# Re-print the `predictions` variable assigned earlier; no new inference runs here.
# NOTE(review): this duplicates the print in the prediction cell and could be dropped.
print("Predicted priorities:", predictions)

Predicted priorities: ['Medium', 'Low', 'High']
