In [6]:
import os
import shutil

# Make the required directory
os.makedirs("/root/.kaggle", exist_ok=True)

# Copy your uploaded kaggle.json to the correct path
shutil.copy("/content/kaggle/kaggle.json", "/root/.kaggle/kaggle.json")

# Set correct permissions
os.chmod("/root/.kaggle/kaggle.json", 600)


In [7]:
!kaggle datasets download -d suraj520/customer-support-ticket-dataset

Dataset URL: https://www.kaggle.com/datasets/suraj520/customer-support-ticket-dataset
License(s): CC0-1.0
Downloading customer-support-ticket-dataset.zip to /content
  0% 0.00/828k [00:00<?, ?B/s]
100% 828k/828k [00:00<00:00, 516MB/s]


In [15]:
import pandas as pd
import zipfile

# Unzip the downloaded file
with zipfile.ZipFile("customer-support-ticket-dataset.zip", "r") as zip_ref:
    zip_ref.extractall("data")

# Load the dataset
df = pd.read_csv("/content/data/customer_support_tickets.csv")
display(df.head())

Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0


In [16]:
# Check for missing values
print("Missing values per column:")
print(df.isnull().sum())

# Clean up the 'Ticket Description' column
df['Ticket Description'] = df['Ticket Description'].str.replace(r'{product_purchased}', '', regex=True)

print("\nCleaned 'Ticket Description' column:")
display(df[['Ticket Description']].head())

Missing values per column:
Ticket ID                          0
Customer Name                      0
Customer Email                     0
Customer Age                       0
Customer Gender                    0
Product Purchased                  0
Date of Purchase                   0
Ticket Type                        0
Ticket Subject                     0
Ticket Description                 0
Ticket Status                      0
Resolution                      5700
Ticket Priority                    0
Ticket Channel                     0
First Response Time             2819
Time to Resolution              5700
Customer Satisfaction Rating    5700
dtype: int64

Cleaned 'Ticket Description' column:


Unnamed: 0,Ticket Description
0,I'm having an issue with the . Please assist.\...
1,I'm having an issue with the . Please assist.\...
2,I'm facing a problem with my . The is not tur...
3,I'm having an issue with the . Please assist.\...
4,I'm having an issue with the . Please assist.\...


In [12]:
!pip install transformers



In [17]:
from transformers import pipeline

# Load the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification")

# Define the candidate labels (ticket categories)
candidate_labels = ['Technical issue', 'Billing inquiry', 'Product inquiry', 'Account access', 'Data loss']

# Get a few sample ticket descriptions
sample_tickets = df['Ticket Description'].head().tolist()

# Classify the sample tickets
for ticket in sample_tickets:
    result = classifier(ticket, candidate_labels)
    print(f"Ticket: {ticket}")
    print(f"Predicted labels: {result['labels']}")
    print(f"Scores: {result['scores']}\n")

No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


Ticket: I'm having an issue with the . Please assist.

Your billing zip code is: 71701.

We appreciate that you have requested a website address.

Please double check your email address. I've tried troubleshooting steps mentioned in the user manual, but the issue persists.
Predicted labels: ['Billing inquiry', 'Technical issue', 'Product inquiry', 'Account access', 'Data loss']
Scores: [0.4026695191860199, 0.36453503370285034, 0.12670792639255524, 0.05335420370101929, 0.05273332819342613]

Ticket: I'm having an issue with the . Please assist.

If you need to change an existing product.

I'm having an issue with the . Please assist.

If The issue I'm facing is intermittent. Sometimes it works fine, but other times it acts up unexpectedly.
Predicted labels: ['Product inquiry', 'Technical issue', 'Billing inquiry', 'Account access', 'Data loss']
Scores: [0.6769711971282959, 0.19635339081287384, 0.05294961854815483, 0.04101216048002243, 0.032713621854782104]

Ticket: I'm facing a problem w

In [14]:
# Create a few-shot prompt
few_shot_prompt = """
Classify the following support ticket descriptions into one of these categories: Technical issue, Billing inquiry, Product inquiry, Account access, Data loss.

Here are some examples:

Ticket: "I can't log in to my account."
Category: Account access

Ticket: "My credit card was charged twice for the same subscription."
Category: Billing inquiry

Ticket: "The software is crashing every time I try to open it."
Category: Technical issue

Now, classify this ticket:

Ticket: "{ticket}"
Category:
"""

# Test with a sample ticket
sample_ticket = df['Ticket Description'].iloc[10] # You can change this index to test other tickets

# Fill the prompt with the sample ticket
prompt = few_shot_prompt.format(ticket=sample_ticket)

# Use a text-generation pipeline for few-shot learning
text_generator = pipeline("text-generation", model="distilgpt2")
result = text_generator(prompt, max_length=150, num_return_sequences=1)

print(f"Sample Ticket: {sample_ticket}")
print(f"\nGenerated Text: {result[0]['generated_text']}")

# Note: The output will be the full text including the prompt.
# We are interested in the category that the model generates after the "Category:" label.

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Sample Ticket: I'm having an issue with the . Please assist. 1-800-799-0808.

Product Search: What's New in 2-3-4-5? Report Feedback Customer Service is your best I'm using the original charger that came with my , but it's not charging properly.

Generated Text: 
Classify the following support ticket descriptions into one of these categories: Technical issue, Billing inquiry, Product inquiry, Account access, Data loss.

Here are some examples:

Ticket: "I can't log in to my account."
Category: Account access

Ticket: "My credit card was charged twice for the same subscription."
Category: Billing inquiry

Ticket: "The software is crashing every time I try to open it."
Category: Technical issue

Now, classify this ticket:

Ticket: "I'm having an issue with the . Please assist. 1-800-799-0808.

Product Search: What's New in 2-3-4-5? Report Feedback Customer Service is your best I'm using the original charger that came with my , but it's not charging properly."
Category:
14-17?
Category: T