In [183]:
%run config.py

In [34]:
import numpy as np
import pandas as pd
from copy import deepcopy

#visualization packages
import seaborn as sns
import matplotlib.pyplot as plt

# NLP modules we will use for text normalization
import re #regex 
import nltk # the natural language toolkit
from nltk.tokenize import word_tokenize
from nltk import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk import pos_tag

# feature construction
from sklearn.feature_extraction.text import TfidfVectorizer #use this to create BoW matrix

In [23]:
df = pd.read_csv('Data/customer_support_tickets.csv')

In [24]:
import pyLDAvis.lda_model


#modeling and dimensionality reduction for visuaization
from sklearn.decomposition import NMF
from sklearn.manifold import TSNE

In [25]:
df['Ticket Description'] = df.apply(lambda x: x['Ticket Description'].replace("{product_purchased}", x['Product Purchased']), axis=1)


In [28]:
df['Ticket Description'] = df['Ticket Description'].str.lower()

In [58]:
cleaned_df = df[['Ticket ID','Product Purchased','Ticket Type','Ticket Description']]

In [59]:
cleaned_df

Unnamed: 0,Ticket ID,Product Purchased,Ticket Type,Ticket Description
0,1,GoPro Hero,Technical issue,i'm having an issue with the gopro hero. pleas...
1,2,LG Smart TV,Technical issue,i'm having an issue with the lg smart tv. plea...
2,3,Dell XPS,Technical issue,i'm facing a problem with my dell xps. the del...
3,4,Microsoft Office,Billing inquiry,i'm having an issue with the microsoft office....
4,5,Autodesk AutoCAD,Billing inquiry,i'm having an issue with the autodesk autocad....
...,...,...,...,...
8464,8465,LG OLED,Product inquiry,my lg oled is making strange noises and not fu...
8465,8466,Bose SoundLink Speaker,Technical issue,i'm having an issue with the bose soundlink sp...
8466,8467,GoPro Action Camera,Technical issue,i'm having an issue with the gopro action came...
8467,8468,PlayStation,Product inquiry,i'm having an issue with the playstation. plea...


In [64]:
def process_ticket(ticket_text, min_length):
    
    # get common stop words that we'll remove during tokenization/text normalization
    stop_words = stopwords.words('english')

    #initialize lemmatizer
    wnl = WordNetLemmatizer()

    # helper function to change nltk's part of speech tagging to a wordnet format.
    def pos_tagger(nltk_tag):
        if nltk_tag.startswith('J'):
            return wordnet.ADJ
        elif nltk_tag.startswith('V'):
            return wordnet.VERB
        elif nltk_tag.startswith('N'):
            return wordnet.NOUN
        elif nltk_tag.startswith('R'):
            return wordnet.ADV
        else:         
            return None
   

    # lower case everything
    ticket_lower = ticket_text.lower()

    ticket_lower = re.sub(r"@[a-z0-9_]+|#[a-z0-9_]+|http\S+", "", ticket_lower).strip().replace("\r", "").replace("\n", "").replace("\t", "")

    # remove stop words and punctuations 
    
    extra_stop_words = ['issue','help','problem','review','name','seem','experience','please']
    
    ticket_norm = [x for x in word_tokenize(ticket_lower) if ((x.isalpha()) & (x not in stop_words)) ]
    ticket_norm = [x for x in word_tokenize(ticket_lower) if ((x.isalpha()) & (x not in extra_stop_words)) ]

    #  POS detection on the result will be important in telling Wordnet's lemmatizer how to lemmatize
    
    # creates list of tuples with tokens and POS tags in wordnet format
    wordnet_tagged = list(map(lambda x: (x[0], pos_tagger(x[1])), pos_tag(ticket_norm))) 

    # now we are going to have a cutoff here. any tokenized cocument with length < min length will be removed from corpus
    if len(wordnet_tagged) <= min_length:
        return ''
    else:
         # rejoins lemmatized sentence 
        ticket_norm = " ".join([wnl.lemmatize(x[0], x[1]) for x in wordnet_tagged if x[1] is not None])
        return ticket_norm

In [65]:
cleaned_df['Ticket Description'] = cleaned_df['Ticket Description'].apply(process_ticket, args = [10])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_df['Ticket Description'] = cleaned_df['Ticket Description'].apply(process_ticket, args = [10])


In [69]:
canon_df = cleaned_df[cleaned_df['Product Purchased']=='Canon EOS']

In [75]:
for type in canon_df['Ticket Type'].unique():
    print(type)

Refund request
Product inquiry
Technical issue
Billing inquiry
Cancellation request


In [76]:
for ticket_type in canon_df['Ticket Type'].unique():
    corpus = canon_df[canon_df['Ticket Type']==ticket_type]['Ticket Description']
    print(ticket_type)
    print('-------')
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(corpus)
    X_train

    topic_model = NMF(n_components = 5)
    topic_model.fit(X_train)

    # to get H
    H = topic_model.transform(X_train) # transform document into topic vector representation

    # to get W 
    W = topic_model.components_ # word component weights for each topic

    for index,topic in enumerate(W):
        print(f'THE TOP 10 WORDS FOR TOPIC #{index}')
        print([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[-25:]])
        print('\n')

Refund request
-------
THE TOP 10 WORDS FOR TOPIC #0
['screen', 'pop', 'message', 'mean', 'peculiar', 'up', 'resolve', 'notice', 'troubleshoot', 'assist', 'order', 'network', 'connect', 'use', 'eos', 'have', 'do', 'try', 'different', 'cable', 'peripheral', 'adapter', 'be', 'persists', 'canon']


THE TOP 10 WORDS FOR TOPIC #1
['box', 'page', 'need', 'tell', 'type', 'do', 'back', 'store', 'want', 'something', 'search', 'see', 'get', 'have', 'assist', 'eos', 'canon', 'step', 'option', 'find', 'perform', 'unable', 'action', 'guide', 'desire']


THE TOP 10 WORDS FOR TOPIC #2
['service', 'do', 'act', 'intermittent', 'unexpectedly', 'sometimes', 'be', 'eos', 'respond', 'canon', 'yesterday', 'now', 'turn', 'not', 'work', 'fine', 'face', 'already', 'multiple', 'contact', 'remain', 'unresolved', 'customer', 'support', 'time']


THE TOP 10 WORDS FOR TOPIC #3
['check', 'cost', 'crash', 'id', 'recent', 'account', 'occur', 'purchase', 'make', 'be', 'way', 'software', 'recover', 'start', 'have', 'los

In [94]:
# Check if 'Assigned Topic' and 'Topic Words' columns exist, if not, create them
if 'Assigned Topic' not in cleaned_df.columns:
    cleaned_df['Assigned Topic'] = None
if 'Topic Words' not in cleaned_df.columns:
    cleaned_df['Topic Words'] = None

# Initialize a dictionary to store top words for each topic of each product and ticket type
topic_words_dict = {}

# Iterate over each product
for product in cleaned_df['Product Purchased'].unique():
    # Filter for tickets related to the current product
    product_df = cleaned_df[cleaned_df['Product Purchased'] == product]
    
    for ticket_type in product_df['Ticket Type'].unique():
        # Selecting the subset of the DataFrame for the current ticket type
        subset_df = product_df[product_df['Ticket Type'] == ticket_type]
        corpus = subset_df['Ticket Description']
        ticket_ids = subset_df.index  # Using DataFrame index as a proxy for ticket ID if 'Ticket ID' column does not exist

        # Initialize TF-IDF Vectorizer and NMF Model
        vectorizer = TfidfVectorizer()
        X_train = vectorizer.fit_transform(corpus)
        topic_model = NMF(n_components=5, random_state=42)  # Added random_state for reproducibility
        topic_model.fit(X_train)

        # Transform document into topic vector representation
        H = topic_model.transform(X_train)  # Document-topic matrix
        W = topic_model.components_  # Topic-term matrix

        # For each topic, store the top words
        for topic_idx, topic in enumerate(W):
            top_words = [vectorizer.get_feature_names_out()[i] for i in topic.argsort()[-20:]]  # Get top 10 words
            topic_key = f"{product}_{ticket_type}_Topic{topic_idx}"
            topic_words_dict[topic_key] = ', '.join(top_words)  # Store as a comma-separated string

        # Assign tickets to topics based on the highest topic weight
        for ticket_index, topic_vector in zip(ticket_ids, H):
            # Find the topic with the highest weight for this ticket
            assigned_topic = topic_vector.argmax()
            topic_key = f"{product}_{ticket_type}_Topic{assigned_topic}"
            # Update the 'Assigned Topic' in cleaned_df directly
            cleaned_df.at[ticket_index, 'Assigned Topic'] = topic_key

# Map the 'Assigned Topic' to its corresponding top words
cleaned_df['Topic Words'] = cleaned_df['Assigned Topic'].map(topic_words_dict)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_df['Topic Words'] = cleaned_df['Assigned Topic'].map(topic_words_dict)


In [95]:
cleaned_df

Unnamed: 0,Ticket ID,Product Purchased,Ticket Type,Ticket Description,Assigned Topic,Topic Words
0,1,GoPro Hero,Technical issue,have gopro hero bill zip code appreciate have request website double check email address try troubleshoot step mention user manual persists,GoPro Hero_Technical issue_Topic0,"start, relate, recently, afterward, use, late, version, run, different, firmware, happen, check, persists, already, assist, be, have, gopro, hero, update"
1,2,LG Smart TV,Technical issue,have lg smart tv need change exist have lg smart tv face intermittent sometimes work fine other time act up unexpectedly,LG Smart TV_Technical issue_Topic1,"lg, unable, use, keep, even, display, credential, correct, regain, login, information, work, need, productivity, affect, soon, assistance, possible, access, account"
2,3,Dell XPS,Technical issue,face dell dell xps not turn be work fine yesterday now do really use original charger come dell not charge properly,Dell XPS_Technical issue_Topic1,"website, official, support, up, be, other, time, dell, sometimes, unexpectedly, intermittent, act, do, now, turn, yesterday, not, work, fine, face"
3,4,Microsoft Office,Billing inquiry,have microsoft office have interest see happen check feedback already contact customer support multiple time remain unresolved,Microsoft Office_Billing inquiry_Topic2,"recently, relate, available, not, happen, other, occur, assist, recent, change, device, software, office, microsoft, make, start, be, have, product, update"
4,5,Autodesk AutoCAD,Billing inquiry,have autodesk autocad seller not responsible damage arise delivery battleground game have game good condition ship notice sudden decrease battery life autodesk use last much long,Autodesk AutoCAD_Billing inquiry_Topic1,"be, not, add, time, fine, now, face, autodesk, product, message, pop, screen, mean, peculiar, do, notice, account, error, say, up"
...,...,...,...,...,...,...
8464,8465,LG OLED,Product inquiry,lg oled make strange noise not function properly suspect be hardware always email support need assistance soon possible affect work productivity,LG OLED_Product inquiry_Topic1,"lg, possible, properly, time, be, function, hardware, suspect, strange, make, sometimes, face, other, fine, intermittent, unexpectedly, act, up, noise, work"
8465,8466,Bose SoundLink Speaker,Technical issue,have bose soundlink speaker also buy get stick face intermittent sometimes work fine other time act up unexpectedly,Bose SoundLink Speaker_Technical issue_Topic1,"stick, note, price, list, product, have, assist, bose, speaker, soundlink, be, use, time, already, customer, multiple, support, remain, unresolved, contact"
8466,8467,GoPro Action Camera,Technical issue,have gopro action camera assist be use different browser use perform factory reset gopro action camera hop resolve do,GoPro Action Camera_Technical issue_Topic0,"option, unable, same, find, product, browser, be, different, have, use, assist, do, camera, gopro, reset, action, resolve, perform, hop, factory"
8467,8468,PlayStation,Product inquiry,have playstation assist do think product same category playstation have be case noticed occurs consistently use specific feature application playstation,PlayStation_Product inquiry_Topic0,"clear, cache, network, connect, use, software, make, change, consistently, application, feature, do, same, persists, try, device, be, assist, playstation, have"


In [96]:
cleaned_df[cleaned_df['Product Purchased']=='Canon EOS']['Topic Words']

19             account, occur, purchase, make, be, way, software, recover, start, have, lose, concern, ensure, security, safe, eos, update, canon, product, data
36       up, resolve, notice, troubleshoot, assist, order, network, connect, use, eos, have, do, try, different, cable, peripheral, adapter, be, persists, canon
85       not, perform, use, troubleshoot, eos, password, website, support, official, say, error, factory, hop, login, reset, canon, access, resolve, do, account
144                     do, back, store, want, something, search, see, get, have, assist, eos, canon, step, option, find, perform, unable, action, guide, desire
199            send, item, now, not, current, product, try, payment, uipybar, have, start, eos, canon, recently, relate, afterward, firmware, happen, update, be
                                                                                  ...                                                                           
8222       data, support, step, eo

In [98]:
cleaned_df

Unnamed: 0,Ticket ID,Product Purchased,Ticket Type,Ticket Description,Assigned Topic,Topic Words
0,1,GoPro Hero,Technical issue,have gopro hero bill zip code appreciate have request website double check email address try troubleshoot step mention user manual persists,GoPro Hero_Technical issue_Topic0,"start, relate, recently, afterward, use, late, version, run, different, firmware, happen, check, persists, already, assist, be, have, gopro, hero, update"
1,2,LG Smart TV,Technical issue,have lg smart tv need change exist have lg smart tv face intermittent sometimes work fine other time act up unexpectedly,LG Smart TV_Technical issue_Topic1,"lg, unable, use, keep, even, display, credential, correct, regain, login, information, work, need, productivity, affect, soon, assistance, possible, access, account"
2,3,Dell XPS,Technical issue,face dell dell xps not turn be work fine yesterday now do really use original charger come dell not charge properly,Dell XPS_Technical issue_Topic1,"website, official, support, up, be, other, time, dell, sometimes, unexpectedly, intermittent, act, do, now, turn, yesterday, not, work, fine, face"
3,4,Microsoft Office,Billing inquiry,have microsoft office have interest see happen check feedback already contact customer support multiple time remain unresolved,Microsoft Office_Billing inquiry_Topic2,"recently, relate, available, not, happen, other, occur, assist, recent, change, device, software, office, microsoft, make, start, be, have, product, update"
4,5,Autodesk AutoCAD,Billing inquiry,have autodesk autocad seller not responsible damage arise delivery battleground game have game good condition ship notice sudden decrease battery life autodesk use last much long,Autodesk AutoCAD_Billing inquiry_Topic1,"be, not, add, time, fine, now, face, autodesk, product, message, pop, screen, mean, peculiar, do, notice, account, error, say, up"
...,...,...,...,...,...,...
8464,8465,LG OLED,Product inquiry,lg oled make strange noise not function properly suspect be hardware always email support need assistance soon possible affect work productivity,LG OLED_Product inquiry_Topic1,"lg, possible, properly, time, be, function, hardware, suspect, strange, make, sometimes, face, other, fine, intermittent, unexpectedly, act, up, noise, work"
8465,8466,Bose SoundLink Speaker,Technical issue,have bose soundlink speaker also buy get stick face intermittent sometimes work fine other time act up unexpectedly,Bose SoundLink Speaker_Technical issue_Topic1,"stick, note, price, list, product, have, assist, bose, speaker, soundlink, be, use, time, already, customer, multiple, support, remain, unresolved, contact"
8466,8467,GoPro Action Camera,Technical issue,have gopro action camera assist be use different browser use perform factory reset gopro action camera hop resolve do,GoPro Action Camera_Technical issue_Topic0,"option, unable, same, find, product, browser, be, different, have, use, assist, do, camera, gopro, reset, action, resolve, perform, hop, factory"
8467,8468,PlayStation,Product inquiry,have playstation assist do think product same category playstation have be case noticed occurs consistently use specific feature application playstation,PlayStation_Product inquiry_Topic0,"clear, cache, network, connect, use, software, make, change, consistently, application, feature, do, same, persists, try, device, be, assist, playstation, have"


In [181]:
import random
index = random.randint(0, cleaned_df.shape[0])

# cleaned_df.iloc[[index]]

product = cleaned_df.iloc[index]['Product Purchased']
issue_type = cleaned_df.iloc[index]['Ticket Type']
topic_words = cleaned_df.iloc[index]['Topic Words']
description = cleaned_df.iloc[index]['Ticket Description']

print(product)
print(issue_type)
print(topic_words)
print()
print(description)


GoPro Action Camera
Technical issue
affect, possible, soon, assistance, other, have, gopro, up, camera, product, action, not, unexpectedly, sometimes, act, intermittent, time, fine, face, work

have gopro action camera message product not available have not be mark try have purchase incorrect product face intermittent sometimes work fine other time act up unexpectedly


In [186]:
load_dotenv()

api_key = os.getenv('OPENAI_API_KEY')

client = OpenAI(api_key = api_key)

response = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": "You are a customer support bot. Your job is to give helpful advice when a customer writes in with a given issue. We have used a machine learning model to assign a topic to their specific request based on the top 20 words associated with that topic. The response should always contain a resolution, there will be no opportunity for followup from the user."},
    {"role": "user", "content": f"Product is {product}, Issue type is {issue_type}, Topic words are {topic_words}"}
  ]
)

print(response.choices[0].message.content)

If your GoPro action camera is not working fine and is acting up intermittently, there might be a technical issue affecting its performance. To troubleshoot this problem, you can try the following steps:
1. Make sure the camera is fully charged.
2. Check for any software updates for your GoPro product.
3. Reset the camera to its factory settings.
4. Test the camera in different shooting scenarios to see if the issue persists.
If the problem continues, reach out to GoPro customer assistance for further help and possible solutions.
