In [6]:
import pandas as pd
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

In [8]:
import nltk

In [9]:
# Fetch the NLTK stop-word corpus that stopwords.words('english') reads later on
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Bluechip\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [19]:
# Fetch the Punkt tokenizer data; presumably what word_tokenize relies on —
# confirm the resource name against the installed NLTK version
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Bluechip\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [25]:
# Fetch the WordNet corpus; presumably the data backing WordNetLemmatizer — confirm
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Bluechip\AppData\Roaming\nltk_data...


True

In [26]:
# Read the customer-service dataset from disk into a DataFrame
csv_path = 'customer_service.csv'
data = pd.read_csv(csv_path)


In [27]:
# Shared text-cleaning resources: the English stop-word set and a WordNet lemmatizer
english_stop_words = stopwords.words('english')
stop_words = set(english_stop_words)
lemmatizer = WordNetLemmatizer()

In [28]:
def preprocess(text):
    """Normalize a raw message into a space-joined string of lemmatized tokens.

    Steps: lowercase, strip every character that is not a letter, digit or
    whitespace, tokenize with ``word_tokenize``, drop tokens present in
    ``stop_words``, lemmatize the survivors with ``lemmatizer`` and re-join
    them with single spaces.

    Args:
        text: The message to clean. Missing values (``None`` or a float NaN,
            which pandas uses for empty CSV cells) are treated as an empty
            message; any other non-string value is converted with ``str()``.

    Returns:
        str: The cleaned text, or ``''`` when the input is missing.
    """
    # Guard against missing cells: NaN is the only float not equal to itself.
    # Without this, ``text.lower()`` raises AttributeError on such rows.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)
    # convert to lowercase
    text = text.lower()
    # remove non-alphanumeric characters
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # tokenize text
    tokens = word_tokenize(text)
    # remove stop words
    tokens = [word for word in tokens if word not in stop_words]
    # lemmatize tokens
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    # join tokens back into text
    text = ' '.join(tokens)
    return text
# Swap the raw messages for their cleaned form so later cells work on normalized text
cleaned_text = data['text'].apply(preprocess)
data['text'] = cleaned_text

In [29]:
# Split the data into training and testing sets (80% train / 20% test).
# A fixed random_state makes the shuffle, and therefore every downstream
# result, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'], data['intent'], test_size=0.2, random_state=42
)


In [30]:
# Turn the text into TF-IDF vectors: the vocabulary is learned from the
# training split first, then reused unchanged to encode the test split
vectorizer = TfidfVectorizer()
X_train_vectors, X_test_vectors = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

In [31]:
# Fit a multinomial Naive Bayes classifier that maps TF-IDF vectors to intent labels
clf = MultinomialNB().fit(X_train_vectors, y_train)
clf

In [32]:
# Build a chatbot using the trained model to generate responses based on the user's input
def classify_intent(text):
    """Predict the intent label for a single raw user message.

    The message is cleaned with ``preprocess``, encoded with the fitted
    ``vectorizer`` and passed to ``clf``; the first (only) prediction is
    returned.
    """
    features = vectorizer.transform([preprocess(text)])
    predictions = clf.predict(features)
    return predictions[0]

def get_response(intent):
    """Return the canned reply for an intent label.

    Recognized labels are 'internet', 'account' and 'order'; anything else
    gets a generic "did not understand" reply.
    """
    if intent == 'internet':
        return 'Please contact our internet service provider at 1-800-XXX-XXXX'
    if intent == 'account':
        return 'Please provide us with your account details so that we can assist you better.'
    if intent == 'order':
        return 'We apologize for the delay in your order. Can you please provide your order number?'
    # Further intents can be handled with additional checks above this line.
    return 'Sorry, I did not understand your request. Can you please try again?'

In [33]:
# Smoke-test the chatbot end to end on one sample message:
# classify the message, look up the canned reply, and print it.
text = 'My order is delayed'
intent = classify_intent(text)  # predicted intent label for the sample
response = get_response(intent)  # canned reply for that label
print(response)

We apologize for the delay in your order. Can you please provide your order number?
