# **Automated Query Response System**
This project is a **simplified** version of my Summer Internship experience. The data I will be using is very limited and is randomly generated. The classes might be imbalanced and the overall quality might not be the best. It is created to help me reflect on the experience and solidify the key concepts used.

By Zichen Liu

## Step 1: Create Dataset
Creating my own training dataset of common queries, labeling each query with its intent and entities.

In [None]:
# Hand-built dataset of 49 support queries, stored as three parallel lists:
# entry i of "queries", "intent" and "entities" all describe the same query.
data = {
    # Raw user queries, exactly as a user would type them (case and punctuation kept)
    "queries": [
        "How do I reset my password?",
        "Where can I download the latest firmware?",
        "My software is crashing when I try to open it.",
        "Can I upgrade my subscription plan?",
        "I am getting an error code 404.",
        "How can I change my account email?",
        "What are the specifications of the latest model?",
        "The app keeps freezing. What should I do?",
        "How do I connect my device to Wi-Fi?",
        "How do I enable two-factor authentication?",
        "My battery is draining too quickly.",
        "Is there a way to recover deleted files?",
        "How do I check my warranty status?",
        "Can I schedule a service appointment online?",
        "I want to cancel my order.",
        "What payment methods do you accept?",
        "How can I return a product?",
        "Where is the nearest service center?",
        "My order is delayed, what should I do?",
        "How do I activate my product license?",
        "Can I transfer my data to a new device?",
        "The touchscreen is unresponsive.",
        "How do I update my profile information?",
        "Can I get a refund for my subscription?",
        "How do I track my order?",
        "What is the warranty on this product?",
        "The screen is flickering. What should I do?",
        "Can I download previous firmware versions?",
        "How do I set up parental controls?",
        "My internet connection is slow.",
        "Can I upgrade my hardware?",
        "How do I change my billing information?",
        "I received a damaged product, what should I do?",
        "How do I clear the app cache?",
        "Is there an extended warranty available?",
        "How do I disable two-factor authentication?",
        "How can I delete my account?",
        "My device is overheating. Help!",
        "Where can I find the user manual?",
        "How do I reinstall the operating system?",
        "Can I exchange my product for a different model?",
        "How do I recover my username?",
        "How can I reset my security questions?",
        "Can I get technical support over the phone?",
        "How do I disable location services?",
        "The app is not updating automatically.",
        "Can I postpone my subscription renewal?",
        "How do I check for software updates?",
        "What is the return policy on accessories?"
    ],

    # Fine-grained intent label for each query; these are folded into broader
    # categories right after this literal.
    "intent": [
        "Account_Update",
        "Firmware_Download",
        "Technical_Issue",
        "Subscription_Update",
        "Error_Code_Explanation",
        "Account_Update",
        "Product_Inquiry",
        "Technical_Issue",
        "Technical_Support",
        "Account_Security",
        "Technical_Issue",
        "Data_Recovery",
        "Warranty_Inquiry",
        "Service_Scheduling",
        "Order_Management",
        "Payment_Inquiry",
        "Return_Process",
        "Location_Inquiry",
        "Order_Management",
        "Product_Activation",
        "Data_Transfer",
        "Technical_Issue",
        "Account_Update",
        "Refund_Request",
        "Order_Tracking",
        "Warranty_Inquiry",
        "Technical_Issue",
        "Firmware_Download",
        "Parental_Control",
        "Technical_Issue",
        "Hardware_Upgrade",
        "Billing_Inquiry",
        "Return_Process",
        "Technical_Support",
        "Warranty_Inquiry",
        "Account_Security",
        "Account_Deletion",
        "Technical_Issue",
        "Product_Inquiry",
        "Operating_System",
        "Product_Exchange",
        "Account_Update",
        "Account_Security",
        "Technical_Support",
        "Account_Security",
        "Technical_Issue",
        "Subscription_Update",
        "Software_Update",
        "Return_Process"
    ],

    # One dict per query mapping an entity label ("Action", "Component" or
    # "Issue") to the text it refers to. Step 8 locates each value in the raw
    # query with str.find, so values are matched verbatim and case-sensitively.
    # NOTE(review): five values are paraphrases rather than substrings of their
    # query and therefore cannot be located that way: "find" (nearest service
    # center), "refund for subscription", "connection slow",
    # "product for different model" and "technical support over phone".
    "entities": [
        {"Action": "reset", "Component": "password"},
        {"Action": "download", "Component": "firmware"},
        {"Component": "software", "Issue": "crashing"},
        {"Action": "upgrade", "Component": "subscription plan"},
        {"Issue": "error code 404"},
        {"Action": "change", "Component": "account email"},
        {"Action": "specifications", "Component": "latest model"},
        {"Component": "app", "Issue": "freezing"},
        {"Action": "connect", "Component": "Wi-Fi"},
        {"Action": "enable", "Component": "two-factor authentication"},
        {"Component": "battery", "Issue": "draining too quickly"},
        {"Action": "recover", "Component": "deleted files"},
        {"Action": "check", "Component": "warranty status"},
        {"Action": "schedule", "Component": "service appointment"},
        {"Action": "cancel", "Component": "order"},
        {"Action": "accept", "Component": "payment methods"},
        {"Action": "return", "Component": "product"},
        {"Action": "find", "Component": "nearest service center"},
        {"Action": "delayed", "Component": "order"},
        {"Action": "activate", "Component": "product license"},
        {"Action": "transfer", "Component": "data to a new device"},
        {"Component": "touchscreen", "Issue": "unresponsive"},
        {"Action": "update", "Component": "profile information"},
        {"Action": "get", "Component": "refund for subscription"},
        {"Action": "track", "Component": "order"},
        {"Component": "product", "Issue": "warranty"},
        {"Component": "screen", "Issue": "flickering"},
        {"Action": "download", "Component": "previous firmware versions"},
        {"Action": "set up", "Component": "parental controls"},
        {"Component": "internet", "Issue": "connection slow"},
        {"Action": "upgrade", "Component": "hardware"},
        {"Action": "change", "Component": "billing information"},
        {"Action": "received", "Component": "damaged product"},
        {"Action": "clear", "Component": "app cache"},
        {"Component": "warranty", "Issue": "extended"},
        {"Action": "disable", "Component": "two-factor authentication"},
        {"Action": "delete", "Component": "account"},
        {"Component": "device", "Issue": "overheating"},
        {"Action": "find", "Component": "user manual"},
        {"Action": "reinstall", "Component": "operating system"},
        {"Action": "exchange", "Component": "product for different model"},
        {"Action": "recover", "Component": "username"},
        {"Action": "reset", "Component": "security questions"},
        {"Action": "get", "Component": "technical support over phone"},
        {"Action": "disable", "Component": "location services"},
        {"Component": "app", "Issue": "not updating automatically"},
        {"Action": "postpone", "Component": "subscription renewal"},
        {"Action": "check", "Component": "software updates"},
        {"Component": "return policy", "Issue": "accessories"}
    ]
}

# Keep a handle on the fine-grained labels before they are replaced
original_intents = data['intent']

# Broad category -> fine-grained intents that are folded into it
intent_groups = {
    "Account_Management": (
        "Account_Update", "Account_Security", "Billing_Inquiry", "Account_Deletion",
    ),
    "Technical_Support": (
        "Technical_Issue", "Firmware_Download", "Operating_System",
        "Hardware_Upgrade", "Technical_Support", "Software_Update",
    ),
    "Customer_Service": (
        "Order_Management", "Return_Process", "Refund_Request", "Order_Tracking",
        "Service_Scheduling", "Payment_Inquiry", "Location_Inquiry",
    ),
    "Product_Inquiry": (
        "Product_Activation", "Product_Exchange", "Warranty_Inquiry",
        "Product_Inquiry", "Parental_Control",
    ),
}

# Invert the table so every fine-grained intent points at its category
category_of = {
    fine: broad
    for broad, members in intent_groups.items()
    for fine in members
}

# Intents that belong to no category (e.g. "Subscription_Update") pass through unchanged
grouped_intents = [category_of.get(label, label) for label in original_intents]

# Overwrite the labels in the dataset with the grouped ones
data['intent'] = grouped_intents

# Show how many queries ended up in each class
from collections import Counter
intent_counts = Counter(data['intent'])
print("New Intent Distribution:", intent_counts)

New Intent Distribution: Counter({'Technical_Support': 16, 'Account_Management': 10, 'Customer_Service': 10, 'Product_Inquiry': 8, 'Subscription_Update': 2, 'Error_Code_Explanation': 1, 'Data_Recovery': 1, 'Data_Transfer': 1})


## Step 2: Preprocessing
Preprocessing the queries using NLTK by **lemmatization** and getting rid of irrelevant information


In [None]:
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# Download each NLTK resource exactly once. 'punkt_tab' is the tokenizer data
# that word_tokenize loads on NLTK 3.9+; 'punkt' is kept for older releases.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Stopwords do not need to be removed here because later on Vectorizer will take care of it

# Lowercase each query, then strip punctuation and special characters.
# Note that this also fuses hyphenated words ("two-factor" -> "twofactor").
queries = [re.sub(r'[^\w\s]', '', query.lower()) for query in data['queries']]

# Tokenize, lemmatize every token, and rejoin into one string per query
processed_queries = [
    ' '.join(lemmatizer.lemmatize(token) for token in nltk.word_tokenize(query))
    for query in queries
]

# Output the processed queries (for verification)
print("Processed Queries:")
for q in processed_queries:
    print(q)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Processed Queries:
how do i reset my password
where can i download the latest firmware
my software is crashing when i try to open it
can i upgrade my subscription plan
i am getting an error code 404
how can i change my account email
what are the specification of the latest model
the app keep freezing what should i do
how do i connect my device to wifi
how do i enable twofactor authentication
my battery is draining too quickly
is there a way to recover deleted file
how do i check my warranty status
can i schedule a service appointment online
i want to cancel my order
what payment method do you accept
how can i return a product
where is the nearest service center
my order is delayed what should i do
how do i activate my product license
can i transfer my data to a new device
the touchscreen is unresponsive
how do i update my profile information
can i get a refund for my subscription
how do i track my order
what is the warranty on this product
the screen is flickering what should i do
can 

## Step 3: Vectorization
Using **TF-IDF Vectorizer** to convert each query into a numeric vector of weighted word features so that it can be processed by the **Support Vector Machine (SVM)**.


In [None]:
# Import the TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the TF-IDF model; English stop words are dropped at this stage
vectorizer = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and encode every preprocessed query in one call.
# NOTE(review): the vectorizer is fitted on all queries, before the
# train/test split performed in the next step.
X = vectorizer.fit_transform(processed_queries)

# Target labels: the grouped intent of each query
y = data['intent']

# Sanity check: one row of X per label in y
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {len(y)}")

Shape of X: (49, 107)
Shape of y: 49


## Step 4: Splitting Data into Training and Testing
Using **train_test_split** to split data

In [None]:
# import sklearn
from sklearn.model_selection import train_test_split

# Hold out 20% of the queries for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

## Step 5: Training the Classifier
Training SVM classifier on the training set

In [None]:
# Import SVM
from sklearn.svm import SVC

# Baseline model: a support vector classifier with a linear kernel
classifier = SVC(kernel="linear")

# Fit it on the TF-IDF vectors of the training queries
classifier.fit(X_train, y_train)

## Step 6: Running the Model

In [None]:
# Import necessary library for evaluating the model
from sklearn.metrics import classification_report

# Predict the intent on the test set
y_pred = classifier.predict(X_test)

# Print the classification report to evaluate model performance.
# Classes that are never predicted have an undefined precision; zero_division=0
# reports them as 0.0 (the same value as before) without emitting a warning
# for every such metric.
print("Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))


Classification Report:
                     precision    recall  f1-score   support

 Account_Management       1.00      1.00      1.00         1
   Customer_Service       0.50      1.00      0.67         1
    Product_Inquiry       1.00      1.00      1.00         1
Subscription_Update       0.00      0.00      0.00         1
  Technical_Support       1.00      1.00      1.00         6

           accuracy                           0.90        10
          macro avg       0.70      0.80      0.73        10
       weighted avg       0.85      0.90      0.87        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Step 7: Evaluate the Parameters
The model is not fully fleshed out, largely because of the size and quality of the training dataset. However, let's see if we can get better results by fine-tuning the model's hyperparameters with cross-validation.

In [None]:
import numpy as np
from sklearn.model_selection import GridSearchCV

# Candidate regularisation strengths shared by both kernels
c_values = [0.1, 0.5, 1, 10, 100]

# One sub-grid per kernel. gamma only affects the RBF kernel, so crossing it
# with the linear kernel would refit identical linear models once per gamma value.
# The RBF grid is listed first so candidates are evaluated RBF-before-linear.
param_grid = [
    {
        'kernel': ['rbf'],
        'C': c_values,
        'gamma': ['scale', 1, 0.1, 0.01, 0.001, 0.0001],
    },
    {
        'kernel': ['linear'],
        'C': c_values,
    },
]

# 5-fold cross-validated search over the training split, scored by accuracy
optimal_params = GridSearchCV(classifier, param_grid, cv=5, scoring='accuracy')
optimal_params.fit(X_train, y_train)

print(optimal_params.best_params_)



{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [None]:
# Initialize the SVM classifier with the hyperparameters selected by the grid
# search above, instead of copying them by hand from its printed output
# (which silently goes stale whenever the data or the grid changes).
classifier = SVC(**optimal_params.best_params_, random_state=1)

# Train the classifier
classifier.fit(X_train, y_train)

In [None]:
# Predict the intent on the test set
y_pred = classifier.predict(X_test)

# Print the classification report to evaluate model performance.
# Classes that are never predicted have an undefined precision; zero_division=0
# reports them as 0.0 (the same value as before) without emitting a warning
# for every such metric.
print("Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))


Classification Report:
                     precision    recall  f1-score   support

 Account_Management       1.00      1.00      1.00         1
   Customer_Service       0.50      1.00      0.67         1
    Product_Inquiry       1.00      1.00      1.00         1
Subscription_Update       0.00      0.00      0.00         1
  Technical_Support       1.00      1.00      1.00         6

           accuracy                           0.90        10
          macro avg       0.70      0.80      0.73        10
       weighted avg       0.85      0.90      0.87        10



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Step 8: Integration of Named Entity Recognition (NER)


In [None]:
import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding
from spacy.tokens import DocBin
import pandas as pd

# Load the small English model from spaCy
nlp = spacy.load("en_core_web_sm")

# Create a DataFrame from the dataset
df = pd.DataFrame(data)

# Convert the dataset to spaCy's training format: one tokenized Doc per query,
# with every annotated entity attached as a labelled character span.
db = DocBin()
for _, row in df.iterrows():
    query_text = row["queries"]
    doc = nlp.make_doc(query_text)
    ents = []
    for label, value in row["entities"].items():
        start = query_text.find(value)
        if start == -1:
            # str.find signals "not found" with -1; using it as an offset would
            # point at the wrong characters, so report the annotation and skip it.
            print(f"Skipping {label}={value!r}: not found verbatim in {query_text!r}")
            continue
        span = doc.char_span(start, start + len(value), label=label)
        if span is None:
            # char_span returns None when the offsets do not fall on token boundaries
            print(f"Skipping {label}={value!r}: not aligned to tokens in {query_text!r}")
            continue
        ents.append(span)
    # doc.ents rejects overlapping spans; filter_spans keeps the longest of any overlap
    doc.ents = spacy.util.filter_spans(ents)
    db.add(doc)

# Save the training data
db.to_disk("ner_training_data.spacy")

# Read back the serialized docs that were written to disk above
db = DocBin().from_disk("ner_training_data.spacy")
docs = list(db.get_docs(nlp.vocab))

# Wrap every doc in an Example whose reference annotation is the doc's own entity spans
train_examples = [
    Example.from_dict(
        gold_doc,
        {"entities": [(span.start_char, span.end_char, span.label_) for span in gold_doc.ents]},
    )
    for gold_doc in docs
]

# Grab the NER component and the optimizer used for the updates below
ner = nlp.get_pipe("ner")
optimizer = nlp.resume_training()

# 20 passes over the examples, in the same order each time
for iteration in range(20):
    losses = {}
    # Batch sizes come from compounding(4.0, 32.0, 1.001), re-created on every pass
    for batch in minibatch(train_examples, size=compounding(4.0, 32.0, 1.001)):
        nlp.update(batch, sgd=optimizer, drop=0.5, losses=losses)
    print(f"Iteration {iteration + 1}, Losses: {losses}")

# Persist the updated pipeline
nlp.to_disk("trained_ner_model")


Iteration 1, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 240.68316583269794}
Iteration 2, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 217.66729600650433}
Iteration 3, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 173.60777008064906}
Iteration 4, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 156.25004524504766}
Iteration 5, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 134.31182173534762}
Iteration 6, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 122.42891162754859}
Iteration 7, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 104.89696787701044}
Iteration 8, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 100.64956386338163}
Iteration 9, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 89.29157836020488}
Iteration 10, Losses: {'tok2vec': 0.0, 'tagger': 0.0, 'parser': 0.0, 'ner': 81.55381477876898}
Iteration 11, Losses: {'tok2vec': 0.0, 'tagger': 0.

In [None]:
# Run the retrained pipeline on a query that is not part of the training set
text = "How do I initiate a rebooting service from the local computer"
doc = nlp(text)

# Collect (surface text, label) for every entity the model found
found_entities = [(span.text, span.label_) for span in doc.ents]
print("Entities:", found_entities)

Entities: [('initiate', 'Action'), ('rebooting service', 'Component'), ('local computer', 'Component')]


## Mapping the responses

In [None]:
# Define response templates based on intent.
# Each intent maps to a dict whose tuple keys are either (action, component)
# or (component, issue), written in lower case, plus a "default" fallback.
response_templates = {
    "Account_Management": {
        ("reset", "password"): "To reset your password, please visit the password management section of your account.",
        ("update", "email"): "To update your email address, go to your account settings under the contact information section.",
        ("delete", "account"): "If you want to delete your account, please follow the steps in the account deletion guide.",
        "default": "To manage your account, visit the account settings section."
    },
    "Technical_Support": {
        ("software", "crashing"): "If your software is crashing, try reinstalling it from the official website or contact support.",
        ("internet connection", "slow"): "For slow internet connection issues, please restart your router or check our connectivity guide.",
        ("update", "firmware"): "To update your firmware, download the latest version from our website and follow the installation instructions.",
        "default": "For technical support, visit our support page or contact us."
    },
    "Subscription_Update": {
        ("upgrade", "subscription plan"): "To upgrade your subscription plan, visit the subscription section in your account and select the new plan.",
        ("cancel", "subscription"): "To cancel your subscription, please go to the subscription management section and follow the cancellation process.",
        "default": "For subscription updates, visit the subscription management section in your account."
    },
    "Customer_Service": {
        "default": "Our customer service team can assist you. Please visit the customer service section or contact us directly."
    },
    "Product_Inquiry": {
        "default": "For product inquiries, visit the product details section on our website."
    }
}


def _normalize_entity(value):
    """Return *value* stripped and lower-cased, or None when it is missing."""
    if not isinstance(value, str):
        return None
    return value.strip().lower()


# Function to generate the response based on intent and entities
def generate_response(intent, entities):
    """Pick the response text for a predicted intent and its extracted entities.

    Args:
        intent: Intent label; looked up as a key of ``response_templates``.
        entities: Dict mapping the labels "Action", "Component" and "Issue"
            to the extracted text. Any of them may be absent.

    Returns:
        The most specific matching template for the intent, else the intent's
        "default" template, else a generic "don't understand" message when the
        intent itself is unknown.
    """
    intent_templates = response_templates.get(intent, {})

    # Template keys are lower case, while extracted entity text keeps the
    # user's casing, so normalize before looking anything up.
    action = _normalize_entity(entities.get("Action"))
    component = _normalize_entity(entities.get("Component"))
    issue = _normalize_entity(entities.get("Issue"))

    response_template = intent_templates.get("default", "Sorry, I don't understand your request.")

    # Try (action, component) first, then (component, issue). The second shape
    # is how problem reports such as ("software", "crashing") are keyed; with
    # only the first shape those templates could never be selected.
    for key in ((action, component), (component, issue)):
        if None not in key and key in intent_templates:
            response_template = intent_templates[key]
            break

    # Fill optional {action}/{component}/{issue} placeholders; templates
    # without placeholders come back unchanged.
    response = response_template.format(
        action=entities.get("Action", "perform this action"),
        component=entities.get("Component", "this component"),
        issue=entities.get("Issue", "this issue")
    )

    return response

def _preprocess_query(query):
    """Apply the Step 2 clean-up (lowercase, strip punctuation, lemmatize) to one query."""
    cleaned = re.sub(r'[^\w\s]', '', query.lower())
    return ' '.join(lemmatizer.lemmatize(token) for token in nltk.word_tokenize(cleaned))


# Example function to process a new query
def process_query(query):
    """Classify a raw user query, extract its entities and build a response.

    Args:
        query: The user's question as plain text.

    Returns:
        A tuple ``(predicted_intent, entities, response)`` where ``entities``
        maps each entity label found by the NER model to its text.
    """
    # Preprocess the new query exactly like the training queries were, so the
    # vectorizer sees the same token forms it was fitted on (for example
    # "two-factor" -> "twofactor", "keeps" -> "keep").
    query_vectorized = vectorizer.transform([_preprocess_query(query)])

    # Predict the intent using the SVM model
    predicted_intent = classifier.predict(query_vectorized)[0]

    # Extract entities using the NER model. It was trained on raw query text,
    # so it receives the unprocessed query. When a label occurs more than
    # once, the last occurrence wins.
    doc = nlp(query)
    entities = {ent.label_: ent.text for ent in doc.ents}

    # Generate the response
    response = generate_response(predicted_intent, entities)

    return predicted_intent, entities, response

# Demo: push one query through the whole pipeline and show each stage's result
new_query = "What is the return policy on accessories?"
intent, entities, response = process_query(new_query)

print(f"Predicted Intent: {intent}")
print(f"Extracted Entities: {entities}")
print(f"Generated Response: {response}")

Predicted Intent: Customer_Service
Extracted Entities: {'Component': 'return policy', 'Issue': 'accessories'}
Generated Response: Our customer service team can assist you. Please visit the customer service section or contact us directly.
