In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import json

In [2]:
# Load the conversation export, trying UTF-8 first and Latin-1 second.
# Latin-1 maps every possible byte to a character, so the second attempt can
# never raise UnicodeDecodeError; any other error (missing file, malformed
# JSON) propagates unchanged.
for encoding in ('utf-8', 'latin-1'):
    try:
        with open('sample_conversations.json', 'r', encoding=encoding) as file:
            conversations = json.load(file)
    except UnicodeDecodeError:
        continue
    break

In [5]:
# Clean and preprocess the data
def clean_text(text):
    """Normalise a message for matching.

    Every character that is not a word character, whitespace, or a question
    mark is dropped; the result is lower-cased and then its first character
    is upper-cased.

    Args:
        text: The raw message string.

    Returns:
        The normalised string.
    """
    # Keep word characters, whitespace and '?'; strip everything else.
    without_punctuation = re.sub(r'[^\w\s?]', '', text)
    lowered = without_punctuation.lower()
    # capitalize() only affects the very first character, so a string that
    # begins with whitespace keeps a lower-case first letter.
    return lowered.capitalize()

In [4]:
# Collect the text of every representative (non-customer) message,
# walking the issues and their messages in file order.
all_messages = [
    message['Text']
    for issue in conversations['Issues']
    for message in issue['Messages']
    if not message['IsFromCustomer']
]

In [6]:
# Normalise every representative message with clean_text, preserving order.
cleaned_conversations = list(map(clean_text, all_messages))

In [7]:
# Single-column frame holding one cleaned representative message per row.
df = pd.DataFrame(cleaned_conversations, columns=['conversation'])

In [8]:
# Count how many times each distinct cleaned message occurs in the corpus.
# NOTE(review): sentence_counts is not referenced by any later cell visible
# here — confirm whether it is still needed (e.g. for ranking suggestions).
sentence_counts = df['conversation'].value_counts()

In [9]:
# Fit a TF-IDF vectorizer (default settings) on the cleaned messages.
# `vectorizer` is reused by get_autocomplete to transform the typed input, and
# `tfidf_matrix` holds one row per row of df['conversation'], which
# get_autocomplete compares against with cosine similarity.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['conversation'])

In [10]:
def get_autocomplete(input_text, top_n=3):
    """Suggest up to ``top_n`` distinct representative messages for a partial input.

    Messages in ``df['conversation']`` that start with the cleaned input come
    first, in corpus order. Any remaining slots are filled with the other
    messages, ranked by descending TF-IDF cosine similarity to the input.
    Duplicates are skipped while collecting, so repeated corpus messages do
    not use up suggestion slots.

    Args:
        input_text: Text typed so far. Leading whitespace is ignored; the rest
            is normalised with ``clean_text`` before matching.
        top_n: Maximum number of suggestions to return.

    Returns:
        A list of at most ``top_n`` unique suggestion strings, each passed
        through ``str.capitalize``. Empty when ``top_n`` is not positive.
    """
    if top_n <= 0:
        return []

    # Leading whitespace would otherwise stop clean_text from capitalising the
    # first letter and make the prefix match below fail.
    input_text = clean_text(input_text.lstrip())
    input_vector = vectorizer.transform([input_text])

    # One similarity score per corpus row, in the same positional order as df.
    similarities = cosine_similarity(input_vector, tfidf_matrix).flatten()

    corpus = df['conversation']
    is_prefix_match = corpus.str.startswith(input_text, na=False).to_numpy(dtype=bool)

    # Positions (into the full corpus) of prefix matches, in corpus order.
    prefix_positions = is_prefix_match.nonzero()[0]

    # Positions of all rows by descending similarity, minus the prefix matches.
    # The filter is applied to positions of the FULL corpus: indexing a
    # pre-filtered Series with these positions would select the wrong rows or
    # run out of bounds.
    ranked_positions = similarities.argsort()[::-1]
    fallback_positions = ranked_positions[~is_prefix_match[ranked_positions]]

    suggestions = []
    seen = set()
    for positions in (prefix_positions, fallback_positions):
        for position in positions:
            suggestion = corpus.iloc[position].capitalize()
            if suggestion in seen:
                continue
            seen.add(suggestion)
            suggestions.append(suggestion)
            # Stop as soon as enough unique suggestions have been collected.
            if len(suggestions) == top_n:
                return suggestions

    return suggestions


In [None]:
# Interactive demo: read partial sentences from the representative and print
# autocomplete suggestions until 'exit' is entered.
print("Welcome to the Smart Autocomplete-NLP for Customer Service Interactions!")
print("Type the beginning of a sentence, and the system will suggest completions.")
print("Type 'exit' to quit the program.")

while True:
    try:
        user_input = input("\nRepresentative's input: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted cell ends the session cleanly
        # instead of surfacing a traceback.
        print("Thank you for using the Smart Autocomplete-NLP system. Goodbye!")
        break

    # Surrounding whitespace should not prevent the exit command from matching.
    command = user_input.strip()
    if command.lower() == 'exit':
        print("Thank you for using the Smart Autocomplete-NLP system. Goodbye!")
        break

    # Nothing typed: there is nothing to complete, so prompt again.
    if not command:
        continue

    suggestions = get_autocomplete(user_input)

    print(f"\nFor the input \"{user_input}\":")
    print("Autocomplete suggestions:")
    for suggestion in suggestions:
        print(f"▪ \"{suggestion}\"")

Welcome to the Smart Autocomplete-NLP for Customer Service Interactions!
Type the beginning of a sentence, and the system will suggest completions.
Type 'exit' to quit the program.



Representative's input:  what is



For the input "what is":
Autocomplete suggestions:
▪ "What is your apartment number?"
▪ "What is the cell phone number on the contract?"
▪ "What is the cell number?"
