In [2]:
!pip install pandas scikit-learn nltk



In [3]:
# Fetch the 'punkt' tokenizer data up front; the main cell below tokenizes text
# with nltk's word_tokenize and requests this same package again.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shrey\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:
import re
from functools import lru_cache

import pandas as pd
from nltk import download
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

# Download necessary NLTK resources.
# 'punkt_tab' is requested in addition to 'punkt': NLTK 3.9 and later load the
# tokenizer data used by word_tokenize from 'punkt_tab', while older releases
# use 'punkt'. Asking for a package the installed release does not know about
# only makes download() report the problem and return False, so requesting
# both keeps this cell working across versions.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    download(resource)

# Preprocessing Function
@lru_cache(maxsize=1)
def _text_resources():
    """Return the (stop-word set, lemmatizer) pair, building it on first use only."""
    return frozenset(stopwords.words('english')), WordNetLemmatizer()


def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    The text is lower-cased and tokenized with ``word_tokenize``; English stop
    words and single-character tokens are dropped; every remaining token is
    passed through ``WordNetLemmatizer.lemmatize``.

    Args:
        text: The string to normalize.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    # The stop-word list and the lemmatizer do not depend on the input, so they
    # are built once and reused instead of being recreated for every row.
    stop_words, lemmatizer = _text_resources()

    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text.lower())
        if token not in stop_words and len(token) > 1
    )

# Parse Conversation Files Function
def parse_conversation(content):
    """Extract metadata and message text from one conversation transcript.

    The transcript is expected to contain ``Conversation ID:``, ``Category:``,
    ``Sentiment:`` and ``Priority:`` fields plus any number of
    ``Customer: "..."`` / ``Agent: "..."`` turns. Sentiment and priority may
    share a line (``Sentiment: X | Priority: Y``) or sit on separate lines.

    Args:
        content: The full transcript as a single string.

    Returns:
        A dict with the keys ``'Conversation ID'``, ``'Category'``,
        ``'Sentiment'``, ``'Priority'`` (each a stripped string, or ``None``
        when the field is absent), ``'Customer Text'`` and ``'Agent Text'``
        (the respective messages joined by spaces) and ``'Full Text'`` (all
        customer messages followed by all agent messages, space-joined).
    """
    def _field(label, value_pattern=r'.+'):
        # First value following "<label>: ", stripped; None when not present.
        match = re.search(rf'{label}: ({value_pattern})', content)
        return match.group(1).strip() if match else None

    # Each message is the text between the opening quote and the next
    # straight double quote on the same line.
    customer_messages = re.findall(r'Customer: "(.+?)"', content)
    agent_messages = re.findall(r'Agent: "(.+?)"', content)

    return {
        'Conversation ID': _field('Conversation ID'),
        'Category': _field('Category'),
        # Stop at '|' (shared "Sentiment | Priority" line) AND at a newline:
        # a bare [^|]+ also matches line breaks, so a sentiment on its own
        # line would otherwise swallow the rest of the transcript.
        'Sentiment': _field('Sentiment', r'[^|\n]+'),
        'Priority': _field('Priority'),
        'Customer Text': ' '.join(customer_messages),
        'Agent Text': ' '.join(agent_messages),
        'Full Text': ' '.join(customer_messages + agent_messages),
    }

# Sample conversation transcripts, embedded inline as raw strings (nothing is
# read from disk here). Each entry starts with metadata lines
# ("Conversation ID", "Category", "Sentiment | Priority") followed by
# alternating quoted Customer/Agent turns; parse_conversation() consumes
# this layout.
conversation_files_content = [
    """Conversation ID: TECH_003
Category: Technical Support
Sentiment: Annoyed | Priority: Critical
Customer: "Hi, I’m really frustrated. Your smart home app crashes every time I try to connect my older thermostat model. It worked fine on my old phone!"
Agent: "Hello! I apologize for the trouble. Could you confirm your phone model and app version?"
Customer: "iPhone 15, app version 5.2.1. The thermostat is Model HT-2019."
Agent: "Thank you for clarifying. Unfortunately, HT-2019 isn’t supported in versions after 5.0. We can roll back your app to 4.9 or offer a discount on a compatible thermostat. What works best for you?"
Customer: "I’ll take the discount, but this is disappointing."
Agent: "Understood. I’ll email the discount code shortly. We appreciate your patience!"
""",
    """Conversation ID: TECH_002
Category: Technical Support
Sentiment: Confused | Priority: Medium
Customer: "Good morning! I’m having an issue where my app keeps saying ‘no internet connection,’ but my Wi-Fi is working fine. Other apps load normally."
Agent: "Hi! Thanks for contacting us. Let’s check the app’s network permissions. Go to Settings > Apps > [App Name] > Permissions. Is ‘Local Network’ enabled?"
Customer: "Hmm, it was off! I just turned it on, but still no luck."
Agent: "No worries! Please clear the app cache: Settings > Storage > Clear Cache. Then log in again. Does that help?"
Customer: "That fixed it! Thank you! Any idea why this happened suddenly?"
Agent: "Glad to hear it! A recent update may have reset permissions. We’ll flag this to our dev team. Cheers!"
""",
    """Conversation ID: TECH_001
Category: Technical Support
Sentiment: Frustrated | Priority: High
Customer: "Hi there! I’ve been trying to install the latest update for your design software for hours. It keeps failing at 75% with an ‘unknown error.’ What’s wrong?"
Agent: "Hello! Thank you for reaching out. Let me help troubleshoot. Could you share a screenshot of the error message and confirm your operating system version?"
Customer: "Sure, it’s Windows 11. Here’s the screenshot: [image link]. I’ve restarted twice, same issue."
Agent: "Thank you for the details. This is a known conflict with third-party antivirus tools. Could you temporarily disable your antivirus and retry? I’ll also send a direct download link as a workaround."
Customer: "Oh, disabling the antivirus worked! Installation completed. Thanks for your help!"
Agent: "You’re welcome! Let us know if you need further assistance. Have a great day!"
""",
    """Conversation ID: TECH_005
Category: Technical Support
Sentiment: Urgent | Priority: Critical
Customer: "Hi, this is urgent! Your API is rejecting our payment gateway integration. Error: ‘Invalid SSL certificate.’ Our cert is valid and up-to-date!"
Agent: "Hello! Let’s investigate immediately. Could you share the output from openssl s_client -connect yourgateway.com:443?"
Customer: "Here’s the terminal output: [text]. See? No errors here."
Agent: "Thank you! Our system requires TLS 1.3, but your server supports only up to TLS 1.2. Upgrading the protocol will resolve the authentication error."
Customer: "Upgrading worked! Thanks for the quick fix!"
Agent: "Happy to help! Don’t hesitate to reach out for future issues. Goodbye!"
""",
    """Conversation ID: TECH_004
Category: Technical Support
Sentiment: Anxious | Priority: High
Customer: "Hello! My project data isn’t syncing between my laptop and tablet. Changes on one device don’t show up on the other. Can you help?"
Agent: "Hi there! Let’s resolve this together. Are both devices logged into the same account? Could you share the sync logs?"
Customer: "Yes, same account. Here’s a log from my tablet: [file attached]."
Agent: "Thanks for sharing! The log shows a corrupted sync token. I’ll reset it manually. Go to Settings > Sync > Force Full Sync and wait 10 minutes. Let me know if it works!"
Customer: "It’s syncing now! Will this happen again?"
Agent: "Great news! A patch releasing next week will prevent this issue. Thanks for your patience!"
"""
]

# Parse Conversations into a DataFrame
# One parsed record (a dict) per raw transcript, kept in input order; the
# DataFrame gets one row per record.
conversations_data = [
    parse_conversation(raw_transcript)
    for raw_transcript in conversation_files_content
]
conversations_df = pd.DataFrame(conversations_data)

# Load Historical Ticket Data from CSV
historical_data = pd.read_csv('Historical_ticket_data.csv')

# Strip stray whitespace around the header labels so that the exact-name
# column lookups performed later in this cell find their columns.
historical_data.columns = [label.strip() for label in historical_data.columns]

# Preprocess Text Data: normalize every conversation's customer text.
customer_text = conversations_df['Customer Text']
conversations_df['Preprocessed_Customer_Text'] = customer_text.map(preprocess_text)

# Extract Features Using TF-IDF Vectorizer (vocabulary capped at 100 terms).
tfidf_vectorizer = TfidfVectorizer(max_features=100)
tfidf_features = tfidf_vectorizer.fit_transform(
    conversations_df['Preprocessed_Customer_Text']
)

# Train Classifiers for Issue Type, Sentiment, and Priority Prediction
# All three models share the same TF-IDF feature matrix and differ only in
# the label column they are fitted against.
X = tfidf_features
y_issue_type = conversations_df['Category']
y_sentiment = conversations_df['Sentiment']
y_priority = conversations_df['Priority']


def _fit_forest(features, labels):
    """Fit and return a 100-tree random forest with a fixed seed (42)."""
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(features, labels)
    return forest


issue_classifier = _fit_forest(X, y_issue_type)
sentiment_classifier = _fit_forest(X, y_sentiment)
priority_classifier = _fit_forest(X, y_priority)

# Create Solution Mapping from Historical Data and Conversations Data
# Maps a (category, sentiment, priority) tuple to the list of solution texts
# collected for that combination.
issue_solution_mapping = {}


def _add_solutions(mapping, key, solution_texts):
    """Add ``solution_texts`` to the list stored under ``key`` in ``mapping``.

    A string is appended as-is, a list is flattened into the stored list, and
    any other value is appended as ``str(value)``. The same normalization is
    applied whether or not ``key`` already exists, so the first value recorded
    for a key is treated exactly like later ones (no nested lists, no raw
    non-string entries).
    """
    bucket = mapping.setdefault(key, [])
    if isinstance(solution_texts, str):
        bucket.append(solution_texts)
    elif isinstance(solution_texts, list):
        bucket.extend(solution_texts)
    else:
        bucket.append(str(solution_texts))


# Populate solution mapping from historical data
for _, row in historical_data.iterrows():
    _add_solutions(
        issue_solution_mapping,
        (row['Issue Category'], row['Sentiment'], row['Priority']),
        row['Solution'],
    )

# Populate solution mapping from conversation data: the agent's side of each
# parsed conversation is recorded as the solution text for its key.
for _, row in conversations_df.iterrows():
    _add_solutions(
        issue_solution_mapping,
        (row['Category'], row['Sentiment'], row['Priority']),
        row['Agent Text'],
    )

# Analyze New Customer Text Function
def analyze_text_nlp(text, issue_clf, sentiment_clf, priority_clf, vectorizer, solution_mapping):
    """Classify a customer message and look up solutions for the predicted labels.

    Args:
        text: Raw customer message.
        issue_clf: Fitted classifier whose ``predict`` yields the issue type.
        sentiment_clf: Fitted classifier whose ``predict`` yields the sentiment.
        priority_clf: Fitted classifier whose ``predict`` yields the priority.
        vectorizer: Fitted vectorizer; its ``transform`` turns the
            preprocessed text into the classifiers' feature row.
        solution_mapping: Mapping from ``(issue type, sentiment, priority)``
            to a list of solution texts.

    Returns:
        A 4-tuple ``(issue_type, sentiment, priority, solutions)``. When the
        predicted combination is not in ``solution_mapping``, ``solutions`` is
        a one-item list holding a "no match" message.
    """
    # Single-row feature matrix for the cleaned-up message.
    features = vectorizer.transform([preprocess_text(text)])

    # Each classifier predicts on the same row; [0] takes that row's label.
    issue_type, sentiment, priority = (
        clf.predict(features)[0]
        for clf in (issue_clf, sentiment_clf, priority_clf)
    )

    fallback = ["No matching solution found."]
    solutions = solution_mapping.get((issue_type, sentiment, priority), fallback)

    return issue_type, sentiment, priority, solutions

# Example Usage of the System with a New Inquiry Text
new_customer_inquiry = "The app is running very slow. It takes forever to load. It is so frustrating."

(
    predicted_issue_type,
    predicted_sentiment,
    predicted_priority,
    recommended_solutions,
) = analyze_text_nlp(
    text=new_customer_inquiry,
    issue_clf=issue_classifier,
    sentiment_clf=sentiment_classifier,
    priority_clf=priority_classifier,
    vectorizer=tfidf_vectorizer,
    solution_mapping=issue_solution_mapping,
)

# Report: header, the three predicted labels, then a numbered solution list.
print("Analysis Result:")
print("--------------------------------------------------")
for report_line in (
    f"Predicted Issue Type:  {predicted_issue_type}",
    f"Predicted Sentiment:   {predicted_sentiment}",
    f"Predicted Priority:    {predicted_priority}",
    "\nRecommended Solutions:",
):
    print(report_line)

if not recommended_solutions:
    print("No matching solution found.")
else:
    for position, suggestion in enumerate(recommended_solutions, start=1):
        print(f"{position}. {suggestion}")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shrey\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\shrey\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\shrey\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Analysis Result:
--------------------------------------------------
Predicted Issue Type:  Technical Support
Predicted Sentiment:   Anxious
Predicted Priority:    High

Recommended Solutions:
1. Hi there! Let’s resolve this together. Are both devices logged into the same account? Could you share the sync logs? Thanks for sharing! The log shows a corrupted sync token. I’ll reset it manually. Go to Settings > Sync > Force Full Sync and wait 10 minutes. Let me know if it works! Great news! A patch releasing next week will prevent this issue. Thanks for your patience!
