In [1]:
import spacy
import pandas as pd
from textblob import TextBlob
import re
import logging
import sys
from pathlib import Path
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [3]:
# Check if spaCy model is available
try:
    nlp = spacy.load("en_core_web_sm")
except OSError as e:
    logger.error("spaCy model 'en_core_web_sm' not found. Please install it using: python -m spacy download en_core_web_sm")
    sys.exit(1)

In [4]:
data = pd.read_csv('amazon.csv')
data.head()

Unnamed: 0,Text,label
0,This is the best apps acording to a bunch of ...,1
1,This is a pretty good version of the game for ...,1
2,this is a really . there are a bunch of levels...,1
3,"This is a silly game and can be frustrating, b...",1
4,This is a terrific game on any pad. Hrs of fun...,1


In [5]:
tickets2 =  [{"ticket_id": i+7, "description": desc} for i, desc in enumerate(data["Text"].dropna())]

In [6]:
# Define sample dataset of customer support tickets
tickets1 = [
    {"ticket_id": 1, "description": "Ugh, fine. You need a return label for that stupid product? Here it is #58923. Just take it"},
    {"ticket_id": 2, "description": "Thank you for the quick response. I'm satisfied with the service."},
    {"ticket_id": 3, "description": "The website is not loading properly. Please fix it ASAP."},
    {"ticket_id": 4, "description": "I need to return a product. Can you send me the return label?"},
    {"ticket_id": 5, "description": "Ugh, seriously? This is the best you could come up with?"},
    {"ticket_id": 6, "description": "Can you help me track my order from Amazon?"}

]

In [7]:
tickets = tickets1+tickets2[:93]

In [8]:
# Define NLP processing functions
def clean_text(text):
    """
    Cleans input text by converting to lowercase, removing punctuation and stop words,
    and lemmatizing words using spaCy.
    """
    try:
        if not isinstance(text, str):
            logger.warning(f"Invalid text input: {text}. Returning empty string.")
            return ""
        doc = nlp(text.lower())
        cleaned = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct]
        return " ".join(cleaned)
    except Exception as e:
        logger.error(f"Error cleaning text: {str(e)}")
        return ""

In [9]:
def extract_entities(text):
    """
    Extracts named entities from text using spaCy's NER.
    Returns a list of tuples (entity text, entity label).
    """
    try:
        if not isinstance(text, str):
            logger.warning(f"Invalid text input for entity extraction: {text}. Returning empty list.")
            return []
        doc = nlp(text)
        entities = [(ent.text, ent.label_) for ent in doc.ents]
        return entities
    except Exception as e:
        logger.error(f"Error extracting entities: {str(e)}")
        return []

In [10]:
def identify_intent(text):
    text = text.lower()

    if any(word in text for word in ["return", "refund", "send back", "didn't like"]):
        return "return_request"
    elif any(word in text for word in ["thank", "great", "amazing", "fun", "enjoy", "love", "awesome"]):
        return "positive_feedback"
    elif any(word in text for word in ["boring", "bad", "hate", "crash", "not working", "waste", "bug", "issue", "problem"]):
        return "negative_feedback"
    elif any(word in text for word in ["help", "how", "can you", "support", "assist", "instructions"]):
        return "inquiry"
    elif any(word in text for word in ["delay", "late", "not arrived", "shipping", "delivery"]):
        return "order_issue"
    elif any(word in text for word in ["too many ads", "ads are annoying", "ads interrupt", "ads ruin", "ads are bad"]):
        return "ads_complaint"
    else:
        return "general_comment"

In [3]:
# Since SentimnetIntensityAnalyzer from vanderSentiment provided better results, it has been used instead of textBlob for sentiment scores.

In [11]:
def get_sentiment(text):
    """
    Computes sentiment polarity of text using TextBlob.
    Returns a float between -1 (negative) and 1 (positive).
    """
    try:
        if not isinstance(text, str):
            logger.warning(f"Invalid text input for sentiment analysis: {text}. Returning 0.0.")
            return 0.0
        # blob = TextBlob(text)
        # return blob.sentiment.polarity
        analyzer = SentimentIntensityAnalyzer()
        score = analyzer.polarity_scores(text)
        del score['compound']
        max_key = max(score,key=score.get)
        return score[max_key]
    except Exception as e:
        logger.error(f"Error computing sentiment: {str(e)}")
        return 0.0

In [12]:
def get_sentiment_label(text):
    """
    Converts sentiment score to a label (Positive, Negative, Neutral).
    """
    analyzer = SentimentIntensityAnalyzer()
    score = analyzer.polarity_scores(text)
    del score['compound']
    return max(score,key=score.get)
    try:
        if not isinstance(text, str):
            logger.warning(f"Invalid text input for sentiment analysis: {text}. Returning neu.")
            return neu
        # blob = TextBlob(text)
        # return blob.sentiment.polarity
        analyzer = SentimentIntensityAnalyzer()
        score = analyzer.polarity_scores(text)
        del score['compound']
        return max(score,key=score.get)
    except Exception as e:
        logger.error(f"Error labeling sentiment: {str(e)}")
        return "Neutral"


In [13]:
def extract_order_number(text):
    """
    Extracts order numbers from text using a regular expression.
    Assumes order numbers are in the format # followed by digits.
    Returns the order number if found, otherwise None.
    """
    try:
        if not isinstance(text, str):
            logger.warning(f"Invalid text input for order number extraction: {text}. Returning None.")
            return None
        pattern = r"#(\d+)"
        match = re.search(pattern, text)
        return match.group(1) if match else None
    except Exception as e:
        logger.error(f"Error extracting order number: {str(e)}")
        return None

In [14]:
def main():
    """
    Main function to process the dataset and enhance text analytics data quality.
    """
    try:
        # Create a pandas DataFrame from the sample dataset
        logger.info("Creating DataFrame from sample dataset")
        df = pd.DataFrame(tickets)

        # Apply NLP functions to enhance the dataset
        logger.info("Applying NLP processing to dataset")
        df['cleaned_description'] = df['description'].apply(clean_text)
        df['entities'] = df['description'].apply(extract_entities)
        df['sentiment_score'] = df['description'].apply(get_sentiment)
        df['sentiment_label'] = df['cleaned_description'].apply(get_sentiment_label)
        df['order_number'] = df['description'].apply(extract_order_number)
        df['intent'] = df['description'].apply(identify_intent)

        # Display the enriched dataset
        logger.info("Displaying enriched dataset")
        print("\nEnriched Dataset:")
        print(df)

        # Save the enriched dataset to a CSV file
        output_path = Path("enriched_tickets.csv")
        logger.info(f"Saving enriched dataset to {output_path}")
        df.to_csv(output_path, index=False)
        logger.info(f"Dataset successfully saved to {output_path}")

    except Exception as e:
        logger.error(f"Error in main processing: {str(e)}")
        sys.exit(1)

if __name__ == "__main__":
    main()

2025-05-17 12:41:33,105 - INFO - Creating DataFrame from sample dataset
2025-05-17 12:41:33,105 - INFO - Applying NLP processing to dataset
2025-05-17 12:41:40,839 - INFO - Displaying enriched dataset
2025-05-17 12:41:40,858 - INFO - Saving enriched dataset to enriched_tickets.csv
2025-05-17 12:41:40,873 - INFO - Dataset successfully saved to enriched_tickets.csv



Enriched Dataset:
    ticket_id                                        description  \
0           1  Ugh, fine. You need a return label for that st...   
1           2  Thank you for the quick response. I'm satisfie...   
2           3  The website is not loading properly. Please fi...   
3           4  I need to return a product. Can you send me th...   
4           5  Ugh, seriously? This is the best you could com...   
..        ...                                                ...   
94         95  Hey it's Angry Birds, what's not to love!  Eas...   
95         96  I can't believe how  hard I try to hit the tar...   
96         97  I didn't like it at all .I think it is more fo...   
97         98  I got to the end of he pre-installed version. ...   
98         99  I hate it they say no at everything l got rid ...   

                                  cleaned_description  \
0     ugh fine need return label stupid product 58923   
1              thank quick response satisfied serv