## Sentiment analysis

In [1]:
import pandas as pd
import nltk
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
from textblob import TextBlob 

In [2]:
positive_words = ['love', 'amazing', 'great', 'fantastic', 'happy', 'good', 'excellent', 'positive']
negative_words = ['worst', 'bad', 'hate', 'horrible', 'poor', 'negative', 'disappointing', 'sad']

In [3]:
file_path = r'C:\Users\Chandru R\Desktop\Document\MYSQL Data (CSV Format DataSet)\Text Message\conversation_text.csv' 
df = pd.read_csv(file_path)

In [4]:
print("Columns in the CSV file:", df.columns)
if 'text_message' not in df.columns:
    if 'text_message' in df.columns:
        df['text_message'] = df['text_message']
    else:
        raise ValueError("The CSV file must contain a 'text_message' column, or it must be manually set to the correct column.")
df['text_message'] = df['text_message'].apply(lambda x: x[0].lower() + x[1:] if len(x) > 0 else x)

Columns in the CSV file: Index(['chat_row', 'text_message'], dtype='object')


In [5]:
nltk.download('punkt')  # Download the 'punkt' tokenizer model (this is used for word tokenization)
nltk.download('stopwords')  # Download the 'stopwords' corpus (this contains common stopwords in multiple languages)

[nltk_data] Downloading package punkt to C:\Users\Chandru
[nltk_data]     R\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Chandru
[nltk_data]     R\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
def preprocess_text(text): #1. Tokenization and 6. Text Preprocessing
    text = text.lower()
    text = re.sub(r"[@/{}[]/#!^$%&|~<>?^a-zA-Z0-9\s]", '', text) #3.Punctuation Removal
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english')) # 2. Stopword Removal
    tokens = [word for word in tokens if word not in stop_words]
    return tokens
df['Tokens'] = df['text_message'].apply(preprocess_text)

In [7]:
def handle_negations(tokens): #4.Negation Handling
    adjusted_tokens = []
    negation_words = ['not', 'never', 'no', 'cannot', 'don’t', "isn't", "aren't"]  
    for i, word in enumerate(tokens):
        if word in negation_words and i+1 < len(tokens):  
            next_word = tokens[i+1]
            if next_word in positive_words:
                adjusted_tokens.append('not_' + next_word)  
            elif next_word in negative_words:
                adjusted_tokens.append('not_' + next_word)  
            else:
                adjusted_tokens.append(next_word)  
        else:
            adjusted_tokens.append(word) 
    return adjusted_tokens
df['Adjusted_Tokens'] = df['Tokens'].apply(handle_negations)

In [8]:
def analyze_sentiment(tokens): #5. Rule-Based Sentiment Analysis
    positive_count = sum(1 for word in tokens if word in positive_words)  
    negative_count = sum(1 for word in tokens if word in negative_words)  
    for word in tokens:
        if word.startswith('not_'):  
            if word[4:] in positive_words:
                positive_count -= 1
                negative_count += 1 
            elif word[4:] in negative_words:
                negative_count -= 1
                positive_count += 1  
    if positive_count > negative_count:
        sentiment = 'Positive'  
    elif negative_count > positive_count:
        sentiment = 'Negative'  
    else:
        sentiment = 'Neutral' 
    return sentiment, positive_count, negative_count
df[['Sentiment', 'Positive_Count', 'Negative_Count']] = df['Adjusted_Tokens'].apply(lambda x: pd.Series(analyze_sentiment(x)))

In [9]:
def get_sentiment_blob(text):
    blob = TextBlob(text)  
    sentiment = blob.sentiment.polarity  
    if sentiment > 0:
        return 'Positive'  
    elif sentiment < 0:
        return 'Negative'  
    else:
        return 'Neutral' 
df['Sentiment_TextBlob'] = df['text_message'].apply(get_sentiment_blob)

In [10]:
output_file_path = "C:/Users/Chandru R/Desktop/Document/Analysed NLP data/Sentiment Analysis/feedback_with_sentiment_analysis.csv" 
if output_file_path:
    df.to_csv(output_file_path, index=False)  
    print(f"\nCleaned data has been saved to {output_file_path}")
else:
    print("No file path provided. Data was not saved.")


Cleaned data has been saved to C:/Users/Chandru R/Desktop/Document/Analysed NLP data/Sentiment Analysis/feedback_with_sentiment_analysis.csv
