## Packages

In [28]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
from textblob import TextBlob
from spellchecker import SpellChecker  # For spell correction
import os

### Step 1: Download necessary NLTK resources (if not already downloaded)
Run these once (in a code cell) if the resources are not already installed:

- `nltk.download('punkt')`
- `nltk.download('stopwords')`

In [29]:
# Seed sentiment lexicons -- extend either list to widen coverage.
positive_words = [
    'love', 'amazing', 'great', 'fantastic',
    'happy', 'good', 'excellent', 'positive',
]
negative_words = [
    'worst', 'bad', 'hate', 'horrible',
    'poor', 'negative', 'disappointing', 'sad',
]

# One shared spell-checker instance, built with the library defaults.
spell = SpellChecker()

### Step 2: Load feedback data from an Excel CSV file

In [30]:
# Location of the feedback CSV. The default is the original machine-specific
# path; set the FEEDBACK_CSV_PATH environment variable to run the notebook on
# another machine without editing this cell.
file_path = os.environ.get('FEEDBACK_CSV_PATH', r'D:/Project File/Excel data/Feedback.csv')
df = pd.read_csv(file_path)

### Step 3: Print the column names and make sure the feedback column is present

In [31]:
print("Columns in the CSV file:", df.columns)

# If there is no exact "FeedBack" column, accept any column whose name matches
# "feedback" once case and surrounding whitespace are ignored, and copy it
# into the canonical 'FeedBack' column used by the rest of the notebook.
if 'FeedBack' not in df.columns:
    feedback_column = next(
        (col for col in df.columns if str(col).strip().lower() == 'feedback'),
        None,
    )
    if feedback_column is None:
        raise ValueError("The CSV file must contain a 'FeedBack' column, or it must be manually set to the correct column.")
    df['FeedBack'] = df[feedback_column]

# Empty CSV cells are read as NaN (a float), which has no len() and cannot be
# sliced. Normalise every entry to a string first so the steps below never
# receive a non-string value.
feedback_text = df['FeedBack'].fillna('').astype(str)

# Convert only the first letter of the feedback to lowercase
# (an empty string stays empty: '' + '' == '').
df['FeedBack'] = feedback_text.str[:1].str.lower() + feedback_text.str[1:]

Columns in the CSV file: Index(['FeedBack'], dtype='object')


### Step 4: Preprocess the feedback text (tokenization, remove stopwords and punctuation, correct spelling)

In [32]:
# preprocess_text calls stopwords.words('english'), which needs the
# 'stopwords' corpus. No executed cell downloads it, so fetch it here together
# with the tokenizer data; both calls are no-ops when the data is up to date.
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to C:\Users\Chandru
[nltk_data]     R\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [33]:
# Negation cues that must survive stop-word removal. NLTK's English stop-word
# list contains words such as "not" and "no"; dropping them would leave the
# negation-handling step with nothing to act on.
_NEGATION_TOKENS = frozenset({'not', 'no', 'never', 'cannot', "n't"})


def preprocess_text(text):
    """Tokenize one feedback entry and return cleaned, spell-corrected tokens.

    The text is lowercased and split with ``word_tokenize``; English stop words
    (except negation cues) and punctuation-only tokens are dropped; every
    remaining token is passed through the shared ``spell`` checker.

    Args:
        text: Raw feedback string. Any non-string value (for example the NaN
            pandas uses for an empty CSV cell) yields an empty list.

    Returns:
        list[str]: Cleaned tokens in their original order. Never contains
        ``None``.
    """
    if not isinstance(text, str):
        return []

    stop_words = set(stopwords.words('english')) - _NEGATION_TOKENS
    punctuation_chars = set(string.punctuation)

    cleaned = []
    for word in word_tokenize(text.lower()):
        # The tokenizer splits contractions ("isn't" -> "is", "n't");
        # normalise the clitic so it is recognised as a plain "not".
        if word == "n't":
            word = 'not'

        if word in stop_words:
            continue

        # `word in string.punctuation` is a substring test and misses tokens
        # such as "..." -- check character by character instead.
        if all(char in punctuation_chars for char in word):
            continue

        # Negation cues are kept verbatim; they must not be "corrected" away.
        if word in _NEGATION_TOKENS:
            cleaned.append(word)
            continue

        # correction() may return None when it has no candidate; keep the
        # original token so later steps only ever see strings.
        corrected = spell.correction(word)
        cleaned.append(word if corrected is None else corrected)

    return cleaned

# Clean every feedback entry; the resulting token lists go in a new 'Tokens' column
df['Tokens'] = df['FeedBack'].map(preprocess_text)

### Step 5: Handling negations and adjusting sentiment

In [34]:
def handle_negations(tokens):
    """Mark sentiment words that directly follow a negation word.

    A negation token that has a successor is replaced in the output: by
    ``'not_' + successor`` when the successor is in ``positive_words`` or
    ``negative_words``, otherwise by the successor itself. A negation token in
    the final position is kept as-is.

    Note that the successor is still emitted again on its own iteration, so a
    marked word is followed by its bare form (``['not', 'good']`` becomes
    ``['not_good', 'good']``).

    Args:
        tokens: Sequence of token strings.

    Returns:
        list: The adjusted token list.
    """
    negators = ('not', 'never', 'no', 'cannot', 'don’t', "isn't", "aren't")
    last_index = len(tokens) - 1
    result = []
    for position, token in enumerate(tokens):
        # Ordinary tokens, and a negator with nothing after it, pass through.
        if token not in negators or position >= last_index:
            result.append(token)
            continue

        follower = tokens[position + 1]
        carries_sentiment = follower in positive_words or follower in negative_words
        result.append('not_' + follower if carries_sentiment else follower)
    return result

# Run negation marking over each token list; results go in 'Adjusted_Tokens'
df['Adjusted_Tokens'] = df['Tokens'].map(handle_negations)

### Step 6: Analyze Sentiment with Adjusted Tokens

In [35]:
def analyze_sentiment(tokens, positive_lexicon=None, negative_lexicon=None):
    """Label a token list as Positive / Negative / Neutral by lexicon counts.

    Every token found in the positive (negative) lexicon adds one to the
    positive (negative) count. A ``not_<word>`` marker then moves one count
    from the lexicon ``<word>`` belongs to over to the opposite side. This
    assumes the token list has the shape ``handle_negations`` produces, where
    each marker is accompanied by the bare word it negates (that bare word is
    what the subtraction offsets).

    Args:
        tokens: Iterable of tokens. Non-string entries (such as ``None``) are
            ignored instead of raising ``AttributeError``.
        positive_lexicon: Optional collection of positive words; defaults to
            the notebook-level ``positive_words``.
        negative_lexicon: Optional collection of negative words; defaults to
            the notebook-level ``negative_words``.

    Returns:
        tuple: ``(sentiment, positive_count, negative_count)`` where
        ``sentiment`` is ``'Positive'``, ``'Negative'`` or ``'Neutral'``.
    """
    # The globals are only looked up when no explicit lexicon is supplied.
    positives = set(positive_words if positive_lexicon is None else positive_lexicon)
    negatives = set(negative_words if negative_lexicon is None else negative_lexicon)

    positive_count = 0
    negative_count = 0
    for word in tokens:
        if not isinstance(word, str):
            continue

        # Independent checks: a word listed in both lexicons counts for both.
        if word in positives:
            positive_count += 1
        if word in negatives:
            negative_count += 1

        if word.startswith('not_'):
            # Flip the sentiment of the negated word.
            base = word[4:]
            if base in positives:
                positive_count -= 1
                negative_count += 1
            elif base in negatives:
                negative_count -= 1
                positive_count += 1

    # Determine the sentiment based on counts
    if positive_count > negative_count:
        sentiment = 'Positive'
    elif negative_count > positive_count:
        sentiment = 'Negative'
    else:
        sentiment = 'Neutral'

    return sentiment, positive_count, negative_count

# Score each adjusted token list and spread the (label, positive, negative)
# result over three new columns
sentiment_columns = ['Sentiment', 'Positive_Count', 'Negative_Count']
sentiment_rows = df['Adjusted_Tokens'].apply(lambda tokens: pd.Series(analyze_sentiment(tokens)))
df[sentiment_columns] = sentiment_rows

### Step 7: Perform Sentiment Analysis Using TextBlob for a more comprehensive approach

In [36]:
def get_sentiment_blob(text):
    """Classify one feedback entry by the sign of its TextBlob polarity.

    Args:
        text: Raw feedback string. Non-string values (for example the NaN
            pandas uses for an empty CSV cell) are reported as ``'Neutral'``
            rather than being passed to TextBlob.

    Returns:
        str: ``'Positive'`` when the polarity is above 0, ``'Negative'`` when
        it is below 0, otherwise ``'Neutral'``.
    """
    # TextBlob rejects non-string input, so missing feedback is handled here.
    if not isinstance(text, str):
        return 'Neutral'

    # Sentiment polarity: -1 (negative) to 1 (positive)
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'

# Label every raw feedback entry with its TextBlob-based sentiment
df['Sentiment_TextBlob'] = df['FeedBack'].map(get_sentiment_blob)

### Step 8: Specify the output directory where you want to store the files

In [37]:
# Folder that receives the results. The default is the original
# machine-specific path; set the FEEDBACK_OUTPUT_DIR environment variable to
# write somewhere else without editing this cell.
output_directory = os.environ.get('FEEDBACK_OUTPUT_DIR', 'D:/Project File/Analysed Data')

# Create the folder (and any missing parents). exist_ok=True makes this a
# no-op when it already exists and avoids a separate check-then-create step.
os.makedirs(output_directory, exist_ok=True)

# Full path of the CSV output file
csv_file_path = os.path.join(output_directory, 'feedback_with_sentiment_analysis.csv')

### Step 9: Save the result to CSV in the specified location

In [42]:
# Persist the full frame (without the index column), then report the location
df.to_csv(path_or_buf=csv_file_path, index=False)
print(f"Sentiment analysis completed and saved to:\nCSV: {csv_file_path}")

Sentiment analysis completed and saved to:
CSV: D:/Project File/Analysed Data\feedback_with_sentiment_analysis.csv
