# 1. Sentiment Analysis

Determine the overall sentiment of the reviews (positive, negative, neutral) to gauge customer satisfaction.


Tools/Libraries: nltk, TextBlob, VADER, spaCy, BERT

In [None]:
from textblob import TextBlob

# Example function to get sentiment polarity
def get_sentiment(review):
    """Return the TextBlob polarity score for a single review.

    Args:
        review: The review text to analyze.

    Returns:
        The ``sentiment.polarity`` value TextBlob reports for the text,
        between -1 (negative) and 1 (positive).
    """
    polarity = TextBlob(review).sentiment.polarity
    return polarity


# 2. Topic Modeling

Identify common themes or topics in the reviews to understand what aspects are frequently mentioned.

Tools/Libraries: gensim (LDA), sklearn (LDA, NMF), spaCy

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Example function to perform topic modeling
def extract_topics(reviews, num_topics=5, random_state=None):
    """Fit an LDA topic model on bag-of-words counts of the reviews.

    Args:
        reviews: Iterable of review strings.
        num_topics: Number of LDA components (topics) to fit.
        random_state: Seed forwarded to ``LatentDirichletAllocation``.
            The default ``None`` keeps the original unseeded behavior;
            pass an int to get the same topics on every run.

    Returns:
        A ``(lda, vectorizer)`` tuple: the fitted LDA model and the fitted
        ``CountVectorizer`` whose vocabulary maps the model's columns back
        to words.
    """
    # English stop words are dropped so topics are not dominated by
    # function words.
    vectorizer = CountVectorizer(stop_words='english')
    doc_term_matrix = vectorizer.fit_transform(reviews)

    lda = LatentDirichletAllocation(
        n_components=num_topics,
        random_state=random_state,
    )
    lda.fit(doc_term_matrix)
    return lda, vectorizer


# 3. Frequency Analysis

Identify the most frequently mentioned words or phrases in the reviews.

Tools/Libraries: nltk, collections.Counter, wordcloud

In [None]:
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Example function to get word frequencies
def get_word_frequencies(reviews, top_n=10):
    """Count the most frequent non-stop-word tokens across all reviews.

    Each review is lower-cased and tokenized with NLTK's ``word_tokenize``;
    tokens that are not purely alphanumeric or that appear in NLTK's English
    stop-word list are discarded.

    Args:
        reviews: Iterable of review strings.
        top_n: How many of the most common words to return. Defaults to 10
            (the previously hard-coded value). ``None`` returns every word.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent.
    """
    # Build the set once so each membership test is O(1).
    stop_words = set(stopwords.words('english'))

    counts = Counter()
    for review in reviews:
        counts.update(
            word
            for word in word_tokenize(review.lower())
            if word.isalnum() and word not in stop_words
        )
    return counts.most_common(top_n)


# 4. Aspect-Based Sentiment Analysis (ABSA)

Analyze sentiment related to specific aspects (e.g., service, price, quality) of the product or service.

Tools/Libraries: Custom NLP models, nltk, spaCy


# 5. Rating Distribution

Analyze the distribution of ratings to understand overall customer satisfaction.

Tools/Libraries: pandas, matplotlib, seaborn

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Example function to plot rating distribution
def plot_rating_distribution(ratings):
    """Display a count plot showing how often each rating value occurs.

    Args:
        ratings: The rating values, passed to seaborn's ``countplot`` as
            the ``x`` data.
    """
    axes = sns.countplot(x=ratings)
    axes.set_xlabel('Rating')
    axes.set_ylabel('Frequency')
    axes.set_title('Rating Distribution')
    plt.show()


# 6. Review Length Analysis

Analyze the length of reviews to understand the depth of customer feedback.

Tools/Libraries: pandas, matplotlib

In [None]:
# Example function to plot review length distribution
def plot_review_length_distribution(reviews):
    """Display a 30-bin histogram of review lengths measured in words.

    Args:
        reviews: Iterable of review strings. Length is the number of
            whitespace-separated tokens in each string.
    """
    word_counts = []
    for text in reviews:
        word_counts.append(len(text.split()))

    plt.hist(word_counts, bins=30)
    plt.title('Review Length Distribution')
    plt.xlabel('Review Length (words)')
    plt.ylabel('Frequency')
    plt.show()


# 7. Temporal Analysis

Analyze how reviews and ratings change over time to identify trends.

Tools/Libraries: pandas, matplotlib

In [None]:
# Assumes the DataFrame has a 'date' column that pd.to_datetime can parse; only the number of reviews per month is plotted
def plot_reviews_over_time(reviews_df):
    """Plot the number of reviews per month as a line chart.

    The caller's DataFrame is left untouched: dates are parsed into a
    derived frame, so the function can be called repeatedly on the same
    input. (Previously the 'date' column was overwritten and moved into the
    index in place, so a second call raised ``KeyError: 'date'``.)

    Args:
        reviews_df: DataFrame with a 'date' column that ``pd.to_datetime``
            can parse. Each row is counted as one review.
    """
    # Local import: no pandas import is visible in the notebook's cells, so
    # bring `pd` into scope here rather than rely on an earlier cell.
    import pandas as pd

    parsed_dates = pd.to_datetime(reviews_df['date'])
    # assign() and set_index() both return new frames, so reviews_df is not
    # modified.
    by_date = reviews_df.assign(date=parsed_dates).set_index('date')

    # 'M' buckets rows by calendar month; size() counts rows per bucket.
    # NOTE(review): newer pandas releases prefer the 'ME' alias; confirm
    # against the pandas version this notebook targets.
    by_date.resample('M').size().plot()
    plt.xlabel('Date')
    plt.ylabel('Number of Reviews')
    plt.title('Reviews Over Time')
    plt.show()


# 8. Customer Segmentation

Segment customers based on their reviews to identify different customer personas or segments.

Tools/Libraries: sklearn (clustering algorithms like KMeans)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Example function to segment customers
def segment_customers(reviews, num_clusters=3, random_state=None):
    """Cluster reviews with KMeans over their TF-IDF vectors.

    Args:
        reviews: Iterable of review strings.
        num_clusters: Number of clusters to form.
        random_state: Seed forwarded to ``KMeans``. The default ``None``
            keeps the original unseeded behavior; pass an int so the same
            reviews get the same cluster labels on every run.

    Returns:
        The array of cluster labels returned by ``KMeans.fit_predict``, one
        per review, in input order.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(reviews)

    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    return kmeans.fit_predict(tfidf_matrix)


# 9. Comparative Analysis

Compare reviews across different products, services, or time periods.

Tools/Libraries: pandas, matplotlib

In [None]:
# Example function to compare average ratings across products
def compare_average_ratings(reviews_df):
    """Display a bar chart of the mean rating for each product.

    Args:
        reviews_df: DataFrame with 'product' and 'rating' columns. Rows are
            grouped by 'product' and the 'rating' values averaged.
    """
    ratings_by_product = reviews_df.groupby('product')['rating']
    ratings_by_product.mean().plot(kind='bar')

    plt.title('Average Ratings by Product')
    plt.xlabel('Product')
    plt.ylabel('Average Rating')
    plt.show()


# 10. Review Highlights

Extract key highlights or snippets from reviews that capture the essence of customer feedback.

Tools/Libraries: nltk, spaCy, BERT

In [None]:
# Example function to get key highlights by scoring sentences on normalized keyword frequency (a simple extractive heuristic, not TextRank)
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

def get_review_highlights(text):
    """Return up to three of the highest-scoring sentences from ``text``.

    Keywords are tokens tagged PROPN, ADJ, NOUN or VERB that are neither
    spaCy stop words nor punctuation. Each keyword gets a weight equal to
    its frequency divided by the highest keyword frequency, and each
    sentence is scored by summing the weights of the keywords it contains.

    Note that the spaCy model 'en_core_web_sm' is loaded on every call.

    Args:
        text: The review text to summarize.

    Returns:
        The top-scoring sentences (at most three) joined by single spaces,
        ordered from highest to lowest score. An empty string is returned
        when the text yields no keywords (e.g. empty or stop-word-only
        input), where the previous implementation raised ``IndexError``.
    """
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    content_pos = {'PROPN', 'ADJ', 'NOUN', 'VERB'}
    keywords = [
        token.text
        for token in doc
        if token.text not in STOP_WORDS
        and token.text not in punctuation
        and token.pos_ in content_pos
    ]
    # Guard: with no keywords there is no maximum frequency to normalize by
    # and no sentence can score, so there is nothing to highlight.
    if not keywords:
        return ''

    # Normalize counts to the (0, 1] range relative to the most frequent
    # keyword. Only values are reassigned, so iterating the keys is safe.
    word_weight = Counter(keywords)
    max_freq = max(word_weight.values())
    for word in word_weight:
        word_weight[word] /= max_freq

    # Score each sentence by the summed weights of its keyword tokens.
    sent_strength = {}
    for sent in doc.sents:
        for token in sent:
            weight = word_weight.get(token.text)
            if weight is not None:
                sent_strength[sent] = sent_strength.get(sent, 0) + weight

    top_sentences = nlargest(3, sent_strength, key=sent_strength.get)
    return ' '.join(sent.text for sent in top_sentences)

# Demo: summarize a sample review and print the extracted highlights
review_text = "I loved the product! The quality is great and the delivery was on time. Customer service was very helpful and responsive. Would definitely recommend to others."
highlights = get_review_highlights(review_text)
print(highlights)
