# 🎯 Competitive Benchmarking - Voice of Customer Analysis

**Objective:** Compare BPCL against 3 major competitors (IndianOil, HPCL, Shell) by analyzing Google Play Store reviews.

**Apps Under Analysis:**
- Hello BPCL (BPCL)
- IndianOil ONE (IndianOil)
- HP Pay (HPCL)
- Shell Asia (Shell)

**Target:** Up to 50,000 reviews per app (fewer when an app has fewer reviews available)

## 📦 Step 1: Import Required Libraries

In [2]:
import pandas as pd
import numpy as np
from google_play_scraper import app, Sort, reviews_all
from datetime import datetime
import time
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Confirm the imports above succeeded and record when this run started.
print("‚úÖ Libraries imported successfully")
print(f"Current Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

‚úÖ Libraries imported successfully
Current Date: 2026-01-15 23:22:47


## 🎯 Step 2: Define App List

In [3]:
# Brand name -> Google Play package ID for every app in the benchmark
APPS = dict(
    BPCL='com.cgt.bharatgas',
    IndianOil='cx.indianoil.in',
    HPCL='com.drivetrackplusrefuel',
    Shell='com.shell.sitibv.shellgoplusindia',
)

# Maximum number of reviews to keep per app
TARGET_REVIEWS = 50_000

# Echo the configuration so the run log shows what was requested
print(
    f"üéØ Target: {TARGET_REVIEWS:,} reviews per app",
    f"üì± Total Apps: {len(APPS)}",
    "\nApp List:",
    sep="\n",
)
for brand, package in APPS.items():
    print(f"  ‚Ä¢ {brand}: {package}")

üéØ Target: 50,000 reviews per app
üì± Total Apps: 4

App List:
  ‚Ä¢ BPCL: com.cgt.bharatgas
  ‚Ä¢ IndianOil: cx.indianoil.in
  ‚Ä¢ HPCL: com.drivetrackplusrefuel
  ‚Ä¢ Shell: com.shell.sitibv.shellgoplusindia


## 🔧 Step 3: Create Scraping Function

In [8]:
def _format_count(value):
    """Render *value* with thousands separators, falling back to plain text.

    App metadata may be missing the review count (or hold ``None``); applying
    the ``:,`` format spec to such values raises, so they are shown as-is
    (``None`` becomes ``'N/A'``).
    """
    try:
        return f"{value:,}"
    except (TypeError, ValueError):
        return 'N/A' if value is None else str(value)


def _fetch_reviews_paginated(package_id, target_count, lang, country):
    """Fetch reviews newest-first in batches of 200 until the target is met.

    Stops early when a batch comes back empty, when no continuation token is
    returned, or when a batch request raises; whatever was collected up to
    that point is returned (best effort).
    """
    from google_play_scraper import reviews

    collected = []
    continuation_token = None

    while len(collected) < target_count:
        try:
            # Fetch batch of reviews (200 per batch)
            result, continuation_token = reviews(
                package_id,
                lang=lang,
                country=country,
                sort=Sort.NEWEST,
                count=200,  # Max per request
                continuation_token=continuation_token
            )
        except Exception as batch_error:
            # Keep the partial result rather than losing the whole app
            print(f"\n   ‚ö†Ô∏è Batch error: {str(batch_error)}")
            break

        if not result:
            print(f"\n   ‚ö†Ô∏è No more reviews available")
            break

        collected.extend(result)
        print(f"   üì• Fetched {len(collected):,} reviews so far...", end='\r')

        # If no continuation token, we've reached the end
        if not continuation_token:
            print(f"\n   ‚úÖ Reached end of available reviews")
            break

        # Small delay to avoid rate limiting
        time.sleep(0.5)

    return collected


def scrape_competitive_data(apps_dict, target_count=50000, lang='en', country='in',
                            pagination_apps=None):
    """
    Scrape Google Play Store reviews for multiple apps.

    Brands listed in ``pagination_apps`` are fetched batch by batch; every
    other brand is fetched with ``reviews_all`` and then truncated to
    ``target_count``. A failure while scraping one app is reported and the
    remaining apps are still processed.

    Parameters:
    -----------
    apps_dict : dict
        Dictionary with brand names as keys and package IDs as values
    target_count : int
        Number of reviews to keep per app (default: 50000)
    lang : str
        Language code (default: 'en')
    country : str
        Country code (default: 'in')
    pagination_apps : iterable of str, optional
        Brand names that must use the paginated fetch
        (default: 'HPCL' and 'Shell')

    Returns:
    --------
    pd.DataFrame
        Combined DataFrame with all reviews; an empty DataFrame (no columns)
        when nothing was collected
    """
    # Apps that need pagination (limited reviews available)
    if pagination_apps is None:
        pagination_apps = ('HPCL', 'Shell')

    all_reviews = []

    print("="*70)
    print("üöÄ STARTING COMPETITIVE DATA SCRAPING")
    print("="*70)

    for brand, package_id in apps_dict.items():
        print(f"\n{'='*70}")
        print(f"üì± Scraping: {brand} ({package_id})")
        print(f"{'='*70}")

        try:
            # Fetch app metadata first
            print(f"üìä Fetching app metadata...")
            app_info = app(package_id, lang=lang, country=country)
            print(f"   App Name: {app_info.get('title', 'N/A')}")
            print(f"   Rating: {app_info.get('score', 'N/A')} ‚≠ê")
            # A missing/None review count must not abort the scrape of this app
            print(f"   Total Reviews: {_format_count(app_info.get('reviews', 'N/A'))}")

            print(f"\nüîç Scraping reviews (Target: {target_count:,})...")
            start_time = time.time()

            # Use different methods based on brand
            if brand in pagination_apps:
                print(f"   üîÑ Using PAGINATION method for {brand}...")
                reviews_collected = _fetch_reviews_paginated(
                    package_id, target_count, lang, country
                )
            else:
                # Fast method for the remaining brands
                print(f"   ‚ö° Using FAST method for {brand}...")
                reviews_collected = reviews_all(
                    package_id,
                    sleep_milliseconds=0,
                    lang=lang,
                    country=country,
                    sort=Sort.NEWEST
                )

            # Limit to target count (both methods may overshoot)
            reviews_collected = reviews_collected[:target_count]

            elapsed = time.time() - start_time
            print(f"\n‚úÖ Scraped {len(reviews_collected):,} reviews in {elapsed:.2f}s")

            # Process reviews into structured format
            all_reviews.extend(
                {
                    'brand': brand,
                    'package_id': package_id,
                    'reviewId': review.get('reviewId'),
                    'content': review.get('content'),
                    'score': review.get('score'),
                    'at': review.get('at'),
                    'thumbsUpCount': review.get('thumbsUpCount', 0),
                    'reviewCreatedVersion': review.get('reviewCreatedVersion'),
                    'userName': review.get('userName'),
                }
                for review in reviews_collected
            )

            print(f"‚úÖ {brand}: {len(reviews_collected):,} reviews processed")

        except Exception as e:
            # Deliberate best effort: one failing app must not stop the others
            print(f"‚ùå ERROR scraping {brand}: {str(e)}")
            print(f"   Continuing with next app...")
            continue

        # Delay between apps
        print(f"‚è≥ Waiting 2 seconds before next app...")
        time.sleep(2)

    print(f"\n{'='*70}")
    print("üéâ SCRAPING COMPLETED")
    print(f"{'='*70}")

    # Convert to DataFrame
    if all_reviews:
        df = pd.DataFrame(all_reviews)
        print(f"\nüìä Final Dataset Shape: {df.shape}")
        print(f"   Total Reviews: {len(df):,}")
        print(f"   Brands Covered: {df['brand'].nunique()}")
        print(f"\nüìà Reviews per Brand:")
        print(df['brand'].value_counts())
        return df
    else:
        print("‚ö†Ô∏è No reviews collected!")
        return pd.DataFrame()

# Status message confirming the scraping function above was defined.
print("‚úÖ Scraping function defined successfully (HYBRID MODE)")

‚úÖ Scraping function defined successfully (HYBRID MODE)


## 🚀 Step 4: Execute Scraping

In [9]:
# Run the scrape for every configured app; later cells read df_competitive
print("‚è≥ Starting competitive data collection...\n")

df_competitive = scrape_competitive_data(APPS, TARGET_REVIEWS, lang='en', country='in')

print("\n" + "=" * 70, "‚úÖ Data collection complete!", "=" * 70, sep="\n")

‚è≥ Starting competitive data collection...

üöÄ STARTING COMPETITIVE DATA SCRAPING

üì± Scraping: BPCL (com.cgt.bharatgas)
üìä Fetching app metadata...
   App Name: HelloBPCL
   Rating: 4.3761353 ‚≠ê
   Total Reviews: 125,129

üîç Scraping reviews (Target: 50,000)...
   ‚ö° Using FAST method for BPCL...

‚úÖ Scraped 50,000 reviews in 125.22s
‚úÖ BPCL: 50,000 reviews processed
‚è≥ Waiting 2 seconds before next app...

üì± Scraping: IndianOil (cx.indianoil.in)
üìä Fetching app metadata...
   App Name: IndianOil ONE
   Rating: 4.465825 ‚≠ê
   Total Reviews: 109,342

üîç Scraping reviews (Target: 50,000)...
   ‚ö° Using FAST method for IndianOil...

‚úÖ Scraped 50,000 reviews in 106.92s
‚úÖ IndianOil: 50,000 reviews processed
‚è≥ Waiting 2 seconds before next app...

üì± Scraping: HPCL (com.drivetrackplusrefuel)
üìä Fetching app metadata...
   App Name: HP PAY
   Rating: 4.172102 ‚≠ê
   Total Reviews: 26,167

üîç Scraping reviews (Target: 50,000)...
   üîÑ Using PAGINATION meth

## 💾 Step 5: Save Data & Verification

In [10]:
# Persist the raw scrape to CSV and print a short sanity report
if df_competitive.empty:
    print("‚ö†Ô∏è No data to save!")
else:
    output_file = 'competitive_reviews_raw.csv'
    # utf-8-sig writes a byte-order mark at the start of the file
    df_competitive.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"üíæ Saved to: {output_file}", f"üìä Final Shape: {df_competitive.shape}", sep="\n")

    print("\n" + "=" * 70, "üìã DATASET SUMMARY", "=" * 70, sep="\n")
    print(f"Total Reviews: {len(df_competitive):,}")
    print(f"Total Brands: {df_competitive['brand'].nunique()}")
    print(f"Date Range: {df_competitive['at'].min()} to {df_competitive['at'].max()}")
    print(f"\nColumns: {list(df_competitive.columns)}")

    print("\nüìä Reviews per Brand:")
    print(df_competitive['brand'].value_counts())

    print("\n‚≠ê Average Rating by Brand:")
    print(df_competitive.groupby('brand')['score'].mean().round(2))

    print("\n‚úÖ SUCCESS! Data ready for analysis.")

üíæ Saved to: competitive_reviews_raw.csv
üìä Final Shape: (133476, 9)

üìã DATASET SUMMARY
Total Reviews: 133,476
Total Brands: 4
Date Range: 2018-01-10 14:18:13 to 2026-01-14 22:51:04

Columns: ['brand', 'package_id', 'reviewId', 'content', 'score', 'at', 'thumbsUpCount', 'reviewCreatedVersion', 'userName']

üìä Reviews per Brand:
brand
BPCL         50000
IndianOil    50000
HPCL         25541
Shell         7935
Name: count, dtype: int64

‚≠ê Average Rating by Brand:
brand
BPCL         4.15
HPCL         3.08
IndianOil    4.04
Shell        3.76
Name: score, dtype: float64

‚úÖ SUCCESS! Data ready for analysis.


## 🔍 Step 6: Quick Data Preview

In [11]:
# Show a sample, the column dtypes and basic statistics of the scraped data
if df_competitive.empty:
    print("‚ö†Ô∏è No data available for preview")
else:
    print("üìã Sample Reviews (First 5):", "=" * 70, sep="\n")
    display(df_competitive.head())

    print("\nüìä Data Info:", "=" * 70, sep="\n")
    df_competitive.info()

    print("\nüìà Statistical Summary:", "=" * 70, sep="\n")
    display(df_competitive[['score', 'thumbsUpCount']].describe())

üìã Sample Reviews (First 5):


Unnamed: 0,brand,package_id,reviewId,content,score,at,thumbsUpCount,reviewCreatedVersion,userName
0,BPCL,com.cgt.bharatgas,7db4aded-efcc-4ddb-a45a-5598d1b8e50b,very good and superfast üëç,5,2026-01-14 22:51:04,0,4.0.84,A Google user
1,BPCL,com.cgt.bharatgas,51302cf2-0f6e-436f-85d8-033d4405c205,Good aplication,5,2026-01-14 22:47:37,0,4.0.84,A Google user
2,BPCL,com.cgt.bharatgas,0af76e61-9d75-413f-9257-0b67e630aba7,most satisfactory,5,2026-01-14 22:07:47,0,4.0.84,A Google user
3,BPCL,com.cgt.bharatgas,535c3545-d507-44c4-8f88-2d1425f1120a,very nice and efficient app. You can book a re...,5,2026-01-14 22:03:35,0,4.0.84,A Google user
4,BPCL,com.cgt.bharatgas,17f45dce-dd3a-41b1-b14e-8420fcb792ae,thank you,5,2026-01-14 21:55:04,0,4.0.84,A Google user



üìä Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 133476 entries, 0 to 133475
Data columns (total 9 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   brand                 133476 non-null  object        
 1   package_id            133476 non-null  object        
 2   reviewId              133476 non-null  object        
 3   content               133475 non-null  object        
 4   score                 133476 non-null  int64         
 5   at                    133476 non-null  datetime64[ns]
 6   thumbsUpCount         133476 non-null  int64         
 7   reviewCreatedVersion  119070 non-null  object        
 8   userName              133476 non-null  object        
dtypes: datetime64[ns](1), int64(2), object(6)
memory usage: 9.2+ MB

üìà Statistical Summary:


Unnamed: 0,score,thumbsUpCount
count,133476.0,133476.0
mean,3.882451,0.506024
std,1.609798,11.425947
min,1.0,0.0
25%,3.0,0.0
50%,5.0,0.0
75%,5.0,0.0
max,5.0,1578.0


## 📊 Step 7: Data Processing & Analysis Functions

In [12]:
# Import additional libraries for analysis
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.corpus import stopwords

# Download the NLTK resources used by this notebook; quiet=True suppresses
# the downloader's progress output.
# NOTE(review): none of the analysis functions visible in this notebook
# reference the stopwords corpus or the VADER lexicon — confirm whether a
# later step needs them before removing.
nltk.download('stopwords', quiet=True)
nltk.download('vader_lexicon', quiet=True)

# Status message confirming the analysis imports succeeded.
print("‚úÖ Analysis libraries imported successfully")

‚úÖ Analysis libraries imported successfully


In [13]:
def filter_last_12_months(df):
    """
    Return a copy of *df* restricted to reviews from the past 12 months.

    The 'at' column is parsed to datetimes (unparseable values become NaT and
    are therefore excluded). The input frame is not modified.
    """
    working = df.copy()
    working['at'] = pd.to_datetime(working['at'], errors='coerce')

    threshold = pd.Timestamp.now() - pd.DateOffset(months=12)
    is_recent = working['at'] >= threshold
    recent = working.loc[is_recent].copy()

    print(f"üìÖ Filtered to last 12 months: {len(recent):,} reviews (from {len(working):,})")
    return recent


def calculate_nss(df, date_filter=True):
    """
    Calculate Net Sentiment Score (NSS) per brand.

    NSS = ((Promoters - Detractors) / Total Reviews) * 100

    Segments are derived from the star rating: 5 is a Promoter, 1-3 is a
    Detractor, and every other value (i.e. 4) is a Passive.

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with competitive reviews; needs 'brand' and 'score'
        columns (and 'at' when date_filter is True)
    date_filter : bool
        If True, filter to last 12 months (default: True)

    Returns:
    --------
    pd.DataFrame
        One row per brand with columns Brand, Promoters, Passives,
        Detractors, Total_Reviews and NSS_Score, sorted by NSS_Score
        descending. Empty (but with those columns) when there are no reviews.
    """

    # Apply date filter
    if date_filter:
        df = filter_last_12_months(df)

    df_copy = df.copy()

    print("\n" + "="*70)
    print("üéØ CALCULATING NET SENTIMENT SCORE (NSS)")
    print("="*70)

    # Create nps_segment based on star score
    def classify_segment(score):
        if score == 5:
            return 'Promoter'
        if 1 <= score <= 3:
            return 'Detractor'
        return 'Passive'  # score == 4

    df_copy['nps_segment'] = df_copy['score'].apply(classify_segment)

    # Fixed column list so an empty result can still be sorted and printed
    summary_columns = [
        'Brand', 'Promoters', 'Passives', 'Detractors', 'Total_Reviews', 'NSS_Score',
    ]
    nss_summary = []

    # sort=False keeps brands in order of first appearance; every group has
    # at least one row, so the percentage divisions below are safe
    for brand, brand_data in df_copy.groupby('brand', sort=False):
        segment_counts = brand_data['nps_segment'].value_counts()

        promoters = int(segment_counts.get('Promoter', 0))
        detractors = int(segment_counts.get('Detractor', 0))
        passives = int(segment_counts.get('Passive', 0))
        total = len(brand_data)

        nss_score = (promoters - detractors) / total * 100

        nss_summary.append({
            'Brand': brand,
            'Promoters': promoters,
            'Passives': passives,
            'Detractors': detractors,
            'Total_Reviews': total,
            'NSS_Score': nss_score
        })

        print(f"\nüìä {brand}:")
        print(f"   Promoters (5‚≠ê): {promoters:,} ({promoters/total*100:.1f}%)")
        print(f"   Passives (4‚≠ê):  {passives:,} ({passives/total*100:.1f}%)")
        print(f"   Detractors (1-3‚≠ê): {detractors:,} ({detractors/total*100:.1f}%)")
        print(f"   üéØ NSS Score: {nss_score:.2f}")

    df_nss = pd.DataFrame(nss_summary, columns=summary_columns).sort_values(
        'NSS_Score', ascending=False
    )

    print("\n" + "="*70)
    print("‚úÖ NSS SUMMARY")
    print("="*70)
    print(df_nss.to_string(index=False))

    return df_nss


def get_complaint_matrix(df, date_filter=True):
    """
    Create complaint matrix from negative reviews (1-2 stars).

    For each brand and topic, counts the negative reviews whose text contains
    at least one of the topic's keywords (each review counts once per topic,
    however many keywords it matches), then normalizes by the brand's TOTAL
    review count (not just negative ones).

    Matching is a case-insensitive substring test, so short keywords also
    match inside longer words (e.g. 'fail' matches 'failed').

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with competitive reviews; needs 'brand', 'content' and
        'score' columns (and 'at' when date_filter is True)
    date_filter : bool
        If True, filter to last 12 months (default: True)

    Returns:
    --------
    pd.DataFrame
        Complaint matrix suitable for heatmap: indexed by Brand, one column
        per topic, values are problem rates in percent. Empty (but with the
        topic columns) when there are no reviews.
    """

    # Apply date filter
    if date_filter:
        df = filter_last_12_months(df)

    df_copy = df.copy()

    print("\n" + "="*70)
    print("üîç ANALYZING COMPLAINT PATTERNS")
    print("="*70)

    # Define complaint topics
    topics = {
        'Login': ['login', 'otp', 'sms', 'verify', 'authentication'],
        'Payment': ['fail', 'money', 'deduct', 'wallet', 'payment', 'charge'],
        'UI': ['slow', 'hang', 'crash', 'freeze', 'lag', 'performance'],
        'Support': ['customer care', 'ticket', 'reply', 'support', 'help', 'contact']
    }

    # Convert content to lowercase for matching; astype(str) keeps the .str
    # accessor usable even when the column holds no string values
    df_copy['content_lower'] = df_copy['content'].fillna('').astype(str).str.lower()

    # Get total reviews per brand for normalization
    total_reviews_per_brand = df_copy.groupby('brand').size()

    complaint_data = []

    for brand in df_copy['brand'].unique():
        brand_data = df_copy[df_copy['brand'] == brand]

        # Filter for negative reviews (1-2 stars)
        negative_reviews = brand_data[brand_data['score'] <= 2]
        negative_text = negative_reviews['content_lower']

        total_brand_reviews = total_reviews_per_brand[brand]

        print(f"\nüì± {brand}:")
        print(f"   Total Reviews: {total_brand_reviews:,}")
        print(f"   Negative Reviews (1-2‚≠ê): {len(negative_reviews):,}")

        brand_complaint = {'Brand': brand}

        for topic, keywords in topics.items():
            # OR the per-keyword hits together so a review that mentions
            # several keywords of the same topic is counted only once
            matched = pd.Series(False, index=negative_text.index)
            for keyword in keywords:
                matched = matched | negative_text.str.contains(keyword, regex=False, na=False)
            topic_count = int(matched.sum())

            # Normalize by TOTAL reviews (not just negative)
            problem_rate = (topic_count / total_brand_reviews * 100) if total_brand_reviews > 0 else 0
            brand_complaint[topic] = problem_rate

            print(f"   {topic}: {topic_count} mentions ‚Üí {problem_rate:.2f}% problem rate")

        complaint_data.append(brand_complaint)

    # Explicit columns keep set_index working when no brand was processed
    matrix_columns = ['Brand'] + list(topics)
    df_complaints = pd.DataFrame(complaint_data, columns=matrix_columns).set_index('Brand')

    print("\n" + "="*70)
    print("‚úÖ COMPLAINT MATRIX (Problem Rate %)")
    print("="*70)
    print(df_complaints.round(2))

    return df_complaints


def _top_bigrams(texts, max_features):
    """Return the set of the most frequent bigrams in *texts*.

    English stop words are removed before bigrams are formed. Returns an
    empty set when no bigram can be extracted at all (e.g. every review is a
    single word), a case in which CountVectorizer raises ValueError.
    """
    vectorizer = CountVectorizer(ngram_range=(2, 2), stop_words='english', max_features=max_features)
    try:
        vectorizer.fit(texts)
    except ValueError:
        # Empty vocabulary: nothing to compare for this brand
        return set()
    return set(vectorizer.get_feature_names_out())


def get_blue_ocean_features(df, date_filter=True):
    """
    Identify "Blue Ocean" features: what competitors praise that BPCL doesn't.

    Analysis: Uses bigrams from positive reviews (5 stars) only. A
    competitor's top 10 bigrams that are absent from BPCL's top 50 bigrams
    are reported as that competitor's advantage.

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with competitive reviews; needs 'brand', 'content' and
        'score' columns (and 'at' when date_filter is True)
    date_filter : bool
        If True, filter to last 12 months (default: True)

    Returns:
    --------
    dict
        Dictionary with Blue Ocean opportunities per competitor
        ('IndianOil', 'HPCL', 'Shell'), each mapped to a list of bigrams
        (empty when there is no advantage or no usable positive review)
    """

    # Apply date filter
    if date_filter:
        df = filter_last_12_months(df)

    df_copy = df.copy()

    print("\n" + "="*70)
    print("üåä BLUE OCEAN ANALYSIS - Competitor Advantages")
    print("="*70)

    # Filter for positive reviews (5 stars)
    positive_reviews = df_copy[df_copy['score'] == 5].copy()
    positive_reviews['content_clean'] = positive_reviews['content'].fillna('').str.lower()

    blue_ocean_results = {}

    # Get BPCL bigrams (top 50)
    bpcl_positive = positive_reviews[positive_reviews['brand'] == 'BPCL']['content_clean']

    if len(bpcl_positive) > 0:
        print(f"\nüìä BPCL Positive Reviews: {len(bpcl_positive):,}")

        bpcl_top_features = _top_bigrams(bpcl_positive, max_features=50)

        print(f"   Top BPCL Bigrams (Sample): {list(bpcl_top_features)[:10]}")
    else:
        bpcl_top_features = set()
        print(f"\nüìä BPCL Positive Reviews: 0 (Skipping BPCL analysis)")

    # Compare competitors
    competitors = ['IndianOil', 'HPCL', 'Shell']

    for competitor in competitors:
        comp_positive = positive_reviews[positive_reviews['brand'] == competitor]['content_clean']

        if len(comp_positive) > 0:
            print(f"\nüèÜ {competitor} Positive Reviews: {len(comp_positive):,}")

            comp_top_features = _top_bigrams(comp_positive, max_features=10)

            print(f"   Top {competitor} Bigrams: {comp_top_features}")

            # Blue Ocean = In competitor top 10 but NOT in BPCL top 50
            blue_ocean_features = comp_top_features - bpcl_top_features

            if blue_ocean_features:
                print(f"   üåä BLUE OCEAN (Competitor advantage): {blue_ocean_features}")
                blue_ocean_results[competitor] = list(blue_ocean_features)
            else:
                print(f"   ‚úÖ No unique advantage (BPCL covers these topics)")
                blue_ocean_results[competitor] = []
        else:
            print(f"\nüèÜ {competitor} Positive Reviews: 0")
            blue_ocean_results[competitor] = []

    print("\n" + "="*70)
    print("‚úÖ BLUE OCEAN SUMMARY")
    print("="*70)
    for competitor, features in blue_ocean_results.items():
        if features:
            print(f"üåä {competitor}: {', '.join(features)}")
        else:
            print(f"‚úÖ {competitor}: No significant advantage")

    return blue_ocean_results


# Status message confirming the analysis functions above were defined.
print("‚úÖ All analysis functions defined successfully")

‚úÖ All analysis functions defined successfully


## 🚀 Step 8: Execute Analysis

In [14]:
# Run the three analyses on the scraped reviews (each restricted to the
# last 12 months, which is the functions' default)
print("‚è≥ Starting comprehensive competitive analysis...\n")

# 1. Net Sentiment Score
nss_results = calculate_nss(df_competitive)

# 2. Complaint Matrix
complaint_matrix = get_complaint_matrix(df_competitive)

# 3. Blue Ocean Features
blue_ocean = get_blue_ocean_features(df_competitive)

print("\n" + "=" * 70, "‚úÖ ALL ANALYSES COMPLETED!", "=" * 70, sep="\n")

‚è≥ Starting comprehensive competitive analysis...

üìÖ Filtered to last 12 months: 45,817 reviews (from 133,476)

üéØ CALCULATING NET SENTIMENT SCORE (NSS)

üìä BPCL:
   Promoters (5‚≠ê): 17,307 (68.1%)
   Passives (4‚≠ê):  2,977 (11.7%)
   Detractors (1-3‚≠ê): 5,122 (20.2%)
   üéØ NSS Score: 47.96

üìä IndianOil:
   Promoters (5‚≠ê): 7,400 (61.3%)
   Passives (4‚≠ê):  1,425 (11.8%)
   Detractors (1-3‚≠ê): 3,243 (26.9%)
   üéØ NSS Score: 34.45

üìä HPCL:
   Promoters (5‚≠ê): 3,132 (54.7%)
   Passives (4‚≠ê):  355 (6.2%)
   Detractors (1-3‚≠ê): 2,240 (39.1%)
   üéØ NSS Score: 15.58

üìä Shell:
   Promoters (5‚≠ê): 1,645 (62.9%)
   Passives (4‚≠ê):  124 (4.7%)
   Detractors (1-3‚≠ê): 847 (32.4%)
   üéØ NSS Score: 30.50

‚úÖ NSS SUMMARY
    Brand  Promoters  Passives  Detractors  Total_Reviews  NSS_Score
     BPCL      17307      2977        5122          25406  47.961112
IndianOil       7400      1425        3243          12068  34.446470
    Shell       1645       124        

## 📈 Step 9: Visualize Results

In [15]:
import plotly.graph_objects as go
import plotly.express as px

# 1. NSS Comparison Bar Chart (horizontal, lowest score first)
fig_nss = px.bar(
    nss_results.sort_values('NSS_Score'),
    x='NSS_Score',
    y='Brand',
    orientation='h',
    color='NSS_Score',
    color_continuous_scale='RdYlGn',
    text='NSS_Score',
    labels=dict(NSS_Score='NSS Score', Brand='Brand'),
    title='üéØ Net Sentiment Score (NSS) Comparison',
)
fig_nss.update_traces(textposition='outside', texttemplate='%{text:.2f}')
fig_nss.show()

print("\n‚úÖ NSS Chart displayed\n")

# 2. Complaint Matrix Heatmap (brands as rows, complaint topics as columns)
fig_heatmap = px.imshow(
    complaint_matrix,
    color_continuous_scale="YlOrRd",
    text_auto='.2f',
    labels={"x": "Complaint Category", "y": "Brand", "color": "Problem Rate (%)"},
    title="üî• Complaint Heatmap: Problem Rate by Brand & Category",
)
fig_heatmap.update_layout(height=400)
fig_heatmap.show()

print("\n‚úÖ Complaint Heatmap displayed\n")

# 3. Sentiment Distribution by Brand
# Restrict to the last 12 months; unparseable dates become NaT and drop out
sentiment_dist = df_competitive.assign(
    at=pd.to_datetime(df_competitive['at'], errors='coerce')
)
cutoff_date = pd.Timestamp.now() - pd.DateOffset(months=12)
sentiment_dist = sentiment_dist.loc[sentiment_dist['at'] >= cutoff_date]

fig_dist = px.box(
    sentiment_dist,
    x='brand',
    y='score',
    color='brand',
    points='outliers',
    labels=dict(brand='Brand', score='Rating'),
    title='‚≠ê Rating Distribution by Brand (Last 12 Months)',
)
fig_dist.show()

print("\n‚úÖ Rating Distribution displayed\n")

# 4. Review Volume by Brand
fig_volume = px.bar(
    sentiment_dist.groupby('brand').size().reset_index(name='Count'),
    x='brand',
    y='Count',
    color='brand',
    text='Count',
    labels=dict(brand='Brand', Count='Number of Reviews'),
    title='üìä Review Volume by Brand (Last 12 Months)',
)
fig_volume.update_traces(textposition='outside', texttemplate='%{text:,}')
fig_volume.show()

print("\n‚úÖ Review Volume displayed")
print("\n" + "=" * 70, "‚úÖ ALL VISUALIZATIONS COMPLETE!", "=" * 70, sep="\n")


‚úÖ NSS Chart displayed




‚úÖ Complaint Heatmap displayed




‚úÖ Rating Distribution displayed




‚úÖ Review Volume displayed

‚úÖ ALL VISUALIZATIONS COMPLETE!
