# Prototype

This notebook prototypes a sentiment-analysis dashboard for the hotel industry. The dashboard visualizes customer feedback data, highlighting overall sentiment trends and key topics across different hotels and service aspects. It enables stakeholders to quickly identify strengths, areas of concern, and evolving customer perceptions.

## 1. Import libraries

In [1]:
pip install gradio

Note: you may need to restart the kernel to use updated packages.


In [2]:
import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


## 2. Load Data

In [3]:
# Path of the dashboard dataset, relative to the notebook's working directory.
csv_path = './df_dashboard.csv'
# Plain load with no parsing options: `date` is read as a string (object) column here.
data = pd.read_csv(csv_path)

In [4]:
# Preview the first rows to sanity-check that the CSV loaded as expected.
data.head()

Unnamed: 0,id,hotel,city,country,date,sentiment,emotions,topics,dominant_topic_keywords
0,0,park hotel shanghai,shanghai,china,2009-11-02 00:00:00,positive,approval,Staff & Service,"great, location, good, nice, clean"
1,1,park hotel shanghai,shanghai,china,2009-10-22 00:00:00,neutral,confusion,Location & Amenities,"night, would, one, get, time"
2,2,park hotel shanghai,shanghai,china,2009-07-30 00:00:00,positive,caring,Staff & Service,"great, location, good, nice, clean"
3,3,park hotel shanghai,shanghai,china,2009-06-03 00:00:00,positive,annoyance,Location & Amenities,"night, would, one, get, time"
4,4,park hotel shanghai,shanghai,china,2008-07-23 00:00:00,negative,confusion,Location & Amenities,"night, would, one, get, time"


In [5]:
# Create a dataframe
# `data` is already a DataFrame (returned by pd.read_csv above); this binds it
# to the module-level name `df`, which update_dashboard() reads as a global.
df = pd.DataFrame(data)

In [6]:
# Preview `df`; it should show the same rows as `data` above.
df.head()

Unnamed: 0,id,hotel,city,country,date,sentiment,emotions,topics,dominant_topic_keywords
0,0,park hotel shanghai,shanghai,china,2009-11-02 00:00:00,positive,approval,Staff & Service,"great, location, good, nice, clean"
1,1,park hotel shanghai,shanghai,china,2009-10-22 00:00:00,neutral,confusion,Location & Amenities,"night, would, one, get, time"
2,2,park hotel shanghai,shanghai,china,2009-07-30 00:00:00,positive,caring,Staff & Service,"great, location, good, nice, clean"
3,3,park hotel shanghai,shanghai,china,2009-06-03 00:00:00,positive,annoyance,Location & Amenities,"night, would, one, get, time"
4,4,park hotel shanghai,shanghai,china,2008-07-23 00:00:00,negative,confusion,Location & Amenities,"night, would, one, get, time"


In [7]:
# Check column dtypes and non-null counts before building the charts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22000 entries, 0 to 21999
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   id                       22000 non-null  int64 
 1   hotel                    22000 non-null  object
 2   city                     22000 non-null  object
 3   country                  22000 non-null  object
 4   date                     22000 non-null  object
 5   sentiment                22000 non-null  object
 6   emotions                 22000 non-null  object
 7   topics                   22000 non-null  object
 8   dominant_topic_keywords  22000 non-null  object
dtypes: int64(1), object(8)
memory usage: 1.5+ MB


In [8]:
def load_data_from_csv(csv_path):
    """Read the review CSV and normalise it for the dashboard.

    Steps performed on the loaded frame:
      * If a ``date`` column exists it is parsed to datetime and two helper
        columns are derived: ``month`` (abbreviated month name) and
        ``month_num`` (1-12).
      * Otherwise, if a ``year`` column exists, ``date`` is synthesised as
        January 1st of that year, with ``month='Jan'`` and ``month_num=1``.
      * The text columns used for filtering (country, city, hotel, emotions,
        sentiment) are lower-cased when present.

    Args:
        csv_path: Anything accepted by ``pandas.read_csv`` (path or buffer).

    Returns:
        pandas.DataFrame: the normalised frame.
    """
    frame = pd.read_csv(csv_path)
    available = frame.columns

    if 'date' in available:
        # Real dates: parse them, then derive the month helpers from them.
        frame['date'] = pd.to_datetime(frame['date'])
        frame['month'] = frame['date'].dt.strftime('%b')
        frame['month_num'] = frame['date'].dt.month
    elif 'year' in available:
        # Only a year is known: pin every row to January 1st of that year.
        frame['date'] = pd.to_datetime(frame['year'], format='%Y')
        frame['month'] = 'Jan'
        frame['month_num'] = 1

    # Lower-case the filterable text columns so equality filters are consistent.
    text_columns = ('country', 'city', 'hotel', 'emotions', 'sentiment')
    for name in text_columns:
        if name in frame.columns:
            frame[name] = frame[name].str.lower()

    return frame

## 2. Functions to display data

### 2.1. Topics

In [9]:
def create_top_words_by_emotion(filtered_df, selected_emotion='All Emotions'):
    """Build a horizontal bar chart of the most frequent topics.

    Despite the function name, the values counted are the entries of the
    ``topics`` column, not individual words.

    Args:
        filtered_df: Frame with at least ``topics`` and ``emotions`` columns.
        selected_emotion: Display label of an emotion, or ``'All Emotions'``
            to skip the emotion filter. The label is lower-cased before it is
            compared with the ``emotions`` column.

    Returns:
        plotly.graph_objects.Figure: bar chart of up to 7 topics.
    """
    show_all = selected_emotion == 'All Emotions'

    # Narrow to the chosen emotion unless the "all" sentinel was passed.
    subset = filtered_df
    if not show_all:
        subset = subset[subset['emotions'] == selected_emotion.lower()]

    # Rank topics by frequency and keep the 7 most common ones.
    ranked = dict(Counter(subset['topics']).most_common(7))
    topic_names = list(ranked.keys())
    topic_counts = list(ranked.values())

    bar = go.Bar(
        x=topic_counts,
        y=topic_names,
        orientation='h',
        marker_color='indigo'
    )
    fig = go.Figure(bar)

    emotion_title = "All emotions" if show_all else selected_emotion
    fig.update_layout(
        title=f"Topics: {emotion_title}",
        xaxis_title="Count",
        yaxis_title="Topics",
        height=300
    )

    return fig

### 2.2. Top emotions

In [10]:
def create_top_emotions_by_sentiment(filtered_df):
    """Build a horizontal bar chart of the 7 most frequent emotions.

    Bars are coloured blue for emotions treated as positive and red for
    emotions treated as negative. Emotions missing from the colour map fall
    back to blue.

    Args:
        filtered_df: Frame with an ``emotions`` column of strings.

    Returns:
        plotly.graph_objects.Figure: bar chart of up to 7 emotions.
    """
    positive_color = '#5BC0DE'
    negative_color = '#D9534F'

    # Count rows per emotion and keep the 7 most frequent ones.
    emotion_counts = filtered_df.groupby('emotions').size().reset_index(name='count')
    emotion_counts = emotion_counts.sort_values('count', ascending=False).head(7)

    # Colour map for emotions. 'confusion' is the label that actually occurs
    # in the dataset; 'confused' is kept for backward compatibility.
    # NOTE(review): several keys here ('happy', 'angry', ...) may not match the
    # labels produced upstream - confirm against the full emotion vocabulary.
    color_map = {
        'happy': positive_color,
        'satisfied': positive_color,
        'disappointed': negative_color,
        'frustrated': negative_color,
        'angry': negative_color,
        'surprised': positive_color,
        'confused': negative_color,
        'confusion': negative_color,
        'approval': positive_color,
        'caring': positive_color,
        'annoyance': negative_color
    }

    # One colour per bar, in the same order as the plotted emotions.
    colors = [color_map.get(emotion.lower(), positive_color)
              for emotion in emotion_counts['emotions']]

    fig = go.Figure(go.Bar(
        x=emotion_counts['count'],
        y=emotion_counts['emotions'],
        orientation='h',
        marker_color=colors
    ))

    fig.update_layout(
        title="Top Emotions",
        xaxis_title="Count",
        yaxis_title="Emotions",
        height=300
    )

    return fig


### 2.3. Sentiment Chart

In [11]:
def create_sentiment_chart(filtered_df):
    """Build a donut chart of the sentiment distribution.

    Each slice is labelled ``"<Sentiment> (<pct>%)"`` and the total number of
    reviews is shown in the centre of the donut.

    Slice colours are looked up by sentiment name, so a given sentiment keeps
    the same colour regardless of which sentiment is most frequent in the
    current filter. The palette matches the one used by the yearly sentiment
    line chart.

    Args:
        filtered_df: Frame with a ``sentiment`` column.

    Returns:
        plotly.graph_objects.Figure: donut chart.
    """
    palette = {
        'positive': '#4285F4',  # Blue
        'negative': '#EA4335',  # Red
        'neutral': '#5BC0DE',   # Light Blue
    }
    fallback_color = '#9E9E9E'  # Grey for any unexpected sentiment label

    # value_counts() orders by frequency, so the order varies with the filter.
    sentiment_counts = filtered_df['sentiment'].value_counts()

    # Whole-number percentages for the labels.
    total = sentiment_counts.sum()
    percentages = (sentiment_counts / total * 100).round().astype(int)

    labels = [f"{str(sentiment).capitalize()} ({percentage}%)"
              for sentiment, percentage in zip(sentiment_counts.index, percentages)]

    # Colours follow the slices: one entry per sentiment, in slice order.
    slice_colors = [palette.get(str(sentiment).lower(), fallback_color)
                    for sentiment in sentiment_counts.index]

    fig = go.Figure(data=[go.Pie(
        labels=labels,
        values=sentiment_counts,
        hole=0.6,
        marker_colors=slice_colors
    )])

    fig.update_layout(
        title="Sentiment Classification",
        annotations=[dict(text=f"{total} Reviews", x=0.5, y=0.5, font_size=15, showarrow=False)],
        height=300
    )

    return fig

### 2.4. Sentiment by date

In [12]:
def create_sentiment_by_date(filtered_df):
    """Build a line chart of yearly review counts per sentiment.

    The input frame is not modified: the year is derived into a local Series
    instead of being written back as a new column.

    Args:
        filtered_df: Frame with a ``date`` column (datetime or parseable
            strings) and a ``sentiment`` column.

    Returns:
        plotly.graph_objects.Figure: one line per sentiment
        (positive, negative, neutral), x = year, y = number of reviews.
    """
    sentiments = ['positive', 'negative', 'neutral']
    colors = {'positive': '#4285F4', 'negative': '#EA4335', 'neutral': '#5BC0DE'}

    # Year of each review, index-aligned with filtered_df.
    years = pd.to_datetime(filtered_df['date']).dt.year
    all_years = sorted(years.dropna().unique())

    # Count reviews per year for each sentiment. Building the frame from a
    # dict of Series takes the union of their indexes, so a year that only
    # occurs for one sentiment is kept. (Assigning the Series one by one into
    # an empty DataFrame would lock the index to the first sentiment's years
    # and silently drop the others.)
    per_sentiment = {
        sentiment: years[filtered_df['sentiment'] == sentiment].value_counts()
        for sentiment in sentiments
    }
    yearly_sentiment = (
        pd.DataFrame(per_sentiment, columns=sentiments)
        .reindex(all_years)   # include years that have none of the three sentiments
        .fillna(0)            # no reviews for that (year, sentiment) pair
        .astype(int)
    )

    fig = go.Figure()

    # One trace per sentiment, in a fixed order and colour.
    for sentiment in yearly_sentiment.columns:
        fig.add_trace(go.Scatter(
            x=yearly_sentiment.index,
            y=yearly_sentiment[sentiment],
            mode='lines+markers',
            name=sentiment.capitalize(),
            line=dict(color=colors[sentiment])
        ))

    fig.update_layout(
        title="Sentiment Classification by Date",
        xaxis_title="Year",
        yaxis_title="Count",
        height=300
    )

    return fig

### 2.5. Word Cloud

In [18]:
def create_word_cloud(filtered_df):
    """Render a word cloud of the dominant topic keywords.

    A figure is always returned. When the filtered data contains no keywords,
    the figure shows a short message instead of a word cloud, so callers never
    hit an unbound ``fig``.

    Args:
        filtered_df: Frame with a ``dominant_topic_keywords`` column.

    Returns:
        matplotlib.figure.Figure: the rendered word cloud, or a placeholder.
    """
    # Combine all keywords into a single string; skip missing values so the
    # join never sees a non-string.
    keywords = filtered_df['dominant_topic_keywords'].dropna().astype(str)
    text = ' '.join(keywords.tolist())

    fig, ax = plt.subplots(figsize=(10, 6))

    if text.strip():
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            colormap='viridis',
            max_words=100,
            collocations=False,
            contour_width=3,
            contour_color='steelblue'
        ).generate(text)
        ax.imshow(wordcloud, interpolation='bilinear')
    else:
        # Nothing to draw (e.g. the filters matched no reviews).
        ax.text(0.5, 0.5, "No text available for word cloud.",
                ha='center', va='center', fontsize=14)

    ax.axis('off')
    fig.tight_layout()

    # Detach the figure from pyplot's global registry. The Figure object stays
    # usable by the caller, but figures no longer pile up on every refresh.
    plt.close(fig)

    return fig

## 3. Filter data

In [19]:
def filter_data(df, country, city, hotel, emotion):
    """Return the rows of ``df`` that match the selected dropdown values.

    Each selection is either its "All ..." sentinel (no filtering on that
    column) or a display label. Labels are lower-cased before comparison, so
    the filtered columns are expected to hold lower-case strings. An empty or
    ``None`` selection is treated like the sentinel.

    Args:
        df: Source frame with ``country``, ``city``, ``hotel`` and
            ``emotions`` columns. It is not modified.
        country: Selected country, or ``'All Countries'``.
        city: Selected city, or ``'All Cities'``.
        hotel: Selected hotel, or ``'All Hotels'``.
        emotion: Selected emotion, or ``'All Emotions'``.

    Returns:
        pandas.DataFrame: an independent copy of the matching rows.
    """
    # (column to filter, selected value, sentinel meaning "no filter")
    selections = (
        ('country', country, 'All Countries'),
        ('city', city, 'All Cities'),
        ('hotel', hotel, 'All Hotels'),
        ('emotions', emotion, 'All Emotions'),
    )

    filtered_df = df
    for column, selected, all_label in selections:
        if selected and selected != all_label:
            filtered_df = filtered_df[filtered_df[column] == selected.lower()]

    # Return a copy so that callers can add columns without touching ``df``
    # or triggering SettingWithCopyWarning.
    return filtered_df.copy()

## 4. Update Dashboard

In [20]:
def update_dashboard(country, city, hotel, emotion):
    """Recompute every dashboard chart for the given filter selection.

    Reads the module-level ``df`` as its data source; that name must be
    defined before this function is called.

    Args:
        country: Selected country label, or ``'All Countries'``.
        city: Selected city label, or ``'All Cities'``.
        hotel: Selected hotel label, or ``'All Hotels'``.
        emotion: Selected emotion label, or ``'All Emotions'``.

    Returns:
        tuple: (topics chart, top-emotions chart, sentiment donut,
        yearly sentiment chart, word-cloud figure), in that order.
    """
    # Narrow the global frame once, then feed the same subset to every chart.
    subset = filter_data(df, country, city, hotel, emotion)

    return (
        create_top_words_by_emotion(subset, emotion),
        create_top_emotions_by_sentiment(subset),
        create_sentiment_chart(subset),
        create_sentiment_by_date(subset),
        create_word_cloud(subset),
    )

## 5. Create dashboard

In [21]:
def create_dashboard(csv_path="df_dashboard.csv"):
    """Build the Gradio Blocks app for the hotel sentiment dashboard.

    The CSV at ``csv_path`` is loaded once. That same frame feeds both the
    dropdown choices and every chart, so the dashboard never depends on a
    separately defined global ``df``.

    Args:
        csv_path: Path of the review CSV, passed to ``load_data_from_csv``.

    Returns:
        gr.Blocks: the assembled dashboard; call ``.launch()`` to serve it.
    """
    # Load the actual data
    df = load_data_from_csv(csv_path)

    def dropdown_choices(column, all_label):
        # Unique values with a capitalised first letter for display,
        # preceded by the "All ..." sentinel.
        return [all_label] + sorted(df[column].str.capitalize().unique().tolist())

    countries = dropdown_choices('country', "All Countries")
    cities = dropdown_choices('city', "All Cities")
    hotels = dropdown_choices('hotel', "All Hotels")
    emotions = dropdown_choices('emotions', "All Emotions")

    def refresh_charts(country, city, hotel, emotion):
        # Same pipeline as update_dashboard(), but bound to the frame loaded
        # from csv_path rather than to a module-level df.
        subset = filter_data(df, country, city, hotel, emotion)
        return (
            create_top_words_by_emotion(subset, emotion),
            create_top_emotions_by_sentiment(subset),
            create_sentiment_chart(subset),
            create_sentiment_by_date(subset),
            create_word_cloud(subset),
        )

    # Reset button handler: restore every dropdown to its "All ..." sentinel.
    def reset_filters():
        return "All Countries", "All Cities", "All Hotels", "All Emotions"

    # Create the interface
    with gr.Blocks(title="Hotel Sentiment Analysis Dashboard") as dashboard:
        gr.Markdown("# Hotel Sentiment Analysis Dashboard")

        with gr.Row():
            country_dropdown = gr.Dropdown(label="Country:", choices=countries, value="All Countries")
            city_dropdown = gr.Dropdown(label="City:", choices=cities, value="All Cities")
            hotel_dropdown = gr.Dropdown(label="Hotel:", choices=hotels, value="All Hotels")
            emotion_dropdown = gr.Dropdown(label="Emotion:", choices=emotions, value="All Emotions")

        with gr.Row():
            filter_btn = gr.Button("Apply Filters", variant="primary")
            reset_btn = gr.Button("Reset")

        with gr.Row():
            with gr.Column():
                top_words_plot = gr.Plot(label="Topics")
            with gr.Column():
                top_emotions_plot = gr.Plot(label="Top Emotions")
            with gr.Column():
                sentiment_plot = gr.Plot(label="Sentiment Classification")

        with gr.Row():
            with gr.Column():
                sentiment_date_plot = gr.Plot(label="Sentiment Classification by Date")
            with gr.Column():
                word_cloud_plot = gr.Plot(label="Top Words Word Cloud")

        filter_inputs = [country_dropdown, city_dropdown, hotel_dropdown, emotion_dropdown]
        chart_outputs = [top_words_plot, top_emotions_plot, sentiment_plot,
                         sentiment_date_plot, word_cloud_plot]

        # Set up event handlers
        filter_btn.click(
            fn=refresh_charts,
            inputs=filter_inputs,
            outputs=chart_outputs
        )

        reset_btn.click(
            fn=reset_filters,
            inputs=[],
            outputs=filter_inputs
        )

        # Initialize the dashboard: draw the charts once when the page loads.
        dashboard.load(
            fn=refresh_charts,
            inputs=filter_inputs,
            outputs=chart_outputs
        )

    return dashboard

## 6. Launch Dashboard

In [22]:
# Launch the dashboard with CSV file
# The __main__ guard means the app is only built and served when this code is
# run directly, not when it is imported as a module.
if __name__ == "__main__":
    csv_path = "./df_dashboard.csv"
    dashboard = create_dashboard(csv_path)
    # Starts a local web server; pass share=True to launch() for a public link.
    dashboard.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.
