In [1]:
import plotly.express as px
import pandas as pd

In [4]:
# Load the cleaned sentiment dataset; every later cell reads from `data`.
DATA_FILE = "sentimentdataset_clean.xlsx"
data = pd.read_excel(DATA_FILE)

# 1. Yearly distribution of sentiments

In [47]:
# Sentiment counts per year.
# Step 1: count rows for every (Year, General_Sentiment) pair.
year_sentiment_pairs = data.groupby("Year")["General_Sentiment"].value_counts()
# Step 2: pivot the sentiments into columns (one row per year); pairs that
# never occur come out as NaN after the pivot and are replaced with 0.
sentiment_counts = year_sentiment_pairs.unstack().fillna(0)

In [48]:
# Bar chart of the yearly counts. The table is passed in wide form:
# its index (Year) supplies x and every sentiment column supplies a y series.
fig = px.bar(
    data_frame=sentiment_counts,
    title="Sentiment Distribution Across Years",
    x=sentiment_counts.index,
    y=sentiment_counts.columns,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    labels={"value": "Count", "Year": "Year"},
)
fig.show()

# here we can talk about how there is more and more available data over the years
# and how most data in this dataset stems from year 2023

# 2. Monthly distribution of sentiments

In [39]:
# Sentiment counts per month, reshaped to long form for plotting.
#
# The month-level table is computed here instead of reusing `sentiment_counts`:
# at this point in a top-to-bottom run that variable still holds the *yearly*
# table (indexed by "Year"), so melting it on id_vars='Month' raises a KeyError.
# NOTE(review): assumes `data` has a "Month" column alongside Year/Day/Hour — confirm.
monthly_sentiment_counts = (
    data.groupby("Month")["General_Sentiment"]
    .value_counts()
    .unstack()
    .fillna(0)
)

# Long form: one row per (Month, General_Sentiment) with the count in 'value'.
# var_name is spelled out so the sentiment column is always called
# 'General_Sentiment', which is the name the plotting cell colours by.
sentiment_melted = monthly_sentiment_counts.reset_index().melt(
    id_vars='Month',
    value_vars=monthly_sentiment_counts.columns,
    var_name='General_Sentiment',
    value_name='value',
)

In [42]:
# Bar chart of the monthly counts from the long-form table: one bar segment
# per (Month, General_Sentiment) row, coloured and text-labelled by sentiment.
fig = px.bar(
    data_frame=sentiment_melted,
    title="Sentiment Distribution Across Months",
    x='Month',
    y='value',
    color='General_Sentiment',
    text='General_Sentiment',
    color_discrete_sequence=px.colors.sequential.Viridis,
    labels={"value": "Count", "Month": "Month"},
)
fig.show()
# talk about more and less "active" months

# 3. Daily distribution of sentiments

In [15]:
# Sentiment counts per day.
# NOTE: this rebinds `sentiment_counts`, replacing whatever table it held before.
day_sentiment_pairs = data.groupby("Day")["General_Sentiment"].value_counts()
# Pivot sentiments into columns (one row per day); missing pairs become 0.
sentiment_counts = day_sentiment_pairs.unstack().fillna(0)

In [44]:
# Bar chart of the daily counts (wide form: index -> x, sentiment columns -> y).
fig = px.bar(
    data_frame=sentiment_counts,
    title="Sentiment Distribution Across Days",
    x=sentiment_counts.index,
    y=sentiment_counts.columns,
    color_discrete_sequence=px.colors.qualitative.D3,
    labels={"value": "Count", "Day": "Day"},
)
fig.show()
# talk about popular days for social media posts

# 4. Hourly distribution of sentiments

In [18]:
# Sentiment counts per hour.
# NOTE: this rebinds `sentiment_counts`, replacing whatever table it held before.
hour_sentiment_pairs = data.groupby("Hour")["General_Sentiment"].value_counts()
# Pivot sentiments into columns (one row per hour); missing pairs become 0.
sentiment_counts = hour_sentiment_pairs.unstack().fillna(0)

In [45]:
# Bar chart of the hourly counts (wide form: index -> x, sentiment columns -> y).
fig = px.bar(
    sentiment_counts,
    x=sentiment_counts.index,
    y=sentiment_counts.columns,
    title="Sentiment Distribution Across Hours",
    # The label key must match the index name produced by groupby("Hour");
    # the previous key "Hours" matched nothing and was silently ignored.
    labels={"value": "Count", "Hour": "Hour"},
    color_discrete_sequence=px.colors.qualitative.Dark24
)

fig.show()
# social media activity during the day

# Additional: Sentiments per platform

In [20]:
# Sentiment counts per platform, kept in long form:
# one row per (Platform, General_Sentiment) pair with its size in 'Count'.
platform_sentiment_sizes = data.groupby(['Platform', 'General_Sentiment']).size()
platform_sentiment_counts = platform_sentiment_sizes.reset_index(name='Count')

In [24]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One pie chart per platform, arranged on a grid that is `cols` charts wide.
platforms = platform_sentiment_counts['Platform'].unique()

cols = 2
# Ceiling division so the grid has room for every platform for any `cols`
# (the earlier `(n + 1) // cols` was only a correct ceiling when cols == 2).
# At least one row is kept so an empty platform list still yields a valid grid.
rows = max(1, (len(platforms) + cols - 1) // cols)

# Pie traces require 'domain'-type subplot cells. Each cell gets its own spec
# dict instead of repeating one shared dict/list via `*`.
specs = [[{'type': 'domain'} for _ in range(cols)] for _ in range(rows)]

fig = make_subplots(rows=rows, cols=cols, specs=specs,
                    subplot_titles=list(platforms))

for i, platform in enumerate(platforms):
    platform_data = platform_sentiment_counts[platform_sentiment_counts['Platform'] == platform]
    # Fill the grid row by row; subplot rows/cols are 1-based.
    row = i // cols + 1
    col = i % cols + 1

    fig.add_trace(
        go.Pie(
            labels=platform_data['General_Sentiment'],
            values=platform_data['Count'],
            name=platform
        ),
        row=row, col=col
    )

fig.update_layout(
    title_text='Sentiment Distribution Across Platforms',
    # Scale the figure height with the number of grid rows.
    height=400 * rows,
)

# Show percentage and sentiment label inside each slice.
fig.update_traces(textposition='inside', textinfo='percent+label')

fig.show()

# Additional: Sentiments per country

In [25]:
import pandas as pd
import plotly.express as px

In [27]:
# Sentiment counts per country, kept in long form:
# one row per (Country, General_Sentiment) pair with its size in 'Count'.
country_sentiment_sizes = data.groupby(['Country', 'General_Sentiment']).size()
country_sentiment_counts = country_sentiment_sizes.reset_index(name='Count')

In [46]:
# Treemap with two levels (Country, then General_Sentiment); tile sizes come
# from 'Count' and tiles are coloured by sentiment.
fig = px.treemap(
    data_frame=country_sentiment_counts,
    title='Sentiment Distribution by Country',
    path=['Country', 'General_Sentiment'],
    values='Count',
    color='General_Sentiment',
    color_discrete_sequence=px.colors.qualitative.Set3,
)
fig.show()
# also reflects the amount of data per country