# Sentiment Analysis Prediction Insights

This notebook analyzes the predictions of the sentiment analysis model on Counter-Strike 2 reviews. 
The goal is to derive insights into model performance, error patterns, and temporal trends.

**Color Scheme (Asiimov):**
- Orange: #FF9900
- Black: #1A1A1A
- White: #FFFFFF
- Grey: #5c5c5c

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# Asiimov palette (hex strings) used by every figure in this notebook
ASIIMOV_ORANGE = '#FF9900'
ASIIMOV_BLACK = '#1A1A1A'
ASIIMOV_WHITE = '#FFFFFF'
ASIIMOV_GREY = '#5c5c5c'


def apply_asiimov_theme(fig):
    """Restyle ``fig`` with the Asiimov palette and hand it back.

    A single ``fig.update_layout`` call sets the plot and paper backgrounds
    to black, the general font to white, the title font to orange, grey
    visible grid lines on the x and y axes, and a black legend background
    with a grey border color.

    Args:
        fig: Object exposing ``update_layout(**kwargs)`` (the notebook passes
            Plotly figures).

    Returns:
        The same ``fig`` object that was passed in, so calls can be chained.
    """
    # Both axes share the same grid styling; each gets its own dict copy.
    grid_style = {'gridcolor': ASIIMOV_GREY, 'showgrid': True}
    theme = {
        'plot_bgcolor': ASIIMOV_BLACK,
        'paper_bgcolor': ASIIMOV_BLACK,
        'font': {'color': ASIIMOV_WHITE},
        'title_font': {'color': ASIIMOV_ORANGE},
        'xaxis': dict(grid_style),
        'yaxis': dict(grid_style),
        'legend': {'bgcolor': ASIIMOV_BLACK, 'bordercolor': ASIIMOV_GREY},
    }
    fig.update_layout(**theme)
    return fig

## 1. Load and Preprocess Data
We load the prediction results and convert timestamps to datetime objects for time-series analysis.

In [None]:
# Prediction results, read relative to the notebook's working directory.
predictions_csv = 'CS2Review_clean_predictions_100k.csv'

# Derived columns are added in one chained step; later keyword arguments can
# see the columns created by earlier ones.
df = pd.read_csv(predictions_csv).assign(
    # `timestamp_created` is interpreted as seconds since the Unix epoch
    date=lambda frame: pd.to_datetime(frame['timestamp_created'], unit='s'),
    # Integer copy of the vote so it can be averaged and compared to the label
    voted_up_int=lambda frame: frame['voted_up'].astype(int),
    # True where the predicted label equals the user's vote
    is_correct=lambda frame: frame['voted_up_int'] == frame['predicted_label'],
)

print(f"Data loaded: {len(df)} rows")
df.head()

## 2. Temporal Analysis: Sentiment Decoupling
We analyze the divergence between the ground truth user sentiment (Vote) and the model's predicted sentiment based on text (Label). 
A divergence indicates periods where user ratings might not align with the textual content (e.g., review bombing, irony) or where the model performance degrades.

In [None]:
# Weekly aggregates: mean user vote, mean predicted label, and the number of
# reviews per week (non-null `recommendationid` values).
weekly_stats = (
    df.set_index('date')
    .resample('W')
    .agg({
        'voted_up_int': 'mean',
        'predicted_label': 'mean',
        'recommendationid': 'count',
    })
    .rename(columns={'recommendationid': 'count'})
)

# Keep only weeks with more than this many reviews; sparse weeks are noisy.
min_weekly_reviews = 50
df_resampled = weekly_stats.loc[weekly_stats['count'] > min_weekly_reviews]

# (column, legend name, line style) for each series drawn on the chart
series_specs = [
    (
        'voted_up_int',
        'Ground Truth (User Vote)',
        dict(color=ASIIMOV_ORANGE, width=3),
    ),
    (
        'predicted_label',
        'Model Prediction',
        dict(color=ASIIMOV_WHITE, width=2, dash='dot'),
    ),
]

fig = go.Figure()
for column, legend_name, line_style in series_specs:
    fig.add_trace(
        go.Scatter(
            x=df_resampled.index,
            y=df_resampled[column],
            mode='lines',
            name=legend_name,
            line=line_style,
        )
    )

# Both series are weekly means, so the y axis is formatted as a percentage.
fig.update_layout(
    title='Decoupling Analysis: Ground Truth vs Model Prediction (Weekly)',
    xaxis_title='Date',
    yaxis_title='Positive Rate',
    yaxis_tickformat='.0%',
)

apply_asiimov_theme(fig)
fig.show()

## 3. Disagreement Rate Over Time
This visualization highlights when the model disagrees most with the user votes. High disagreement peaks may correspond to game updates or events where user sentiment is complex (e.g., mixed feelings).

In [None]:
# Per-review flag: 1 where the predicted label differs from the user's vote.
df['disagreement'] = (df['voted_up_int'] != df['predicted_label']).astype(int)

# Weekly disagreement rate together with the number of reviews behind it.
weekly_disagreement = (
    df.set_index('date')
    .resample('W')['disagreement']
    .agg(['mean', 'count'])
)

# Apply the same minimum-volume rule as the weekly decoupling chart: a rate
# computed from a handful of reviews produces spurious peaks, and empty weeks
# yield NaN, which would break the filled area into fragments.
min_weekly_reviews = 50
df_disagreement = weekly_disagreement.loc[
    weekly_disagreement['count'] > min_weekly_reviews, 'mean'
].rename('disagreement')

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df_disagreement.index,
    y=df_disagreement,
    mode='lines',
    name='Disagreement Rate',
    fill='tozeroy',
    line=dict(color=ASIIMOV_ORANGE, width=2),
    # Same orange as the line (#FF9900) at 20% opacity
    fillcolor='rgba(255, 153, 0, 0.2)'
))

fig.update_layout(
    title='Model Disagreement Rate Over Time',
    xaxis_title='Date',
    yaxis_title='Disagreement Rate',
    yaxis_tickformat='.0%'
)

apply_asiimov_theme(fig)
fig.show()

## 4. Confidence Analysis: Correct vs Incorrect Predictions
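Are errors caused by low confidence (uncertainty) or high confidence (wrong interpretation)? We compare the distribution of the model's `positive_probability` for correct and incorrect predictions. Values near 0 or 1 indicate a confident prediction (negative or positive, respectively), while values near 0.5 indicate an uncertain one.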

In [None]:
# Split the predicted positive-class probability by prediction correctness.
correct_mask = df['is_correct']
correct_probs = df.loc[correct_mask, 'positive_probability']
incorrect_probs = df.loc[~correct_mask, 'positive_probability']

# Both histograms use identical bins (width 0.05 over 0..1) so the overlaid
# bars line up.
probability_bins = dict(start=0, end=1, size=0.05)

# (values, legend name, bar color) for each histogram, in drawing order
histogram_specs = (
    (correct_probs, 'Correct Predictions', ASIIMOV_GREY),
    (incorrect_probs, 'Incorrect Predictions', ASIIMOV_ORANGE),
)

fig = go.Figure()
for values, legend_name, bar_color in histogram_specs:
    fig.add_trace(
        go.Histogram(
            x=values,
            name=legend_name,
            marker_color=bar_color,
            opacity=0.7,
            xbins=dict(probability_bins),
        )
    )

# 'overlay' draws the two histograms on top of each other rather than stacked.
fig.update_layout(
    title='Probability Distribution: Correct vs Incorrect',
    xaxis_title='Predicted Positive Probability',
    yaxis_title='Count',
    barmode='overlay',
)

apply_asiimov_theme(fig)
fig.show()

## 5. Review Length vs Prediction Accuracy
Does the model perform better on longer, more detailed reviews compared to short ones?

In [None]:
# Character count per review. Missing reviews are treated as empty (length 0);
# without the fillna they would be stringified to 'nan' and counted as 3 chars.
df['review_length'] = df['clean_review'].fillna('').astype(str).str.len()

# Right-closed bins: [0, 50], (50, 100], (100, 200], (200, 500], (500, inf).
# `include_lowest=True` keeps zero-length reviews in the first bin, and the
# open-ended upper edge makes '500+' cover every longer review instead of
# silently dropping anything above a fixed cap.
bins = [0, 50, 100, 200, 500, np.inf]
labels = ['0-50', '50-100', '100-200', '200-500', '500+']
df['length_bin'] = pd.cut(
    df['review_length'],
    bins=bins,
    labels=labels,
    include_lowest=True,
)

# Mean of the boolean `is_correct` per bin = accuracy. `observed=False` keeps
# bins that contain no reviews in the result.
accuracy_by_len = df.groupby('length_bin', observed=False)['is_correct'].mean().reset_index()

fig = go.Figure()

fig.add_trace(go.Bar(
    x=accuracy_by_len['length_bin'],
    y=accuracy_by_len['is_correct'],
    marker_color=ASIIMOV_ORANGE,
    name='Accuracy'
))

fig.update_layout(
    title='Prediction Accuracy by Review Length (Characters)',
    xaxis_title='Review Length',
    yaxis_title='Accuracy',
    yaxis_tickformat='.1%'
)

apply_asiimov_theme(fig)
fig.show()