In [None]:
# Install Altair (imported as `alt` by the charting code below) into the
# kernel's environment. No version is pinned, so pip picks the release.
%pip install altair

In [None]:
from pathlib import Path
import polars as pl
from dtale import show
import dtale.global_state as global_state

# App-wide D-Tale setting: cap the column width at 300.
global_state.set_app_settings({"max_column_width": 300})

# Resolve <current working directory>/../data and read the sentiment chunks
# parquet file into a polars DataFrame.
data_dir = Path().absolute().joinpath("..", "data")
df = pl.read_parquet(
    data_dir / "dagster/whatsapp_chunks_sentiment/cm0i27jdj0000aqpa73ghpcxf.snappy"
)


In [None]:
import polars as pl
import altair as alt

# Keep only chunks that have an analysis attached. Re-running this cell is
# harmless because the filter is idempotent.
df = df.filter(pl.col("raw_analysis").is_not_null())


def _explode_sentiment(frame: pl.DataFrame, column: str, speaker: str) -> pl.DataFrame:
    """Flatten one list-of-struct sentiment column into one row per segment.

    Args:
        frame: Chunk-level frame containing ``chunk_id`` and ``column``.
        column: Name of the column to explode and unnest. The pipeline relies
            on each struct exposing ``from``, ``to`` and ``sentiment`` fields,
            with ``from``/``to`` stored as strings.
        speaker: Label written to the ``speaker`` column of every output row.

    Returns:
        A frame with the columns ``chunk_id``, ``timestamp_from``,
        ``timestamp_to``, ``sentiment_value`` and ``speaker``.
    """
    return (
        frame
        .explode(column)
        .unnest(column)  # now we get columns: from, to, sentiment
        .with_columns(
            # Non-strict cast: values that cannot be read as a float become null.
            pl.col("sentiment").cast(pl.Float64, strict=False).alias("sentiment_value")
        )
        # Filter on the cast result so both missing and unparsable sentiments
        # are dropped, and do it before the timestamps are parsed.
        .filter(pl.col("sentiment_value").is_not_null())
        .with_columns(
            pl.col("from").str.to_datetime().alias("timestamp_from"),
            pl.col("to").str.to_datetime().alias("timestamp_to"),
            pl.lit(speaker).alias("speaker"),
        )
        .select([
            "chunk_id",
            "timestamp_from",
            "timestamp_to",
            "sentiment_value",
            "speaker",
        ])
    )


# One tidy frame per speaker, built by the same pipeline.
df_me = _explode_sentiment(df, "sentiment_me", "me")
df_partner = _explode_sentiment(df, "sentiment_partner", "partner")

# Stack them together to get a "long" form
df_long = df_me.vstack(df_partner)


In [None]:
# Every sentiment segment contributes two plot points: one stamped with its
# start time and one with its end time. The combined frame is then ordered by
# that shared "timestamp" column.
segment_starts = df_long.with_columns(pl.col("timestamp_from").alias("timestamp"))
segment_ends = df_long.with_columns(pl.col("timestamp_to").alias("timestamp"))
pdf_long_expanded = pl.concat([segment_starts, segment_ends]).sort("timestamp")

# Legend and colour encoding for the two speakers.
speaker_legend = alt.Legend(
    title="Speaker",
    orient="right",
    fillColor="white",
    padding=10,
    strokeColor="gray",
    cornerRadius=5,
)
speaker_color = alt.Color(
    "speaker:N",
    scale=alt.Scale(scheme="set1"),
    legend=speaker_legend,
)
hover_fields = [
    "speaker",
    "rolling_mean:Q",
    alt.Tooltip("timestamp:T", title="Time"),
]

# Smooth per speaker with a window reaching 10 rows back and 10 rows forward.
# No sort is given to the window transform, so it uses the row order of the
# data passed in (sorted by timestamp above).
smoothed = alt.Chart(pdf_long_expanded.to_pandas()).transform_window(
    rolling_mean="mean(sentiment_value)",
    frame=[-10, 10],
    groupby=["speaker"],
)

chart = (
    smoothed
    .mark_line()
    .encode(
        x=alt.X("timestamp:T", title="Time"),
        y=alt.Y("rolling_mean:Q", title="Sentiment (Smoothed)"),
        color=speaker_color,
        tooltip=hover_fields,
    )
    .properties(width=800, height=400)
    .interactive()
)
chart.show()