In [9]:
from __future__ import annotations

import io
from typing import Dict, List

import altair as alt
import numpy as np
import pandas as pd


def load_questionnaire_data(
  data: str, questions: List[str], participant_prefix: str = "P", separator: str = ","
) -> pd.DataFrame:
  """Parse raw questionnaire text into a DataFrame with one row per participant.

  Args:
    data: Delimited text without a header row; each line holds one participant's answers.
    questions: Column names assigned to the parsed fields, in order.
    participant_prefix: Text prepended to the 1-based row number to form the row label.
    separator: Field delimiter used in `data`.

  Returns:
    DataFrame whose columns are `questions` and whose index is
    `<participant_prefix>1`, `<participant_prefix>2`, ...
  """
  buffer = io.StringIO(data)
  frame = pd.read_csv(buffer, names=questions, sep=separator)

  # Label rows by participant, numbering from 1.
  participant_ids = [f"{participant_prefix}{number}" for number in range(1, len(frame) + 1)]
  frame.index = participant_ids  # type: ignore
  return frame


def create_response_mapping(responses: List[str]) -> Dict[str, int]:
  """Assign each response label its 1-based position in `responses`.

  Args:
    responses: Response labels ordered from the lowest to the highest scale value.

  Returns:
    Dict mapping each label to its position, starting at 1.
  """
  mapping: Dict[str, int] = {}
  for rank, label in enumerate(responses, start=1):
    mapping[label] = rank
  return mapping


def calculate_statistics(df: pd.DataFrame) -> pd.DataFrame:
  """Calculate mean and standard deviation for each question.

  Args:
    df: One column per question, one row per participant, numeric answers.

  Returns:
    DataFrame with one row per question and the columns:
      - "Question": the column name from `df`
      - "AVG": column mean, rounded to 2 decimals
      - "STD": column standard deviation (`DataFrame.std()` defaults), rounded to 2 decimals
      - "Stats": display label built from the rounded AVG and STD
  """
  stats = pd.DataFrame({"AVG": df.mean(), "STD": df.std()}).round(2)
  stats["Stats"] = "AVG: " + stats["AVG"].astype(str) + "\n, STD: " + stats["STD"].astype(str)
  # Name the index explicitly rather than renaming the default "index" column:
  # if `df.columns` carries a name, reset_index() uses that name instead and the
  # "Question" column would silently be missing.
  return stats.rename_axis("Question").reset_index()


def process_questionnaire_data(
  df: pd.DataFrame, reverse_questions: List[str] | None = None, max_value: int = 5
) -> pd.DataFrame:
  """Process questionnaire data and prepare for visualization."""
  if reverse_questions:
    df[reverse_questions] = max_value + 1 - df[reverse_questions]

  df_melted = df.reset_index().melt(id_vars="index", var_name="Question", value_name="Value")
  df_counts = df_melted.groupby(["Question", "Value"]).size().reset_index(name="Count")  # type: ignore

  total_counts = df_counts.groupby("Question")["Count"].transform("sum")
  df_counts["Percentage"] = (df_counts["Count"] / total_counts) * 100
  df_counts["cumsum"] = df_counts.groupby("Question")["Percentage"].cumsum()
  df_counts["mid"] = df_counts["cumsum"] - (df_counts["Percentage"] / 2)

  return df_counts


def create_stacked_bar_visualization(
  df_counts: pd.DataFrame,
  response_mapping: Dict[str, int],
  questions: List[str],
  stats_df: pd.DataFrame,
  color_range: List[str] | None = None,
  width: int = 180,
) -> alt.LayerChart:
  """Create stacked bar visualization with statistics for questionnaire responses.

  Three layers share the question axis: a stacked percentage bar per question,
  the response count centred in each bar segment, and a statistics label placed
  just to the right of the bars.

  Args:
    df_counts: Per-question response counts with "Question", "Value", "Count",
      "Percentage" and "mid" columns (the output of `process_questionnaire_data`).
      The frame is not modified.
    response_mapping: Response label -> numeric value. The key order defines the
      colour domain; the smallest and largest values mark the scale ends.
    questions: Question names in the order they should appear on the y axis.
    stats_df: Frame with "Question" and "Stats" columns (the output of
      `calculate_statistics`).
    color_range: Colours paired with the response labels in order. Defaults to a
      five-step red-grey-blue palette.
    width: Chart width; the statistics text is placed at `width + 10`.

  Returns:
    The layered Altair chart (bars + counts + statistics).
  """
  if color_range is None:
    # Built per call instead of living in the signature as a shared mutable default.
    color_range = ["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"]

  value_to_text = {v: k for k, v in response_mapping.items()}
  # assign() returns a new frame, so the caller's df_counts does not gain a column.
  df_counts = df_counts.assign(Response=df_counts["Value"].map(value_to_text))

  # Count labels on the two end segments are drawn in white. Derive the ends
  # from the mapping so scales other than 1-5 are handled; an empty mapping
  # falls back to the 1-5 ends.
  scale_values = list(response_mapping.values())
  lowest, highest = (min(scale_values), max(scale_values)) if scale_values else (1, 5)

  color_scale = alt.Scale(domain=list(response_mapping.keys()), range=color_range)

  # Main bars
  bars = (
    alt.Chart(df_counts)
    .mark_bar()
    .encode(
      x=alt.X(
        "Percentage:Q",
        title="Response Distribution (%)",
        scale=alt.Scale(domain=[0, 100]),
        axis=alt.Axis(format=".0f"),
      ),
      y=alt.Y("Question:N", title="Questionnaire Items", sort=questions),
      color=alt.Color(
        "Response:O",
        scale=color_scale,
        legend=None,
      ),
      order=alt.Order("Value:O", sort="ascending"),
    )
  )

  # Response counts, positioned at the centre ("mid") of each bar segment
  text = (
    alt.Chart(df_counts)
    .mark_text(align="center", baseline="middle", fontSize=10, fontWeight=700, font="Open Sans")
    .encode(
      x=alt.X("mid:Q"),
      y=alt.Y("Question:N", sort=questions),
      text=alt.Text("Count:Q"),
      order=alt.Order("Value:O", sort="ascending"),
      color=alt.condition(
        (alt.datum.Value == lowest) | (alt.datum.Value == highest),
        alt.value("white"),
        alt.value("black"),
      ),
    )
  )

  # Statistics text
  stats = (
    alt.Chart(stats_df)
    .mark_text(align="left", baseline="middle", fontSize=10)
    .encode(
      x=alt.value(width + 10),  # Fixed position after the bars
      y=alt.Y("Question:N", sort=questions),
      text="Stats:N",
    )
  )

  return (bars + text + stats).properties(width=width)


In [13]:
# Raw SUS answers: one line per participant, one value per item (Q1..Q10).
sus = """
4,1,5,1,2,1,5,1,5,1
4,2,4,2,4,2,4,2,4,3
2,2,5,2,4,3,5,1,4,3
3,1,5,2,4,1,5,1,4,1
4,2,5,3,5,1,5,1,5,2
2,1,4,2,3,2,5,2,4,1
5,4,5,1,4,1,5,1,5,1
3,1,5,2,4,1,5,1,4,1
4,3,3,2,2,1,4,2,4,2
4,1,5,1,5,1,5,1,5,2
4,1,5,1,5,2,4,1,5,1
3,2,4,1,4,1,4,2,4,1
3,2,4,2,4,1,5,1,3,1
4,1,5,2,5,1,5,1,5,1
"""

# Full item wording, kept for reference; the chart uses the short labels below.
# sus_questions = [
#   "Q1. I think that I would like to use this system frequently.",
#   "Q2. I found the system unnecessarily complex.",
#   "Q3. I thought the system was easy to use.",
#   "Q4. I think that I would need the support of a technical person to be able to use this system.",
#   "Q5. I found the various functions in this system were well integrated.",
#   "Q6. I thought there was too much inconsistency in this system.",
#   "Q7. I would imagine that most people would learn to use this system very quickly.",
#   "Q8. I found the system very cumbersome to use.",
#   "Q9. I felt very confident using the system.",
#   "Q10. I needed to learn a lot of things before I could get going with this system.",
# ]
sus_questions = [f"Q{i+1}" for i in range(10)]
responses = [
  "Strongly disagree",
  "Disagree",
  "Neither agree nor disagree",
  "Agree",
  "Strongly agree",
]

# Load and process data
df = load_questionnaire_data(sus, sus_questions)
response_mapping = create_response_mapping(responses)

# Calculate statistics
stats_df = calculate_statistics(df)

# Process data for visualization.
# Answers are counted as given: no reverse_questions are passed.
# Stored under its own name so the extra-questionnaire cell, which assigns
# df_counts_exta, cannot overwrite the SUS counts.
df_counts_sus = process_questionnaire_data(df)

# Create visualization
create_stacked_bar_visualization(df_counts_sus, response_mapping, sus_questions, stats_df, width=300)


In [11]:
# Raw answers to the extra questionnaire: one line per participant,
# one value per entry of extra_questions.
extra = """
5,5,5,5,5,5,5
4,4,5,5,4,4,4
4,4,4,4,4,3,5
5,4,5,5,3,5,5
5,5,5,5,5,4,5
4,2,4,4,4,4,5
5,4,5,4,5,5,5
4,4,4,4,4,4,5
5,5,5,5,5,2,5
5,5,5,4,5,5,5
5,5,5,5,5,5,5
5,4,5,5,4,5,5
5,4,4,4,5,4,5
5,5,5,5,5,4,5
"""

# These strings are the labels shown on the charts.
extra_questions = [
  "Learnability (self-learning session)",
  "Confidence (self-learning session)",
  "Learnability (explore session)",
  "Confidence (explore session)",
  "Provided chart was helpful for exploration",
  "Chart in Swipytics was easy to interpret",
  "Swipytics was easy to use in smartphone",
]

# Question groups; each group is drawn as its own chart.
q_learnability = ["Learnability (self-learning session)", "Learnability (explore session)"]
q_leanability = q_learnability  # legacy misspelled name, kept so existing references still resolve
q_confidence = ["Confidence (self-learning session)", "Confidence (explore session)"]
q_extra = extra_questions


# Load the full table once, then split it by question group.
df_extra_all = load_questionnaire_data(extra, extra_questions)

df_learnability = df_extra_all[q_learnability]
df_confidence = df_extra_all[q_confidence]
# Everything not in the learnability/confidence groups.
df_extra = df_extra_all.drop(columns=q_learnability + q_confidence)


stats_df_learnability = calculate_statistics(df_learnability)
stats_df_confidence = calculate_statistics(df_confidence)
stats_df_extra = calculate_statistics(df_extra)

# Process data for visualization
df_counts_learnability = process_questionnaire_data(df_learnability)
df_counts_confidence = process_questionnaire_data(df_confidence)
df_counts_exta = process_questionnaire_data(df_extra)


# response_mapping comes from the SUS cell; both questionnaires use the same answer scale.
display(
  create_stacked_bar_visualization(
    df_counts_exta, response_mapping, extra_questions, stats_df_extra
  )
)
display(
  create_stacked_bar_visualization(
    df_counts_learnability, response_mapping, q_learnability, stats_df_learnability
  )
)
display(
  create_stacked_bar_visualization(
    df_counts_confidence, response_mapping, q_confidence, stats_df_confidence
  )
)