# Integrated Analysis and Visualization

This notebook integrates the analysis and visualization of four experiments: WSLS vs Baseline, Feedback vs Baseline, Payoff vs Baseline, and WSLS vs Payoff.

To run this notebook, ensure you have the following R packages installed:
- `tidyverse`
- `ggpubr`
- `dplyr`

Also, make sure all data files (e.g., `baseline_allresult_processed.csv`) are in the working directory.

In [None]:
# Load necessary packages
library(tidyverse)
library(ggpubr)
library(dplyr)

# Shared helper: run a two-sample Wilcoxon test and hand back only its p-value.
#   x, y         numeric vectors holding the two samples to compare
#   alternative  passed straight to wilcox.test():
#                "two.sided", "less" or "greater"
# Returns the p-value of the test as a single number.
wilcox_test <- function(x, y, alternative) {
  result <- wilcox.test(x, y, alternative = alternative)
  result[["p.value"]]
}

## Part 1: WSLS vs Baseline Analysis

This section compares the WSLS strategy against the Baseline, analyzing schema selection, reaction times, observation counts, attention shifts, performance, and accuracy distribution.

In [None]:
# Data loading: read both conditions and tag every row with its condition
baseline_data_wsls <- read_csv("baseline_allresult_processed.csv") %>%
  mutate(group = "Baseline")
WSLS_data <- read_csv("WSLS_allresult_processed.csv") %>%
  mutate(group = "WSLS")

# Prefix Subject with the condition so identifiers stay distinct after stacking
all_data_wsls <- bind_rows(baseline_data_wsls, WSLS_data)
all_data_wsls <- all_data_wsls %>%
  mutate(Subject = paste0(group, "_", Subject)) %>%
  arrange(Subject, Round, Phase)

# Panel A: Distribution of Consecutive Schema Selections (Histogram with p-value, complete Baseline)
# Step 1: one row per subject and round, holding the first and last schema
# found among that round's rows (rows are ordered by Phase above).
round_schemas <- all_data_wsls %>%
  group_by(Subject, Round) %>%
  summarise(
    Schema_Phase1 = first(Schema),
    Schema_Phase2 = last(Schema),
    group = first(group)
  )

# Step 2: a round continues the current streak when either of its schemas
# equals either schema of the subject's previous round; otherwise (or when
# there is no previous round) it opens a new streak.
streak_data <- round_schemas %>%
  arrange(Subject, Round) %>%
  group_by(Subject) %>%
  mutate(
    prev_phase1 = lag(Schema_Phase1),
    prev_phase2 = lag(Schema_Phase2),
    is_continuous = Schema_Phase1 == prev_phase1 |
      Schema_Phase1 == prev_phase2 |
      Schema_Phase2 == prev_phase1 |
      Schema_Phase2 == prev_phase2,
    streak_start = is.na(is_continuous) | !is_continuous,
    streak_id = cumsum(streak_start)
  ) %>%
  group_by(Subject, streak_id) %>%
  summarise(streak_length = n(), group = first(group)) %>%
  ungroup()

# Step 3: number of streaks of each length per condition; lengths that never
# occur in a condition are filled in with a count of 0.
streak_freq <- streak_data %>%
  count(group, streak_length, name = "count") %>%
  complete(group, streak_length = 1:max(streak_length), fill = list(count = 0))

# Step 4: two-sample Kolmogorov-Smirnov test on the streak lengths
baseline_streaks <- streak_data$streak_length[streak_data$group == "Baseline"]
wsls_streaks <- streak_data$streak_length[streak_data$group == "WSLS"]
ks_result <- ks.test(baseline_streaks, wsls_streaks)
p_value <- ks_result$p.value
p_label <- ifelse(p_value < 0.001, "p < 0.001", sprintf("p = %.3f", p_value))

# Step 5: dodged bar chart with the p-value pinned to the top-right corner
p1 <- ggplot(streak_freq, aes(x = streak_length, y = count, fill = group)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("Baseline" = "#1F77B4", "WSLS" = "#FF7F0E")) +
  labs(
    title = "Sequential Schema Selection in WSLS",
    x = "Consecutive Number",
    y = "Sequence Number"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  ) +
  annotate("text", x = Inf, y = Inf, label = p_label, hjust = 1.1, vjust = 1.1, size = 4)

# Display the plot
print(p1)
ggsave("Sequential_schema_WSLSvsBaseline.png", p1, width = 5, height = 7, dpi = 300)

In [None]:
# WSLS Classification and Comparison with Baseline
# Each WSLS row is compared with the same subject's two preceding rows
# (ordered by Round, then Phase):
#   prev_*1 = value two rows back, prev_*2 = value one row back.
# "Same" means the current Schema equals at least one of those two schemas;
# the AC used for the label is the AC of the matching row (the larger AC when
# both match), or the AC one row back when neither matches.
wsls_classified <- WSLS_data %>%
  arrange(Subject, Round, Phase) %>%
  group_by(Subject) %>%
  mutate(
    prev_Schema1 = lag(Schema, n = 2),
    prev_Schema2 = lag(Schema, n = 1),
    prev_AC1 = lag(AC, n = 2),
    prev_AC2 = lag(AC, n = 1),
    same_schema = (Schema == prev_Schema1 | Schema == prev_Schema2),
    prev_AC = case_when(
      Schema == prev_Schema1 & Schema != prev_Schema2 ~ prev_AC1,
      Schema == prev_Schema2 & Schema != prev_Schema1 ~ prev_AC2,
      Schema == prev_Schema1 & Schema == prev_Schema2 ~ pmax(prev_AC1, prev_AC2, na.rm = TRUE),
      TRUE ~ NA_real_
    ),
    prev_AC_no_match = ifelse(same_schema, NA_real_, prev_AC2),
    condition = case_when(
      # Rows without two predecessors cannot be classified
      is.na(prev_Schema1) | is.na(prev_Schema2) ~ NA_character_,
      same_schema & prev_AC == 1 ~ "Same, AC=1",
      same_schema & prev_AC != 1 ~ "Same, AC!=1",
      !same_schema & prev_AC_no_match == 1 ~ "Diff, AC=1",
      !same_schema & prev_AC_no_match != 1 ~ "Diff, AC!=1"
    )
  ) %>%
  ungroup()

# Combine WSLS and Baseline data
metric_columns <- c("condition", "Schema_RT", "Schema_OB", "Schema_AS")
wsls_rows <- wsls_classified %>%
  filter(!is.na(condition)) %>%
  select(all_of(metric_columns))
baseline_rows <- baseline_data_wsls %>%
  mutate(condition = "Baseline") %>%
  select(all_of(metric_columns))
# Keep only rows where all three schema metrics are present
analysis_data <- bind_rows(wsls_rows, baseline_rows) %>%
  filter(!is.na(Schema_RT), !is.na(Schema_OB), !is.na(Schema_AS))

# Verify group sizes
print(count(analysis_data, condition))

# Fill colour assigned to each condition
condition_colours <- c(
  "Same, AC=1" = "#FF7F0E", "Same, AC!=1" = "#D62728",
  "Diff, AC=1" = "#2CA02C", "Diff, AC!=1" = "#9467BD",
  "Baseline" = "#1F77B4"
)

# Kruskal-Wallis test across conditions; its p-value is printed on the panel
kruskal_result_rt <- kruskal.test(Schema_RT ~ condition, data = analysis_data)
p_value_rt <- kruskal_result_rt$p.value
p_label_rt <- ifelse(p_value_rt < 0.001, "p < 0.001", sprintf("p = %.3f", p_value_rt))

# Panel B: Violin Plot for Schema_RT
p2 <- ggplot(analysis_data, aes(x = condition, y = Schema_RT, fill = condition)) +
  geom_violin(trim = TRUE) +
  scale_fill_manual(values = condition_colours) +
  labs(
    title = "Schema Reaction Times in WSLS",
    x = "Condition",
    y = "Schema Reaction Time (s)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  ) +
  annotate("text", x = Inf, y = Inf, label = p_label_rt, hjust = 1.1, vjust = 1.1, size = 4)

# Display the plot
print(p2)
ggsave("Schema_RT_WSLSvsBaseline.png", p2, width = 5, height = 7, dpi = 300)

In [None]:
# Panel C: Violin Plot for Schema_OB
# Fill colour assigned to each condition
condition_colours <- c(
  "Same, AC=1" = "#FF7F0E", "Same, AC!=1" = "#D62728",
  "Diff, AC=1" = "#2CA02C", "Diff, AC!=1" = "#9467BD",
  "Baseline" = "#1F77B4"
)

# Kruskal-Wallis test across conditions; its p-value is printed on the panel
kruskal_result_ob <- kruskal.test(Schema_OB ~ condition, data = analysis_data)
p_value_ob <- kruskal_result_ob$p.value
p_label_ob <- ifelse(p_value_ob < 0.001, "p < 0.001", sprintf("p = %.3f", p_value_ob))

p3 <- ggplot(analysis_data, aes(x = condition, y = Schema_OB, fill = condition)) +
  geom_violin(trim = TRUE) +
  scale_fill_manual(values = condition_colours) +
  labs(
    title = "Schema Observation Counts in WSLS",
    x = "Condition",
    y = "Schema Observation Counts"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  ) +
  annotate("text", x = Inf, y = Inf, label = p_label_ob, hjust = 1.1, vjust = 1.1, size = 4)

# Display the plot
print(p3)
ggsave("Schema_OB_analysis_WSLSvsBaseline.png", p3, width = 5, height = 7, dpi = 300)

In [None]:
# Panel D: Violin Plot for Schema_AS
# Fill colour assigned to each condition
condition_colours <- c(
  "Same, AC=1" = "#FF7F0E", "Same, AC!=1" = "#D62728",
  "Diff, AC=1" = "#2CA02C", "Diff, AC!=1" = "#9467BD",
  "Baseline" = "#1F77B4"
)

# Kruskal-Wallis test across conditions; its p-value is printed on the panel
kruskal_result_as <- kruskal.test(Schema_AS ~ condition, data = analysis_data)
p_value_as <- kruskal_result_as$p.value
p_label_as <- ifelse(p_value_as < 0.001, "p < 0.001", sprintf("p = %.3f", p_value_as))

p4 <- ggplot(analysis_data, aes(x = condition, y = Schema_AS, fill = condition)) +
  geom_violin(trim = TRUE) +
  scale_fill_manual(values = condition_colours) +
  labs(
    title = "Schema Attention Shifts in WSLS",
    x = "Condition",
    y = "Schema Attention Shifts"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  ) +
  annotate("text", x = Inf, y = Inf, label = p_label_as, hjust = 1.1, vjust = 1.1, size = 4)

# Display the plot
print(p4)
ggsave("Schema_AS_analysis_WSLSvsBaseline.png", p4, width = 5, height = 7, dpi = 300)

In [None]:
# Panel E: Average Performance Per Round
# Mean and standard error of `performance` for every group and round.
performance_by_round_wsls <- all_data_wsls %>%
  group_by(group, Round) %>%
  summarise(
    mean_performance = mean(performance, na.rm = TRUE),
    # The SE must be divided by the number of values that actually entered
    # sd(); n() would also count rows whose performance is missing and
    # understate the standard error.
    se_performance = sd(performance, na.rm = TRUE) / sqrt(sum(!is.na(performance))),
    .groups = "drop"
  )

# Line = mean per round, ribbon = mean +/- one standard error
p5 <- ggplot(performance_by_round_wsls, aes(x = Round, y = mean_performance, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_performance - se_performance, ymax = mean_performance + se_performance), alpha = 0.2, linetype = 0) +
  scale_color_manual(values = c("Baseline" = "#1F77B4", "WSLS" = "#FF7F0E")) +
  labs(title = "Average Performance For Each Round", x = "Round", y = "Average Performance Score") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Two-sided Wilcoxon test for performance differences vs Baseline
# (computed over all rows of each group, pooled across rounds)
baseline_performance_wsls <- all_data_wsls %>% filter(group == "Baseline") %>% select(performance)
WSLS_performance <- all_data_wsls %>% filter(group == "WSLS") %>% select(performance)
p_WSLS <- wilcox_test(WSLS_performance$performance, baseline_performance_wsls$performance, "two.sided")
cat("WSLS vs Baseline (two-sided test): p =", p_WSLS, "\n")

# Display the plot
print(p5)
ggsave("Perform_WSLSvsBaseline.png", p5, width = 8, height = 7, dpi = 300)

In [None]:
# Panel F: AC Distribution Per Round (Bubble Plot)
# Number of rows for every (group, round, AC value); rows without AC are ignored
ac_freq_wsls <- all_data_wsls %>%
  filter(!is.na(AC)) %>%
  count(group, Round, AC, name = "count")

# Bubble size encodes the count; the two groups are dodged side by side
p6 <- ggplot(ac_freq_wsls, aes(x = Round, y = AC, size = count, color = group)) +
  geom_point(position = position_dodge(width = 0.8), alpha = 0.7) +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("Baseline" = "#1F77B4", "WSLS" = "#FF7F0E")) +
  labs(
    title = "Accuracy Distribution (WSLS vs Baseline)",
    x = "Round",
    y = "Schema Accuracy",
    size = "Number",
    color = "Group"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  )

# Display the plot
print(p6)
ggsave("AC_WSLSvsBaseline.png", p6, width = 8, height = 7, dpi = 300)

In [None]:
# Combine violin plots into a single figure
# Facet title for each metric column, in display order
metric_labels <- c(
  Schema_RT = "Reaction Time (s)",
  Schema_OB = "Observation Counts",
  Schema_AS = "Attention Shifts"
)

# Long format: one row per observation and metric
long_data <- analysis_data %>%
  pivot_longer(
    cols = c(Schema_RT, Schema_OB, Schema_AS),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = factor(metric, levels = names(metric_labels), labels = unname(metric_labels))
  )

# Per-metric value range padded by 10% on each side (lower bound floored at 0)
y_limits <- long_data %>%
  group_by(metric) %>%
  summarise(
    min_val = min(value, na.rm = TRUE),
    max_val = max(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    pad = 0.1 * (max_val - min_val),
    y_min = pmax(0, min_val - pad),
    y_max = max_val + pad
  ) %>%
  select(-pad)

# One Kruskal-Wallis test per metric; the label is placed at that metric's
# padded upper limit
kw_tests <- long_data %>%
  group_by(metric) %>%
  summarise(
    p_value = kruskal.test(value ~ condition)$p.value,
    .groups = "drop"
  ) %>%
  mutate(
    p_label = ifelse(p_value < 0.001, "p < 0.001", sprintf("p = %.3f", p_value)),
    y_pos = y_limits$y_max[match(metric, y_limits$metric)]
  )

condition_colours <- c(
  "Same, AC=1" = "#FF7F0E", "Same, AC!=1" = "#D62728",
  "Diff, AC=1" = "#2CA02C", "Diff, AC!=1" = "#9467BD",
  "Baseline" = "#1F77B4"
)

# Stacked facets (one per metric, free y scale) with the p-value in each facet
combined_violin <- ggplot(long_data, aes(x = condition, y = value, fill = condition)) +
  geom_violin(trim = TRUE) +
  scale_fill_manual(values = condition_colours) +
  facet_wrap(~ metric, scales = "free_y", ncol = 1) +
  labs(title = "Schema Performance Metrics", x = "Condition", y = NULL) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    strip.text = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  geom_text(
    data = kw_tests,
    aes(x = Inf, y = y_pos, label = p_label),
    hjust = 1.1, vjust = 1.1, size = 3.5,
    inherit.aes = FALSE
  )

# Display the plot
print(combined_violin)

In [None]:
# Combine all plots for WSLS vs Baseline
# Panels A-F are laid out row by row on a 3 x 2 grid of equal-height rows
wsls_panels <- list(p1, p2, p3, p4, p5, p6)
combined_plot_wsls <- ggarrange(
  plotlist = wsls_panels,
  labels = LETTERS[1:6],
  ncol = 2,
  nrow = 3,
  heights = c(1, 1, 1),
  common.legend = FALSE
)
ggsave(
  "combined_analysis_WSLSvsBaseline.png", combined_plot_wsls,
  width = 10, height = 12, dpi = 300
)

# Display the combined plot
print(combined_plot_wsls)

## Part 2: Feedback vs Baseline Analysis

This section analyzes the impact of emotional feedback (Feedback, Excitement, Depression) on performance and reaction times compared to the Baseline.

In [None]:
# Data loading: one file per feedback condition, each tagged with its group
baseline_data_feedback <- read_csv("baseline_allresult_processed.csv")
feedback_data <- read_csv("feedback_allresult_processed.csv")
excitement_data <- read_csv("excitement_allresult_processed.csv")
depression_data <- read_csv("depression_allresult_processed.csv")
baseline_data_feedback$group <- "Baseline"
feedback_data$group <- "Feedback"
excitement_data$group <- "Excitement"
depression_data$group <- "Depression"
# Prefix Subject with the group so identifiers stay distinct after stacking
all_data_feedback <- bind_rows(baseline_data_feedback, feedback_data, excitement_data, depression_data) %>%
  mutate(Subject = paste0(group, "_", Subject)) %>%
  arrange(Subject, Round, Phase)

# Panel A: Average Performance Per Round
performance_by_round_feedback <- all_data_feedback %>%
  group_by(group, Round) %>%
  summarise(
    mean_performance = mean(performance, na.rm = TRUE),
    # Divide by the number of non-missing values that sd() actually used;
    # n() would also count rows with missing performance and understate the SE.
    se_performance = sd(performance, na.rm = TRUE) / sqrt(sum(!is.na(performance))),
    .groups = "drop"
  )
# Line = mean per round, ribbon = mean +/- one standard error
p7 <- ggplot(performance_by_round_feedback, aes(x = Round, y = mean_performance, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_performance - se_performance, ymax = mean_performance + se_performance), alpha = 0.08, linetype = 0) +
  scale_color_manual(values = c("Baseline" = "#1F77B4", "Feedback" = "#FF7F0E", "Depression" = "#2CA02C", "Excitement" = "#D62728")) +
  labs(title = "Average Performance For Each Round", x = "Round", y = "Average Performance Score") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Display the plot
print(p7)
ggsave("perform_FeedbackvsBaseline.png", p7, width = 7, height = 6, dpi = 300)

In [None]:
# Panel B: Violin Plot Comparing Reaction Times Across Groups
# Fill colour assigned to each feedback group
feedback_colours <- c(
  "Baseline" = "#1F77B4", "Feedback" = "#FF7F0E",
  "Depression" = "#2CA02C", "Excitement" = "#D62728"
)

# Kruskal-Wallis test across the groups; its p-value is printed on the panel
kruskal_result_rt_feedback <- kruskal.test(Schema_RT ~ group, data = all_data_feedback)
p_value_rt_feedback <- kruskal_result_rt_feedback$p.value
p_label_rt_feedback <- ifelse(p_value_rt_feedback < 0.001, "p < 0.001", sprintf("p = %.3f", p_value_rt_feedback))

p8 <- ggplot(all_data_feedback, aes(x = group, y = Schema_RT, fill = group)) +
  geom_violin(trim = TRUE) +
  scale_fill_manual(values = feedback_colours) +
  labs(
    title = "Reaction Times under Different Emotional Feedback",
    x = "Emotion Group",
    y = "Schema Reaction Time (s)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  ) +
  annotate("text", x = Inf, y = Inf, label = p_label_rt_feedback, hjust = 1.1, vjust = 1.1, size = 4)

# Display the plot
print(p8)
ggsave("reaction_time_FeedbackvsBaseline.png", p8, width = 5, height = 7, dpi = 300)

In [None]:
# Panel C: Average Emotional Factor Per Round
# Mean and standard error of `emotion_factor` per group, rounds 1-6 only
emotion_factor_by_round <- all_data_feedback %>%
  filter(Round <= 6) %>%
  group_by(group, Round) %>%
  summarise(
    mean_emotion_factor = mean(emotion_factor, na.rm = TRUE),
    # Divide by the number of non-missing values that sd() actually used;
    # n() would also count rows with missing emotion_factor and understate
    # the standard error.
    se_emotion_factor = sd(emotion_factor, na.rm = TRUE) / sqrt(sum(!is.na(emotion_factor))),
    .groups = "drop"
  )
# Line = mean per round, ribbon = mean +/- one standard error
p9 <- ggplot(emotion_factor_by_round, aes(x = Round, y = mean_emotion_factor, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_emotion_factor - se_emotion_factor, ymax = mean_emotion_factor + se_emotion_factor), alpha = 0.08, linetype = 0) +
  scale_color_manual(values = c("Baseline" = "#1F77B4", "Feedback" = "#FF7F0E", "Depression" = "#2CA02C", "Excitement" = "#D62728")) +
  labs(title = "Average Emotional Factor For Each Round", x = "Round", y = "Average Emotional Factor") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Display the plot
print(p9)

In [None]:
# Panel D: Dual-Axis Plot (Performance and Emotional Factor)
# Shared colour/fill palette for the four feedback groups
feedback_colours <- c(
  "Baseline" = "#1F77B4", "Feedback" = "#FF7F0E",
  "Depression" = "#2CA02C", "Excitement" = "#D62728"
)

# Solid lines show performance on the primary axis. Dashed lines show the
# emotional factor multiplied by 10 so it shares that axis; the secondary
# axis divides by 10 again to display the emotional factor's own units.
p10 <- ggplot(mapping = aes(x = Round)) +
  geom_line(
    data = performance_by_round_feedback,
    aes(y = mean_performance, color = group),
    size = 1
  ) +
  geom_ribbon(
    data = performance_by_round_feedback,
    aes(
      ymin = mean_performance - se_performance,
      ymax = mean_performance + se_performance,
      fill = group
    ),
    alpha = 0.08, linetype = 0
  ) +
  geom_line(
    data = emotion_factor_by_round,
    aes(y = mean_emotion_factor * 10, color = group),
    linetype = "dashed", size = 1
  ) +
  geom_ribbon(
    data = emotion_factor_by_round,
    aes(
      ymin = (mean_emotion_factor - se_emotion_factor) * 10,
      ymax = (mean_emotion_factor + se_emotion_factor) * 10,
      fill = group
    ),
    alpha = 0.08, linetype = 0
  ) +
  scale_color_manual(values = feedback_colours) +
  scale_fill_manual(values = feedback_colours) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 10, name = "Average Emotional Factor")) +
  labs(
    title = "Average Performance and Emotional Changes",
    x = "Round",
    y = "Average Performance Score"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title.y = element_text(color = "black"),
    axis.title.y.right = element_text(color = "black")
  )

# Display the plot
print(p10)
ggsave("combined_performance_emotion.png", p10, width = 8, height = 7, dpi = 300)

In [None]:
# Compare reaction times of each group with Baseline
# In every call the condition is the first sample and Baseline the second, so
# alternative = "less" tests condition < Baseline and "greater" tests
# condition > Baseline.
baseline_rt <- all_data_feedback %>% filter(group == "Baseline") %>% select(Schema_RT)
feedback_rt <- all_data_feedback %>% filter(group == "Feedback") %>% select(Schema_RT)
excitement_rt <- all_data_feedback %>% filter(group == "Excitement") %>% select(Schema_RT)
depression_rt <- all_data_feedback %>% filter(group == "Depression") %>% select(Schema_RT)

p_feedback_rt <- wilcox_test(feedback_rt$Schema_RT, baseline_rt$Schema_RT, "two.sided")
p_excitement_rt <- wilcox_test(excitement_rt$Schema_RT, baseline_rt$Schema_RT, "less")
p_depression_rt <- wilcox_test(depression_rt$Schema_RT, baseline_rt$Schema_RT, "greater")

# The printed direction must describe the alternative that was actually tested:
# Excitement uses "less" (reaction time < Baseline) and Depression uses
# "greater" (reaction time > Baseline).
cat("Baseline vs Feedback (two-sided test, reaction time): p =", p_feedback_rt, "\n")
cat("Baseline vs Excitement (one-tailed test, reaction time < Baseline): p =", p_excitement_rt, "\n")
cat("Baseline vs Depression (one-tailed test, reaction time > Baseline): p =", p_depression_rt, "\n")

# Compare performance of each group with Baseline
baseline_performance_feedback <- all_data_feedback %>% filter(group == "Baseline") %>% select(performance)
feedback_performance <- all_data_feedback %>% filter(group == "Feedback") %>% select(performance)
excitement_performance <- all_data_feedback %>% filter(group == "Excitement") %>% select(performance)
depression_performance <- all_data_feedback %>% filter(group == "Depression") %>% select(performance)

p_feedback <- wilcox_test(feedback_performance$performance, baseline_performance_feedback$performance, "two.sided")
p_excitement <- wilcox_test(excitement_performance$performance, baseline_performance_feedback$performance, "greater")
p_depression <- wilcox_test(depression_performance$performance, baseline_performance_feedback$performance, "greater")

cat("Baseline vs Feedback (two-sided test, performance): p =", p_feedback, "\n")
cat("Baseline vs Excitement (one-tailed test, performance > Baseline): p =", p_excitement, "\n")
cat("Baseline vs Depression (one-tailed test, performance > Baseline): p =", p_depression, "\n")

In [None]:
# Combine plots for Feedback vs Baseline
# Three panels side by side: performance (p7), reaction time (p8) and the
# dual-axis performance/emotion plot (p10)
feedback_panels <- list(p7, p8, p10)
combined_plot_feedback <- ggarrange(
  plotlist = feedback_panels,
  labels = LETTERS[1:3],
  ncol = 3,
  nrow = 1,
  heights = c(1, 1, 1),
  common.legend = FALSE
)
ggsave(
  "combined_analysis_FeedbackvsBaseline.png", combined_plot_feedback,
  width = 15, height = 6, dpi = 300
)

# Display the combined plot
print(combined_plot_feedback)

## Part 3: Payoff vs Baseline Analysis

This section compares the Payoff strategy against the Baseline over 1500s and 2500s, focusing on performance and accuracy distribution.

In [None]:
# Data loading: each file is read, its Schema and schema_payoff columns are
# dropped, and every row is tagged with its group before stacking
baseline_1500s_data_payoff <- read_csv("baseline_1500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
baseline_2500s_data_payoff <- read_csv("baseline_2500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
payoff_1500s_data <- read_csv("payoff_1500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
payoff_2500s_data <- read_csv("payoff_2500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)

baseline_1500s_data_payoff$group <- "Baseline_1500s"
baseline_2500s_data_payoff$group <- "Baseline_2500s"
payoff_1500s_data$group <- "Payoff_1500s"
payoff_2500s_data$group <- "Payoff_2500s"

# Prefix Subject with the group so identifiers stay distinct after stacking
data_1500s_payoff <- bind_rows(baseline_1500s_data_payoff, payoff_1500s_data) %>%
  mutate(Subject = paste0(group, "_", Subject)) %>%
  arrange(Subject, Round, Phase)

data_2500s_payoff <- bind_rows(baseline_2500s_data_payoff, payoff_2500s_data) %>%
  mutate(Subject = paste0(group, "_", Subject)) %>%
  arrange(Subject, Round, Phase)

# Panel A: Average Performance Per Round in 1500s
performance_by_round_1500s_payoff <- data_1500s_payoff %>%
  group_by(group, Round) %>%
  summarise(
    mean_performance = mean(performance, na.rm = TRUE),
    # Divide by the number of non-missing values that sd() actually used;
    # n() would also count rows with missing performance and understate the SE.
    se_performance = sd(performance, na.rm = TRUE) / sqrt(sum(!is.na(performance))),
    .groups = "drop"
  )
# Line = mean per round, ribbon = mean +/- one standard error
p11 <- ggplot(performance_by_round_1500s_payoff, aes(x = Round, y = mean_performance, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_performance - se_performance, ymax = mean_performance + se_performance), alpha = 0.08, linetype = 0) +
  scale_color_manual(values = c("Baseline_1500s" = "#1F77B4", "Payoff_1500s" = "#FF7F0E")) +
  labs(title = "Performance in 1500s ('High-Payoff' Strategy vs Baseline)", x = "Round", y = "Average Performance Score") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Compare Payoff_1500s with Baseline_1500s performance
# (two-sided test over all rows of each group, pooled across rounds)
baseline_1500s_performance_payoff <- data_1500s_payoff %>% filter(group == "Baseline_1500s") %>% select(performance)
payoff_1500s_performance <- data_1500s_payoff %>% filter(group == "Payoff_1500s") %>% select(performance)
p_payoff_1500s <- wilcox_test(baseline_1500s_performance_payoff$performance, payoff_1500s_performance$performance, "two.sided")
cat("Payoff_1500s vs Baseline_1500s (two-sided test): p =", p_payoff_1500s, "\n")

# Display the plot
print(p11)
ggsave("perform_1500S_PayoffvsBaseline.png", p11, width = 8, height = 7, dpi = 300)

In [None]:
# Panel B: Average Performance Per Round in 2500s
performance_by_round_2500s_payoff <- data_2500s_payoff %>%
  group_by(group, Round) %>%
  summarise(
    mean_performance = mean(performance, na.rm = TRUE),
    # Divide by the number of non-missing values that sd() actually used;
    # n() would also count rows with missing performance and understate the SE.
    se_performance = sd(performance, na.rm = TRUE) / sqrt(sum(!is.na(performance))),
    .groups = "drop"
  )
# Line = mean per round, ribbon = mean +/- one standard error
p12 <- ggplot(performance_by_round_2500s_payoff, aes(x = Round, y = mean_performance, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_performance - se_performance, ymax = mean_performance + se_performance), alpha = 0.08, linetype = 0) +
  scale_color_manual(values = c("Baseline_2500s" = "#1F77B4", "Payoff_2500s" = "#FF7F0E")) +
  labs(title = "Performance in 2500s ('High-Payoff' Strategy vs Baseline)", x = "Round", y = "Average Performance Score") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Display the plot
print(p12)
ggsave("perform_2500S_PayoffvsBaseline.png", p12, width = 8, height = 7, dpi = 300)

In [None]:
# Panel C: AC Distribution Per Round (Bubble Plot)
# Number of rows for every (group, round, AC value) in the 1500s data,
# restricted to rounds 1-5; rows without AC are ignored
ac_freq_payoff <- data_1500s_payoff %>%
  filter(Round <= 5, !is.na(AC)) %>%
  count(group, Round, AC, name = "count")

# Bubble size encodes the count; the two groups are dodged side by side
p13 <- ggplot(ac_freq_payoff, aes(x = Round, y = AC, size = count, color = group)) +
  geom_point(position = position_dodge(width = 0.8), alpha = 0.7) +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("Baseline_1500s" = "#1F77B4", "Payoff_1500s" = "#FF7F0E")) +
  labs(
    title = "Accuracy Distribution ('High-Payoff' vs Baseline)",
    x = "Round",
    y = "Accuracy",
    size = "Number",
    color = "Group"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
  )

# Display the plot
print(p13)
ggsave("AC_1500S_PayoffvsBaseline.png", p13, width = 8, height = 7, dpi = 300)

In [None]:
# Combine plots for Payoff vs Baseline
# Two panels side by side: 1500s performance (p11) and 1500s accuracy (p13)
payoff_panels <- list(p11, p13)
combined_plot_payoff <- ggarrange(
  plotlist = payoff_panels,
  labels = LETTERS[1:2],
  ncol = 2,
  nrow = 1,
  heights = c(1, 1),
  common.legend = FALSE
)
ggsave(
  "combined_analysis_PayoffvsBaseline.png", combined_plot_payoff,
  width = 10, height = 6, dpi = 300
)

# Display the combined plot
print(combined_plot_payoff)

## Part 4: WSLS vs Payoff Analysis

This section compares WSLS, Payoff, and Integration strategies against the Baseline over 1500s and 2500s, focusing on performance.

In [None]:
# 1500s Data loading: each file is read, its Schema and schema_payoff columns
# are dropped, and every row is tagged with its group before stacking
baseline_1500s_data_wsls_payoff <- read_csv("baseline_1500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
WSLS_1500s_data <- read_csv("WSLS_1500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
payoff_1500s_data_wsls <- read_csv("payoff_1500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
integration_1500s_data <- read_csv("integration_1500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)

baseline_1500s_data_wsls_payoff$group <- "Baseline_1500s"
WSLS_1500s_data$group <- "WSLS_1500s"
payoff_1500s_data_wsls$group <- "Payoff_1500s"
integration_1500s_data$group <- "Integration_1500s"

# Prefix Subject with the group so identifiers stay distinct after stacking
all_data_1500s_wsls_payoff <- bind_rows(baseline_1500s_data_wsls_payoff, WSLS_1500s_data, payoff_1500s_data_wsls, integration_1500s_data) %>%
  mutate(Subject = paste0(group, "_", Subject)) %>%
  arrange(Subject, Round, Phase)

# Panel A: Average Performance Per Round in 1500s
performance_by_round_1500s_wsls_payoff <- all_data_1500s_wsls_payoff %>%
  group_by(group, Round) %>%
  summarise(
    mean_performance = mean(performance, na.rm = TRUE),
    # Divide by the number of non-missing values that sd() actually used;
    # n() would also count rows with missing performance and understate the SE.
    se_performance = sd(performance, na.rm = TRUE) / sqrt(sum(!is.na(performance))),
    .groups = "drop"
  )
# Line = mean per round, ribbon = mean +/- one standard error
p14 <- ggplot(performance_by_round_1500s_wsls_payoff, aes(x = Round, y = mean_performance, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_performance - se_performance, ymax = mean_performance + se_performance), alpha = 0.08, linetype = 0) +
  scale_color_manual(values = c("Baseline_1500s" = "#1F77B4", "WSLS_1500s" = "#FF7F0E", "Payoff_1500s" = "#2CA02C", "Integration_1500s" = "#D62728")) +
  labs(title = "Performance of Different Adaptive Strategies in 1500s", x = "Round", y = "Average Performance Score") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Display the plot
print(p14)
ggsave("perform_WSLSvsPayoff_1500s.png", p14, width = 8, height = 7, dpi = 300)

In [None]:
# 2500s Data loading: each file is read, its Schema and schema_payoff columns
# are dropped, and every row is tagged with its group before stacking
baseline_2500s_data_wsls_payoff <- read_csv("baseline_2500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
WSLS_2500s_data <- read_csv("WSLS_2500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
payoff_2500s_data_wsls <- read_csv("payoff_2500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)
integration_2500s_data <- read_csv("integration_2500s_allresult_processed.csv") %>%
  select(-Schema, -schema_payoff)

baseline_2500s_data_wsls_payoff$group <- "Baseline_2500s"
WSLS_2500s_data$group <- "WSLS_2500s"
payoff_2500s_data_wsls$group <- "Payoff_2500s"
integration_2500s_data$group <- "Integration_2500s"

# Prefix Subject with the group so identifiers stay distinct after stacking
all_data_2500s_wsls_payoff <- bind_rows(baseline_2500s_data_wsls_payoff, WSLS_2500s_data, payoff_2500s_data_wsls, integration_2500s_data) %>%
  mutate(Subject = paste0(group, "_", Subject)) %>%
  arrange(Subject, Round, Phase)

# Panel B: Average Performance Per Round in 2500s
performance_by_round_2500s_wsls_payoff <- all_data_2500s_wsls_payoff %>%
  group_by(group, Round) %>%
  summarise(
    mean_performance = mean(performance, na.rm = TRUE),
    # Divide by the number of non-missing values that sd() actually used;
    # n() would also count rows with missing performance and understate the SE.
    se_performance = sd(performance, na.rm = TRUE) / sqrt(sum(!is.na(performance))),
    .groups = "drop"
  )
# Line = mean per round, ribbon = mean +/- one standard error
p15 <- ggplot(performance_by_round_2500s_wsls_payoff, aes(x = Round, y = mean_performance, color = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_performance - se_performance, ymax = mean_performance + se_performance), alpha = 0.08, linetype = 0) +
  scale_color_manual(values = c("Baseline_2500s" = "#1F77B4", "WSLS_2500s" = "#FF7F0E", "Payoff_2500s" = "#2CA02C", "Integration_2500s" = "#D62728")) +
  labs(title = "Performance of Different Adaptive Strategies in 2500s", x = "Round", y = "Average Performance Score") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

# Display the plot
print(p15)
ggsave("perform_WSLSvsPayoff_2500s.png", p15, width = 8, height = 7, dpi = 300)

In [None]:
# Combine plots for WSLS vs Payoff
# Two panels side by side: 1500s strategies (p14) and 2500s strategies (p15)
strategy_panels <- list(p14, p15)
combined_plot_wsls_payoff <- ggarrange(
  plotlist = strategy_panels,
  labels = LETTERS[1:2],
  ncol = 2,
  nrow = 1,
  heights = c(1, 1),
  common.legend = FALSE
)
ggsave(
  "combined_analysis_WSLSvsPayoff.png", combined_plot_wsls_payoff,
  width = 10, height = 6, dpi = 300
)

# Display the combined plot
print(combined_plot_wsls_payoff)