# PREPARE DATA FOR ANALYSIS

Load in data; prepare reused parameters

In [1]:
# Libraries, source function and plotting parameters
library(glue)
library(lme4)
library(lmerTest)
library(RColorBrewer)
library(ggsignif)
library(emmeans)
library(broom.mixed)
library(tidyverse)

# Make svglite available for SVG export (optional dependency).
#
# The presence check uses requireNamespace() rather than require(), so that
# testing for the package is separate from attaching it. install.packages()
# signals a failed build with a warning, not an error, so success is verified
# explicitly after every attempt instead of relying on tryCatch(error = ...).
svglite_installed <- requireNamespace("svglite", quietly = TRUE)

if (!svglite_installed) {
  # Installation strategies, tried in order until one of them succeeds.
  install_attempts <- list(
    function() install.packages("svglite", dependencies = TRUE),
    function() {
      cat("Failed to install svglite. Trying alternative method...\n")
      # Try installing from source
      install.packages("svglite", type = "source")
    }
  )

  for (attempt in install_attempts) {
    tryCatch(
      attempt(),
      error = function(e) {
        # Report the reason instead of swallowing it, then move on to the
        # next strategy.
        cat("svglite installation error: ", conditionMessage(e), "\n", sep = "")
      }
    )
    svglite_installed <- requireNamespace("svglite", quietly = TRUE)
    if (svglite_installed) {
      break
    }
  }
}

if (svglite_installed) {
  library(svglite)
} else {
  cat("Warning: svglite package could not be installed.\n")
  cat("SVG export may not work. Consider installing manually:\n")
  cat("install.packages('svglite')\n")
}

Lade nötiges Paket: Matrix


Attache Paket: 'lmerTest'


Das folgende Objekt ist maskiert 'package:lme4':

    lmer


Das folgende Objekt ist maskiert 'package:stats':

    step


Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.4     [32mv[39m [34mreadr    [39m 2.1.5
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.1
[32mv[39m [34mggplot2  [39m 3.5.2     [32mv[39m [34mtibble   [39m 3.3.0
[32mv[39m [34mlubridate[39m 1.9.4     [32mv[39m [34mtidyr    [39m 1.3.1
[32mv[39m [34mpurrr    [39m 1.1.0     
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mtidyr[39m::[32mexpand()[39m masks [34mMatrix[39m::expand()
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()


Failed to install svglite. Trying alternative method...


"Installation des Pakets 'svglite' hatte Exit-Status ungleich 0"
Aktualisiere HTML-Index der Pakete in '.Library'

Making 'packages.html' ...
 fertig



SVG export may not work. Consider installing manually:
install.packages('svglite')


In [2]:
# Preprocessed voltages for the 285-345 ms window.
df <- read_csv(
  "/Users/johannberger/Documents/thesis/data/2_preprocessed/285-345ms/combined_task-nouns_285-345ms.csv"
)

# Channels that are averaged into the region of interest.
electrode <- c(
  "FC1", "FCz", "FC2", "FCC1h", "FCC2h",
  "C1", "Cz", "C2", "CCP1h", "CCP2h",
  "CP1", "CPz", "CP2", "CPP1h", "CPP2h"
)

# Semantic categories that are analysed; this order is reused as the factor
# level order further down.
conditions <- c("animal", "food", "tool", "commun", "emotion", "social")

# One fixed colour per category, shared by all figures.
plot_colors <- c(
  animal  = "#388E3C",
  commun  = "#d9a00f",
  emotion = "#616161",
  food    = "#1976D2",
  social  = "#ff00ff",
  tool    = "#D32F2F"
)


[1mRows: [22m[34m5260848[39m [1mColumns: [22m[34m6[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ","
[31mchr[39m (4): subject, item, category, channel
[32mdbl[39m (2): repetition, voltage

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


Define six cumulative data frames, each adding another repetition

In [None]:
# Define data frames

# Build the analysis table for repetitions 1..max_repetition.
#
# Arguments:
#   data            Long-format table with columns subject, item, category,
#                   channel, repetition and voltage.
#   max_repetition  Highest repetition to include (1 keeps only the first).
#   categories      Categories to keep; also defines the factor level order.
#   channels        Channels averaged into one value per trial.
#
# Returns one row per item x subject x category x repetition with
#   mean_voltage  mean voltage across `channels`,
#   repetition_c  mean-centered repetition (only when max_repetition > 1; with
#                 a single repetition there is nothing to center),
#   category      factor with sum-to-zero contrasts.
build_cumulative_df <- function(data, max_repetition,
                                categories = conditions,
                                channels = electrode) {
  out <- data %>%
    filter(category %in% categories,
           repetition %in% seq_len(max_repetition),
           channel %in% channels) %>%
    group_by(item, subject, category, repetition) %>%
    summarise(mean_voltage = mean(voltage), .groups = "drop")

  if (max_repetition > 1) {
    out <- out %>%
      mutate(repetition_c = scale(repetition, scale = FALSE)[, 1])
  }

  # Attach the contrasts through `contrasts<-` for every subset, so all six
  # data frames carry the same, validated contrast matrix.
  out$category <- factor(out$category, levels = categories)
  contrasts(out$category) <- contr.sum(length(categories))
  out
}

# Six cumulative subsets, each adding one more repetition.
df_1   <- build_cumulative_df(df, 1)
df_1_2 <- build_cumulative_df(df, 2)
df_1_3 <- build_cumulative_df(df, 3)
df_1_4 <- build_cumulative_df(df, 4)
df_1_5 <- build_cumulative_df(df, 5)
df_1_6 <- build_cumulative_df(df, 6)


# MODEL SELECTION

## Fitting four different models for the cumulative repetitions to see which one fits each subset the best

In [None]:
# Define Models
#
# For every cumulative subset the same ladder of candidate models is fitted
# (all with random intercepts for subject and item):
#   mdl1: category only
#   mdl2: category + repetition_c (additive repetition effect)
#   mdl3: category * repetition_c (category-specific repetition slopes)
#   mdl4: mdl3 plus a by-subject random slope for repetition_c; `||` asks lme4
#         for intercept and slope without a correlation between them
# anova() then compares the four candidates of a subset with each other.
# The trailing "This Model is used" comments mark the candidate that is picked
# for the main analysis further down.

# REPETITION 1
# Only one repetition in df_1, so there is no repetition term to add.
mdl1_df1 <- lmer(mean_voltage ~ category + (1 | subject) + (1 | item), # This Model is used
                 data = df_1)


# REPETITION 1 + 2
mdl1_df1_2 <- lmer(mean_voltage ~ category + (1 | subject) + (1 | item), # This Model is used
                   data = df_1_2)
mdl2_df1_2 <- lmer(mean_voltage ~ category + repetition_c + (1 | subject) + (1 | item),
                   data = df_1_2)
mdl3_df1_2 <- lmer(mean_voltage ~ category * repetition_c + (1 | subject) + (1 | item),
                   data = df_1_2)
mdl4_df1_2 <- lmer(mean_voltage ~ category * repetition_c + (1 + repetition_c || subject) + (1 | item),
                   data = df_1_2)
# Model comparison for repetitions 1-2
anova(mdl1_df1_2, mdl2_df1_2, mdl3_df1_2, mdl4_df1_2)


# REPETITION 1 + 2 + 3
mdl1_df1_3 <- lmer(mean_voltage ~ category + (1 | subject) + (1 | item),
                   data = df_1_3)
mdl2_df1_3 <- lmer(mean_voltage ~ category + repetition_c + (1 | subject) + (1 | item), # This Model is used
                   data = df_1_3)
mdl3_df1_3 <- lmer(mean_voltage ~ category * repetition_c + (1 | subject) + (1 | item),
                   data = df_1_3)
mdl4_df1_3 <- lmer(mean_voltage ~ category * repetition_c + (1 + repetition_c || subject) + (1 | item),
                   data = df_1_3)
# Model comparison for repetitions 1-3
anova(mdl1_df1_3, mdl2_df1_3, mdl3_df1_3, mdl4_df1_3)


# REPETITION 1 + 2 + 3 + 4
mdl1_df1_4 <- lmer(mean_voltage ~ category + (1 | subject) + (1 | item),
                  data = df_1_4)
mdl2_df1_4 <- lmer(mean_voltage ~ category + repetition_c + (1 | subject) + (1 | item),
                  data = df_1_4)
mdl3_df1_4 <- lmer(mean_voltage ~ category * repetition_c + (1 | subject) + (1 | item),
                  data = df_1_4)
mdl4_df1_4 <- lmer(mean_voltage ~ category * repetition_c + (1 + repetition_c || subject) + (1 | item), # This Model is used
                  data = df_1_4)
# Model comparison for repetitions 1-4
anova(mdl1_df1_4, mdl2_df1_4, mdl3_df1_4, mdl4_df1_4)


# REPETITION 1 + 2 + 3 + 4 + 5
mdl1_df1_5 <- lmer(mean_voltage ~ category + (1 | subject) + (1 | item),
                  data = df_1_5)
mdl2_df1_5 <- lmer(mean_voltage ~ category + repetition_c + (1 | subject) + (1 | item),
                  data = df_1_5)
mdl3_df1_5 <- lmer(mean_voltage ~ category * repetition_c + (1 | subject) + (1 | item),
                  data = df_1_5)
mdl4_df1_5 <- lmer(mean_voltage ~ category * repetition_c + (1 + repetition_c || subject) + (1 | item), # This Model is used
                  data = df_1_5)
# Model comparison for repetitions 1-5
anova(mdl1_df1_5, mdl2_df1_5, mdl3_df1_5, mdl4_df1_5)


# REPETITION 1 + 2 + 3 + 4 + 5 + 6
mdl1_df1_6 <- lmer(mean_voltage ~ category + (1 | subject) + (1 | item),
                  data = df_1_6)
mdl2_df1_6 <- lmer(mean_voltage ~ category + repetition_c + (1 | subject) + (1 | item),
                  data = df_1_6)
mdl3_df1_6 <- lmer(mean_voltage ~ category * repetition_c + (1 | subject) + (1 | item),
                  data = df_1_6)
mdl4_df1_6 <- lmer(mean_voltage ~ category * repetition_c + (1 + repetition_c || subject) + (1 | item), # This Model is used
                  data = df_1_6)
# Model comparison for repetitions 1-6
anova(mdl1_df1_6, mdl2_df1_6, mdl3_df1_6, mdl4_df1_6)



Best-fitting model for each data subset (1–6 repetitions),
based on improvements in log-likelihood (lower AIC/BIC, significant Chi-square).

| Data Frame | Best Model        | Justification                                                                 |
|------------|-------------------|------------------------------------------------------------------------------|
| `df_1`     | `mdl1_df1`        | Only one repetition, no `repetition_c` possible — base model is sufficient. |
| `df_1_2`   | `mdl1_df1_2`      | No meaningful gain from adding repetition terms (`p > 0.2` in all models).  |
| `df_1_3`   | `mdl2_df1_3`      | Slight AIC/BIC improvement; marginal effect of repetition (`p = 0.08`).     |
| `df_1_4`   | `mdl4_df1_4`      | Strong improvement with random slope (`p < 0.001`).                          |
| `df_1_5`   | `mdl4_df1_5`      | Significant interaction/random slope effect (`p < 0.00001`).                |
| `df_1_6`   | `mdl4_df1_6`      | Best model by far (`p < 0.000001`, much lower AIC/BIC).                     |


---

## Choosing the model and investigating the main effect of category

In [None]:
# Main Models
# Short aliases for the candidate chosen per cumulative subset; everything
# below works with these names only.
mdl1   <- mdl1_df1    # repetition 1:     category only
mdl1_2 <- mdl1_df1_2  # repetitions 1-2:  category only
mdl1_3 <- mdl2_df1_3  # repetitions 1-3:  category + repetition_c
mdl1_4 <- mdl4_df1_4  # repetitions 1-4:  interaction + random slope
mdl1_5 <- mdl4_df1_5  # repetitions 1-5:  interaction + random slope
mdl1_6 <- mdl4_df1_6  # repetitions 1-6:  interaction + random slope

# ANALYSIS

## Main effect of category

In [None]:
# ANOVA for each model
# Type III tests of the fixed effects, one table per selected model, in order
# of increasing number of included repetitions. The calls are left as bare
# top-level expressions so that each table is displayed by the notebook.
anova(mdl1, type = 3)
anova(mdl1_2, type = 3)
anova(mdl1_3, type = 3)
anova(mdl1_4, type = 3)
anova(mdl1_5, type = 3)
anova(mdl1_6, type = 3)

### Main Effect of Category Across Repetitions

| Repetitions | F value | p-value   | Significant? | Interpretation                                                  |
|-------------|---------|-----------|--------------|------------------------------------------------------------------|
| 1           | 3.26    | 0.0076    | ✅ Yes       | Semantic category already shows a reliable effect.              |
| 1–2         | 3.65    | 0.0036    | ✅ Yes       | Slight gain in robustness, but no essential change.             |
| 1–3         | 4.12    | 0.0015    | ✅ Yes       | Effect becomes more stable and statistically stronger.          |
| 1–4         | 3.61    | 0.0039    | ✅ Yes       | No further improvement beyond 3 repetitions.                    |
| 1–5         | 3.49    | 0.0049    | ✅ Yes       | Slight decline in F; no added benefit.                          |
| 1–6         | 3.59    | 0.0041    | ✅ Yes       | Effect remains significant, but plateau has clearly been reached.|

### (Preliminary) Conclusion

- The **main effect of semantic category is already significant with just 1 repetition**.
- **Repetitions 2–3** provide **minor improvements** in statistical strength.
- **After 3 repetitions**, the F-statistic **plateaus or slightly declines**, indicating **no substantial gain**.
- There is **no evidence that more than 3 repetitions strengthen the category effect**.

---

## Pairwise comparisons

In [None]:
# Pairwise comparisons for all main models
#
# For every selected model the estimated marginal means per category are
# computed and all pairwise category contrasts are printed. The emm* and
# pairs* objects stay in the workspace; the pairs* objects are reused by the
# significance summary and by the ERP bar plots.

adjustment <- "none" # Specify p-value adjustment method (change value here to 'bonferroni', 'tukey', etc.)

# Model 1 (1 repetition)
emm1 <- emmeans(mdl1, ~ category)
pairs1 <- pairs(emm1, adjust = adjustment)
cat("=== REPETITION 1 ===\n")
print(pairs1)
cat("\n")

# Model 1_2 (1-2 repetitions)
emm1_2 <- emmeans(mdl1_2, ~ category)
pairs1_2 <- pairs(emm1_2, adjust = adjustment)
cat("=== REPETITIONS 1-2 ===\n")
print(pairs1_2)
cat("\n")

# Model 1_3 (1-3 repetitions)
emm1_3 <- emmeans(mdl1_3, ~ category)
pairs1_3 <- pairs(emm1_3, adjust = adjustment)
cat("=== REPETITIONS 1-3 ===\n")
print(pairs1_3)
cat("\n")

# Model 1_4 (1-4 repetitions)
emm1_4 <- emmeans(mdl1_4, ~ category)
pairs1_4 <- pairs(emm1_4, adjust = adjustment)
cat("=== REPETITIONS 1-4 ===\n")
print(pairs1_4)
cat("\n")

# Model 1_5 (1-5 repetitions)
emm1_5 <- emmeans(mdl1_5, ~ category)
pairs1_5 <- pairs(emm1_5, adjust = adjustment)
cat("=== REPETITIONS 1-5 ===\n")
print(pairs1_5)
cat("\n")

# Model 1_6 (1-6 repetitions)
emm1_6 <- emmeans(mdl1_6, ~ category)
pairs1_6 <- pairs(emm1_6, adjust = adjustment)
cat("=== REPETITIONS 1-6 ===\n")
print(pairs1_6)
cat("\n")

# Summary of significant comparisons across models
cat("=== SUMMARY OF SIGNIFICANT PAIRWISE COMPARISONS ===\n")
# Report the adjustment that was actually applied instead of a fixed label, so
# the banner stays correct when `adjustment` is changed above.
cat(sprintf("(p < 0.05 after %s correction)\n\n", adjustment))

In [None]:
# Print the pairwise contrasts of one model that reach p < 0.05.
#
# Arguments:
#   pairs_result  Pairwise-contrast object as returned by pairs().
#   model_name    Label printed in front of the listing.
#
# Output is written with cat(); the function returns NULL invisibly.
extract_significant <- function(pairs_result, model_name) {
  contrast_table <- summary(pairs_result)
  hits <- select(
    filter(contrast_table, p.value < 0.05),
    contrast, estimate, p.value
  )

  # Nothing significant: say so and stop.
  if (nrow(hits) == 0) {
    cat(paste0(model_name, ": No significant pairwise comparisons\n\n"))
    return(invisible(NULL))
  }

  cat(paste0(model_name, ":\n"))
  # sprintf() is vectorised, so all rows are formatted in one call.
  hit_lines <- sprintf("  %s: Est = %.3f, p = %.4f\n",
                       hits$contrast,
                       hits$estimate,
                       hits$p.value)
  cat(hit_lines, sep = "")
  cat("\n")
}

# One listing per cumulative subset; the list names serve as the labels.
iwalk(
  list(
    "Repetition 1"    = pairs1,
    "Repetitions 1-2" = pairs1_2,
    "Repetitions 1-3" = pairs1_3,
    "Repetitions 1-4" = pairs1_4,
    "Repetitions 1-5" = pairs1_5,
    "Repetitions 1-6" = pairs1_6
  ),
  extract_significant
)

### Summary

#### Table

| Rep     |n (uncorrected) |n (bonferroni)| contrasts (left contrast more negative)                             |
|---------|----------------|--------------|---------------------------------------------------------------------|
| Rep 1   | 4 | 2 | <span style="color:red"><b>t - s, e - s</b></span>, e - f, c - s         |
| Rep 1-2 | 4 | 2 | <span style="color:red"><b>t - s, e - s</b></span>, e - c, a - s         |
| Rep 1-3 | 5 | 3 | <span style="color:red"><b>a - s, t - s, e - s</b></span>, t - f, t - c  |
| Rep 1-4 | 5 | 2 | <span style="color:red"><b>t - s, e - s</b></span>, a - s, t - c         |
| Rep 1-5 | 4 | 3 | <span style="color:red"><b>a - s, t - s, e - s</b></span>, t - c         |
| Rep 1-6 | 5 | 2 | <span style="color:red"><b>t - s, e - s</b></span>, a - c, a - s, t - c  |

#### Conclusion
- no real gain after repetition 3
- corrected contrasts stay stable after repetition 3, and some even lose significance
- for uncorrected: animal-communication only becomes significant for rep1-6:
    - might hint at a repetition:category interaction (significant in the main model - 0.02) - semantic categories might be influenced differently after long exposure to visual word presentations
    - could also just be noise/random fluctuations (as later repetitions have a smaller SNR)



In [None]:
# Pairwise comparisons for repetition:category interaction in mdl1_6 (uncorrected)
# This explores how category differences change across repetitions

# repetition_c is a numeric covariate. Without `at`, emmeans evaluates a
# covariate at a single reference value, so there would be only one
# "repetition level" and nothing to compare within a category. The levels are
# taken from the data the model was fitted on, which keeps them correct even if
# the centering constant is not exactly 3.5.
rep_levels <- sort(unique(df_1_6$repetition_c))  # Corresponding to repetitions 1-6

# Get estimated marginal means for the interaction, one per category and
# repetition level
emm_interaction <- emmeans(
  mdl1_6, ~ category * repetition_c,
  at = list(repetition_c = rep_levels)
)

# Pairwise comparisons within each repetition level
pairs_by_repetition <- pairs(emm_interaction, by = "repetition_c", adjust = "none")
cat("=== CATEGORY COMPARISONS WITHIN EACH REPETITION LEVEL ===\n")
print(pairs_by_repetition)
cat("\n")

# Pairwise comparisons within each category (across repetitions)
pairs_by_category <- pairs(emm_interaction, by = "category", adjust = "none")
cat("=== REPETITION EFFECTS WITHIN EACH CATEGORY ===\n")
print(pairs_by_category)
cat("\n")

# Simple effects: is there any category difference at a given repetition?
# A joint test of the pairwise contrasts answers that question per repetition
# level; test() on the marginal means themselves would instead test every mean
# against zero.
simple_effects <- test(pairs_by_repetition, joint = TRUE)
cat("=== SIMPLE EFFECTS: CATEGORY DIFFERENCES AT EACH REPETITION ===\n")
print(simple_effects)

# PLOTS

## Data and Functions Setup

In [None]:
# ========== DATA PREPARATION ==========
# Summary tables produced by the Python part of the pipeline
snr_data <- read_csv(
  "/Users/johannberger/Documents/thesis/data/3_analysis/cumulative_snr_data.csv"
)
f_data <- read_csv(
  "/Users/johannberger/Documents/thesis/data/3_analysis/f_values_data.csv"
)

# Everything one ERP bar plot needs, keyed by cumulative subset:
#   df     trial-level data of the subset
#   pairs  pairwise category contrasts of the matching model
#   title  suffix appended to the plot title
erp_datasets <- list(
  "1" = list(
    df = df_1,
    pairs = pairs1,
    title = "— Repetition 1"
  ),
  "1_2" = list(
    df = df_1_2,
    pairs = pairs1_2,
    title = "— Repetitions 1-2"
  ),
  "1_3" = list(
    df = df_1_3,
    pairs = pairs1_3,
    title = "— Repetitions 1-3"
  ),
  "1_4" = list(
    df = df_1_4,
    pairs = pairs1_4,
    title = "— Repetitions 1-4"
  ),
  "1_5" = list(
    df = df_1_5,
    pairs = pairs1_5,
    title = "— Repetitions 1-5"
  ),
  "1_6" = list(
    df = df_1_6,
    pairs = pairs1_6,
    title = "— Repetitions 1-6"
  )
)

# ========== PLOTTING FUNCTIONS ==========

# Per-category mean and standard error of `mean_voltage`, plus the numeric x
# position each category takes on the discrete axis.
summarise_erp_by_category <- function(df, conditions) {
  df %>%
    group_by(category) %>%
    summarise(
      mean_val = mean(mean_voltage),
      se = sd(mean_voltage) / sqrt(n()),
      .groups = "drop"
    ) %>%
    mutate(
      # Plain character keys: the contrast labels they are joined against are
      # character too, so the join never has to coerce a factor.
      category = as.character(category),
      x = as.numeric(factor(category, levels = conditions))
    )
}

# Positions and labels of the significance brackets (p < 0.05), or NULL when no
# contrast is significant.
build_erp_bracket_data <- function(pairs_obj, category_stats) {
  sig_contrasts <- summary(pairs_obj) %>%
    as.data.frame() %>%
    filter(p.value < 0.05) %>%
    # Split "a - b" into its two category names. Namespace-qualified because
    # `extract` is also exported by other packages (e.g. magrittr).
    # NOTE(review): the pattern assumes category names without spaces or
    # hyphens, which holds for the current `conditions`.
    tidyr::extract(contrast, into = c("cat1", "cat2"),
                   regex = "^([^\\s-]+) - ([^\\s-]+)") %>%
    mutate(
      sig_label = case_when(
        p.value < 0.001 ~ "***",
        p.value < 0.01  ~ "**",
        p.value < 0.05  ~ "*",
        TRUE            ~ ""
      )
    )

  if (nrow(sig_contrasts) == 0) {
    return(NULL)
  }

  positions <- category_stats %>% select(category, x)
  # First bracket sits 0.2 beyond the most extreme (mean - se); the y axis is
  # reversed in the plot, so smaller values are drawn further up.
  base_y <- min(category_stats$mean_val - category_stats$se) - 0.2

  sig_contrasts %>%
    left_join(positions, by = c("cat1" = "category")) %>%
    rename(xmin_pos = x) %>%
    left_join(positions, by = c("cat2" = "category")) %>%
    rename(xmax_pos = x) %>%
    mutate(bracket_span = abs(xmax_pos - xmin_pos)) %>%
    # Short brackets first, then stack one level (0.25) per bracket.
    arrange(bracket_span, xmin_pos) %>%
    mutate(y_bracket = base_y - (row_number() - 1) * 0.25)
}

# ggplot layers drawing the brackets: horizontal bar, two end ticks and the
# significance stars. Returns an empty list when there is nothing to draw.
erp_bracket_layers <- function(bracket_data) {
  if (is.null(bracket_data)) {
    return(list())
  }

  list(
    geom_segment(data = bracket_data,
                 aes(x = xmin_pos, xend = xmax_pos, y = y_bracket, yend = y_bracket),
                 inherit.aes = FALSE, color = "black"),
    geom_segment(data = bracket_data,
                 aes(x = xmin_pos, xend = xmin_pos, y = y_bracket, yend = y_bracket + 0.05),
                 inherit.aes = FALSE, color = "black"),
    geom_segment(data = bracket_data,
                 aes(x = xmax_pos, xend = xmax_pos, y = y_bracket, yend = y_bracket + 0.05),
                 inherit.aes = FALSE, color = "black"),
    geom_text(data = bracket_data,
              aes(x = (xmin_pos + xmax_pos) / 2, y = y_bracket - 0.05, label = sig_label),
              inherit.aes = FALSE, size = 4, hjust = 0.5)
  )
}

# Core ERP plotting function with significance brackets
#
# Arguments:
#   df            Data with columns `category` and `mean_voltage`.
#   pairs_obj     Pairwise category contrasts for the same data (from pairs()).
#   conditions    Category names in axis order.
#   plot_colors   Named vector of fill colours, one per category.
#   title_suffix  Text appended to the plot title.
#
# Returns a ggplot object: one bar per category (mean, with standard-error
# bars), a reversed y axis and a bracket for every contrast with p < 0.05.
create_erp_plot <- function(df, pairs_obj, conditions, plot_colors, title_suffix) {
  category_stats <- summarise_erp_by_category(df, conditions)
  bracket_data <- build_erp_bracket_data(pairs_obj, category_stats)

  ggplot(df, aes(x = category, y = mean_voltage, fill = category)) +
    stat_summary(fun = mean, geom = "col", width = 0.7) +
    stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
    erp_bracket_layers(bracket_data) +
    scale_y_reverse() +
    scale_fill_manual(values = plot_colors) +
    labs(
      title = glue("Mean ERP amplitude (285–345 ms) · Frontocentral ROI {title_suffix}"),
      x = NULL,
      y = "Mean voltage (\u03bcV)",
      fill = "Category"
    ) +
    theme(legend.position = "none")
}

## Plot Settings and Themes

In [None]:
# ========== PLOT PARAMETERS ==========
# Set plot sizes for the notebook's inline display
options(repr.plot.width = 10, repr.plot.height = 7)

# Define common theme settings shared by the ERP, SNR and F-value plots
base_theme <- theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    plot.margin = margin(10, 10, 10, 10)
  )

# ERP-specific theme modifications: fixed panel aspect ratio. The plot margin
# is inherited from base_theme and not repeated here.
erp_theme <- base_theme +
  theme(
    aspect.ratio = 0.7
  )

# SNR plot theme: dashed vertical grid lines.
# (No trailing comma after the last argument: an empty argument in theme() is
# not accepted by every ggplot2 version.)
snr_theme <- base_theme +
  theme(
    panel.grid.major.x = element_line(color = "grey90", linetype = "dashed")
  )

# F-value plot theme: no vertical grid lines behind the bars
f_theme <- base_theme +
  theme(
    panel.grid.major.x = element_blank()
  )

# Line plot theme (for repetition trends): larger base font than base_theme
line_theme <- theme_minimal(base_size = 17) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18),
    legend.position = "right"
  )

## Create All Plots

In [None]:
# ========== GENERATE ALL PLOTS ==========

# 1. ERP bar plots with significance brackets, one per cumulative subset.
# map() keeps the list names ("1", "1_2", ...); the element index is not
# needed, so imap() is unnecessary.
erp_plots <- map(erp_datasets, function(spec) {
  create_erp_plot(spec$df, spec$pairs, conditions, plot_colors, spec$title) +
    erp_theme
})

# 2. ERP line plot across repetitions (using full dataset):
# category means per repetition with standard-error bars
repetition_trend_plot <- ggplot(df_1_6, aes(x = repetition, y = mean_voltage,
                                           colour = category, group = category)) +
  stat_summary(fun = mean, geom = "line", linewidth = 1.2) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  scale_y_reverse() +
  scale_color_manual(values = plot_colors) +
  line_theme +
  theme(legend.position = "bottom",
        legend.direction = "horizontal") +
  labs(
    title = NULL,
    x = NULL,
    y = "Mean voltage (\u03bcV)",
    colour = "Category"
  )

# 3. Cumulative SNR plot
# `linewidth` replaces `size` for the line: `size` is deprecated for lines
# since ggplot2 3.4.0, and the other line layers in this file use `linewidth`.
# NOTE(review): the six axis labels are fixed and must match the number of rows
# in snr_data.
snr_plot <- ggplot(snr_data, aes(x = cumulative_repetitions, y = mean_snr)) +
  geom_line(color = "blue", linewidth = 1.2) +
  geom_point(color = "blue", size = 3) +
  geom_errorbar(aes(ymin = mean_snr - sem_snr, ymax = mean_snr + sem_snr),
                width = 0.2, color = "blue") +
  geom_text(aes(label = sprintf("%.2f", mean_snr)),
            vjust = -0.8, hjust = 0.5, size = 3.5) +
  scale_x_continuous(breaks = snr_data$cumulative_repetitions,
                     labels = c("1", "1-2", "1-3", "1-4", "1-5", "1-6")) +
  snr_theme +
  labs(
    title = NULL,
    x = "Cumulative presentations",
    y = "Mean SNR"
  )

# 4. F-value plot
# NOTE(review): the five axis labels are fixed and must match the number of
# rows in f_data.
f_plot <- ggplot(f_data, aes(x = cumulative_repetitions, y = f_statistic)) +
  geom_col(alpha = 0.7, fill = "red", width = 0.6) +
  geom_text(aes(label = sprintf("%.2f", f_statistic)),
            vjust = -0.5, hjust = 0.5, size = 3.5) +
  scale_x_continuous(breaks = f_data$cumulative_repetitions,
                     labels = c("1-2", "1-3", "1-4", "1-5", "1-6")) +
  f_theme +
  labs(
    title = NULL,
    x = "Cumulative presentations",
    y = "F-value"
  )

# 5. Combined SNR and F-value plot (requires patchwork)
# library(patchwork)
# combined_snr_f_plot <- snr_plot / f_plot +
#   plot_annotation(title = "SNR Analysis Results",
#                   theme = theme(plot.title = element_text(size = 18, hjust = 0.5)))

# ========== EXTRACT SPECIFIC ERP PLOTS FOR EASY ACCESS ==========
# Extract the two main ERP bar plots for direct manipulation. The legend is
# switched off again here because erp_theme (added above) sets
# legend.position = "right" and thereby overrides the setting made inside
# create_erp_plot().
erp_plot_1_3 <- erp_plots[["1_3"]] +
  labs(title = NULL, y = "Mean voltage (\u03bcV)") +
  theme(legend.position = "none")
erp_plot_1_6 <- erp_plots[["1_6"]] +
  labs(title = NULL, y = "Mean voltage (\u03bcV)") +
  theme(legend.position = "none")


# ========== DISPLAY PLOTS ==========
print(repetition_trend_plot)
print(erp_plot_1_3)
print(erp_plot_1_6)
print(snr_plot)
print(f_plot)

In [None]:
# Save all figures: SVG when possible, PDF otherwise.
#
# Every plot is handled on its own, so a successful SVG export no longer means
# that only the first figure is written, and a failure for one figure does not
# affect the others.
plot_dir <- "/Users/johannberger/Documents/thesis/data/4_plots"

plots_to_save <- list(
  repetition_trend_plot = repetition_trend_plot,
  erp_plot_1_3 = erp_plot_1_3,
  erp_plot_1_6 = erp_plot_1_6,
  snr_plot = snr_plot,
  f_plot = f_plot
)

# Write `plot` to <dir>/<name>.svg; if that raises an error (e.g. svglite is
# not installed), report the reason and write <dir>/<name>.pdf instead.
save_svg_or_pdf <- function(plot, name, dir = plot_dir, width = 10, height = 7) {
  tryCatch({
    # Try SVG export first
    ggsave(file.path(dir, paste0(name, ".svg")),
           plot = plot, width = width, height = height, units = "in",
           device = "svg")
    cat("SVG export successful: ", name, ".svg\n", sep = "")
  }, error = function(e) {
    # Fallback to PDF if SVG fails
    cat("SVG export failed for ", name, " (", conditionMessage(e),
        "), saving as PDF instead...\n", sep = "")
    ggsave(file.path(dir, paste0(name, ".pdf")),
           plot = plot, width = width, height = height, units = "in")
    cat("- ", name, ".pdf\n", sep = "")
  })
  invisible(NULL)
}

# The list names become the file names.
iwalk(plots_to_save, save_svg_or_pdf)

In [None]:
# Interaction figure: one linear trend per semantic category across the six
# presentations, to show how the repetition slopes differ between categories.
# The trend lines are least-squares fits to the rows of df_1_6, not
# predictions of the mixed model.
interaction_plot <- ggplot(
  df_1_6,
  aes(x = repetition, y = mean_voltage, colour = category)
) +
  # Category means per presentation, drawn faintly behind the trend lines
  stat_summary(fun = mean, geom = "point", size = 1.5, alpha = 0.4) +
  # (disabled) standard-error bars for the means:
  # stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2, alpha = 0.4) +
  # Prominent linear trend line for each category
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.5, alpha = 0.8) +
  # Axes and colours
  scale_x_continuous(breaks = 1:6) +
  scale_y_reverse() +
  scale_color_manual(values = plot_colors) +
  # Labels (title and subtitle intentionally left out)
  labs(
    x = "Presentation",
    y = "Mean voltage (\u03bcV)",
    colour = "Category"
  ) +
  # Theme, with the legend moved below the panel
  line_theme +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal"
  )

# Show the figure in the notebook
print(interaction_plot)

# Export: SVG first, PDF if the SVG device raises an error
tryCatch(
  {
    ggsave(
      "/Users/johannberger/Documents/thesis/data/4_plots/interaction_plot.svg",
      plot = interaction_plot,
      width = 12, height = 8, units = "in",
      device = "svg"
    )
    cat("Interaction plot saved as SVG!\n")
  },
  error = function(e) {
    ggsave(
      "/Users/johannberger/Documents/thesis/data/4_plots/interaction_plot.pdf",
      plot = interaction_plot,
      width = 12, height = 8, units = "in"
    )
    cat("Interaction plot saved as PDF instead!\n")
  }
)