
# 📊 Bar Plots & One-way ANOVA
Analyze and visualize group differences with bar plots and ANOVA in R.

---

**🗂️ Last updated:** 18 September 2025  
**🐳 Docker image:** `gnasello/datascience-env:2025-09-18`


## 📦 Load Required Packages

In [None]:

library(dataprepUtils)
library(statsUtils)
library(ggplotUtils)


## 🏷️ Customize Plot Labels

In [None]:

# Placeholder label text; replace with wording that suits your data.
# Of these, only `ylabel` is passed to the plotting call in the cells shown
# in this notebook; `title` and `xlabel` are defined but not used there.
ylabel <- "Y-axis label"
xlabel <- "X-axis label"
title <- "Plot title"


## 🎨 Set Colors for Groups

In [None]:

# Named character vector mapping each group label to its colour.
# Colours are 8-digit hex strings (#RRGGBBAA).
# The names, in this order, are also handed to the data-loading step as
# `xlabels_ordered`.
# NOTE(review): the names presumably have to match the group values found in
# the data's x column — confirm against read_and_process_data().
scale_color_manual.values <- c(
  "Blank"     = "#8b8c8cff",
  "αCDs"      = "#4dbbd5ff",
  "AS"        = "#ff6666ff",
  "αCDs + AS" = "#4d9999ff"
)


## 📁 Load Your Data

In [None]:
# Input table; change this to point at your own file.
filetable <- "data.csv"

# Input name with its extension stripped; later cells use it as the prefix
# of the exported plot files.
filename <- tools::file_path_sans_ext(filetable)

# Read the table, naming "x" as the x column and "y" as the y column, and
# pass the colour-vector names as the ordered x labels.
# NOTE(review): the exact processing is done inside read_and_process_data(),
# which is not defined in this notebook.
df <- read_and_process_data(
  filetable,
  xlabels_ordered = names(scale_color_manual.values),
  x_col = "x",
  y_col = "y"
)

# Preview the first and last rows of the loaded data.
head(df)
tail(df)


## 🧹 Optional Data Manipulation *(commented out)*

### ✅ Option 1: Keep only specific values in a column

In [None]:

# Uncomment and edit this section to keep only specific values in a chosen column
# values_to_keep <- c("Value1", "Value2")      # <-- Replace with values you want to keep
# column <- "ColumnName"                       # <-- Replace with the column name
# df <- subset(df, df[[column]] %in% values_to_keep)
# head(df)


### ❌ Option 2: Remove specific values from a column


In [None]:

# Uncomment and edit this section to remove specific values from a chosen column
# values_to_remove <- c("Value1", "Value2")    # <-- Replace with values you want to remove
# column <- "ColumnName"                       # <-- Replace with the column name
# df <- subset(df, !(df[[column]] %in% values_to_remove))
# head(df)


## 📊 Summarize the Data

In [None]:

# Summarise the variable `y` within each group of `x` and show the result.
# NOTE(review): which statistics are reported is decided by
# statsUtils::data_summary(), which is not visible here.
df_summary <- statsUtils::data_summary(
  df,
  varname = "y",
  groupnames = "x"
)
print(df_summary)


## 🧪 Check ANOVA Assumptions

In [None]:
# Check the ANOVA assumptions for response `y` across the groups in `x`.
# NOTE(review): which checks are run and how they are reported is decided by
# check_anova_assumptions(), which is not defined in this notebook.
check_anova_assumptions(
  df,
  response = "y",
  group = "x"
)

## 🧮 Run One-way ANOVA

In [None]:

# Model formula: response `y` explained by the grouping variable `x`.
# The post-hoc test cell reuses this same formula.
formula_var <- y ~ x

# Run the ANOVA test and display its result.
res.aov <- rstatix::anova_test(
  formula = formula_var,
  data = df
)
res.aov


## 🔍 Post-hoc Tukey Test

In [None]:

# Tukey HSD pairwise comparisons between groups, using the same formula as
# the ANOVA above; the result is displayed below the cell.
pwc <- rstatix::tukey_hsd(
  df,
  formula = formula_var
)
pwc


## 📊 Create Bar Plot

In [None]:

# Plot geometry: y-axis limits and the size of a single panel.
# NOTE(review): width/height are presumably in inches (they are later passed
# to ggplot2::ggsave(), whose default unit is inches) — confirm against
# create_complete_barplot().
ylim <- c(0, 35000)
width <- 4.4
height <- 7.6

# Build the bar plot. The `filename` argument receives the input file's base
# name with a "-barplot" suffix; paste0() is used for consistency with the
# export cell.
# NOTE(review): how create_complete_barplot() uses `filename` (e.g. whether it
# writes a file) is not visible in this notebook.
p <- create_complete_barplot(
  df,
  width = width,
  height = height,
  ylim = ylim,
  scale_color_manual.values = scale_color_manual.values,
  filename = paste0(filename, "-barplot"),
  ylabel = ylabel
)

# Display the plot.
p


## 📐 Add p-values to Plot

In [None]:

# Add plotting coordinates for the pairwise comparisons, positioned along
# the `x` variable.
pwc <- rstatix::add_xy_position(
  pwc,
  x = "x"
)

# Annotate the bar plot with the pairwise-comparison results.
# NOTE(review): the meaning of `y.buffer` is defined by
# add_stat_annotations_auto(), which is not visible in this notebook.
p_stats <- add_stat_annotations_auto(
  p,
  pwc,
  y.buffer = 0.5
)
p_stats


## 🧩 Arrange Plots Side-by-Side

In [None]:

# Two panels sit next to each other, so the figure is twice as wide as a
# single plot. `repr.plot.width` sets the width of plots rendered inline in
# the notebook.
width_aligned <- 2 * width
options(repr.plot.width = width_aligned)

# Place the plain and the annotated plot in one row, aligned both
# horizontally and vertically, each keeping its own legend.
aligned_plots <- ggpubr::ggarrange(
  p,
  p_stats,
  nrow = 1,
  align = "hv",
  common.legend = FALSE
)

aligned_plots


## 💾 Export Plots

In [None]:

# Save the side-by-side figure as SVG and as PNG, named after the input file.
# ggsave()'s argument is `filename`; it is spelled out in full here because
# `file =` only worked through R's partial argument matching.
# (The script variable `filename` is the input file's base name; the ggsave
# argument `filename` is the output path.)
fileoutput <- paste0(filename, "-barplot_stats.svg")
ggplot2::ggsave(
  filename = fileoutput,
  plot = aligned_plots,
  width = width_aligned,
  height = height
)

fileoutput <- paste0(filename, "-barplot_stats.png")
ggplot2::ggsave(
  filename = fileoutput,
  plot = aligned_plots,
  width = width_aligned,
  height = height
)


## 📚 References


- [ANOVA in R (datanovia.com)](https://www.datanovia.com/en/lessons/anova-in-r/)
- [ggplot2 line plots with error bars (sthda.com)](http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization#line-graph-with-error-bars)
- [Theory: Parametric vs Non-parametric](https://www.youtube.com/watch?v=dyGjxBi4-qo)
- [Theory: One-way ANOVA](https://www.youtube.com/watch?v=_VFLX7xJuqk)
- [Implementation in R](https://www.youtube.com/watch?v=lpdFr5SZR0Q)
