<span STYLE="font-size:200%"> 
    Bar plots
</span>

Docker image: gnasello/datascience-env:2024-06-12\
Latest update: 26 June 2025

# Load required packages

In [None]:
# source('read_and_process_data.R')
library(dataprepUtils)
library(statsUtils)
library(ggplotUtils)

# Data prep

## Load data

In [None]:
# Input table. Its base name (extension stripped) is kept in `filename` so it
# can be reused when building output file names.
filetable <- "data.csv"
filename <- tools::file_path_sans_ext(filetable)

# Preview the raw column names. NOTE: `df` is overwritten further down by the
# processed table; this raw read is for inspection only.
df <- read.csv(filetable)
names(df)

# Plot labels
title <- " "
xlabel <- " "
ylabel <- "CD25 MFI"

# Group name -> colour mapping. The names of this vector are also passed below
# as `xlabels_ordered`, so their order here is the group order given to
# read_and_process_data().
scale_color_manual.values <- c(
  "Blank" = "#8b8c8cff",
  "αCDs" = "#4dbbd5ff",
  "AS" = "#ff6666ff",
  "αCDs + AS" = "#4d9999ff"
)

# Read and process the data.
# No trailing comma after the last argument: an empty trailing argument is
# either rejected ("unused argument") or silently treated as a missing
# argument, depending on the callee's signature.
df <- read_and_process_data(
  filetable,
  x_col = "x",
  y_col = "y",
  xlabels_ordered = names(scale_color_manual.values)
)
head(df)
tail(df)

# Statistics

## Summarize the data

The function below will be used to calculate the mean and the standard deviation, for the variable of interest, in each group. See [tutorial](http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization#line-graph-with-error-bars)

In [None]:
# Per-group summary of `y`, grouped by `x` (see the tutorial linked above
# for the mean / standard deviation summary this is based on).
df_summary <- statsUtils::data_summary(
  df,
  varname = "y",
  groupnames = "x"
)
print(df_summary)

## One-way ANOVA

Steps for ANOVA in R were adapted from [www.datanovia.com/en/lessons/anova-in-r/](https://www.datanovia.com/en/lessons/anova-in-r/)

### Check ANOVA assumptions

In [None]:
# Run the assumption checks for response `y` across the groups in `x`.
check_anova_assumptions(
  df,
  response = "y",
  group = "x"
)

## Computation

Theory on Bivariate, Parametric vs Non Parametric Tests - https://www.youtube.com/watch?v=dyGjxBi4-qo&list=PLqzoL9-eJTNCYvKwinVR0kwhm_HSnYzQo&index=6

Theory One Way ANOVA (Analysis of Variance): Introduction - https://www.youtube.com/watch?v=_VFLX7xJuqk

Implementation in R - https://www.youtube.com/watch?v=lpdFr5SZR0Q

**Why ANOVA?**

Independent variable - Independent groups - Qualitative (> 2 levels) - Parametric

Outcome variable - Quantitative

In [None]:
# Model formula, stored in a variable so the post-hoc step can reuse it.
formula_var <- y ~ x

# One-way ANOVA through a direct function call (no pipe).
res.aov <- rstatix::anova_test(
  data = df,
  formula = formula_var
)

# Display the ANOVA table.
res.aov

## Post-hoc tests


A significant one-way ANOVA is generally followed up by Tukey post-hoc tests to perform multiple pairwise comparisons between groups. Key R function: tukey_hsd() [rstatix].

In [None]:
# Tukey HSD pairwise comparisons between groups, using the same formula as
# the ANOVA above (direct call, no pipe).
pwc <- rstatix::tukey_hsd(
  df,
  formula = formula_var
)

# Display the pairwise comparison table.
pwc

# Visualization

In [None]:
# Shared plot settings used by the plotting and export cells below:
# y-axis limits and the width/height of a single panel.
ylim <- c(0, 35000)
width <- 4.4
height <- 7.6

## Bar plot of `y` over `x`

In [None]:
# Show the group names (the names of the colour vector), in the order given.
names(scale_color_manual.values)

In [None]:
# Build the bar plot of `df` with the shared size, y-limits, colours and
# y-axis label. Uncomment `filename` to pass an output base name as well.
p <- create_complete_barplot(
  df,
  width = width,
  height = height,
  ylim = ylim,
  scale_color_manual.values = scale_color_manual.values,
  # filename = paste(filename, '-barplot', sep=''),
  ylabel = ylabel
)

# Display the plot.
p

## Bar plots with p-values

In [None]:
# Add plotting positions to the pairwise-comparison table, with `x` as the
# x-axis variable.
pwc <- rstatix::add_xy_position(pwc, x = "x")

# Annotate the bar plot with the pairwise comparisons.
# NOTE(review): `y.buffer` presumably controls spacing of the annotations;
# confirm against add_stat_annotations_auto().
p_stats <- add_stat_annotations_auto(
  p,
  pwc,
  y.buffer = 0.5
)
p_stats

## Arrange plots with automatic alignment

In [None]:
# Two panels side by side, so the combined figure is twice the single-panel
# width. Also widen the notebook's plot output to match.
width_aligned <- 2 * width
options(repr.plot.width = width_aligned)

# Arrange the plain and the annotated bar plots in one row.
aligned_plots <- ggpubr::ggarrange(
  p, p_stats,
  nrow = 1,               # arrange plots horizontally
  align = "hv",           # align both horizontally and vertically
  common.legend = FALSE   # each panel keeps its own legend
)

aligned_plots

## Export plot

## `.svg` file

In [None]:
# Output path for the SVG export: <input base name>-barplot_stats.svg
fileoutput <- paste0(filename, "-barplot_stats", ".svg")
fileoutput

In [None]:
# Save the combined figure. The argument is spelled out as `filename` rather
# than relying on partial matching of `file`.
ggplot2::ggsave(
  filename = fileoutput,
  plot = aligned_plots,
  width = width_aligned,
  height = height
)

## `.png` file

In [None]:
# Output path for the PNG export: <input base name>-barplot_stats.png
fileoutput <- paste0(filename, "-barplot_stats", ".png")
fileoutput

In [None]:
# Save the combined figure. The argument is spelled out as `filename` rather
# than relying on partial matching of `file`.
ggplot2::ggsave(
  filename = fileoutput,
  plot = aligned_plots,
  width = width_aligned,
  height = height
)