In [13]:
library(rstatix)

# install install.packages("effectsize") if you don't have it
if (!requireNamespace("effectsize", quietly = TRUE)) {
  install.packages("effectsize")
}

# Load the effectsize package
library(effectsize)


Attaching package: 'effectsize'


The following objects are masked from 'package:rstatix':

    cohens_d, eta_squared




In [2]:
data_dir <- file.path(getwd(), "..", "figures", "analysis_1", Sys.Date())

if (!dir.exists(data_dir)) {
  # raise error if the directory does not exist
  stmt < -paste0("The directory ", data_dir, " does not exist. Please run the analysis first.")
  print(stmt)
  stop(stmt)
}
 
# load the data
exons_df <- read.csv(file.path(data_dir, "age_of_onset_vs_exon.csv"))
# make exon a factor
exons_df$exon <- as.factor(exons_df$exon)
head(exons_df)

Unnamed: 0_level_0,individual_id,age_of_onset,exon
Unnamed: 0_level_1,<int>,<dbl>,<fct>
1,4,2,99
2,5,10,99
3,6,2,99
4,7,10,99
5,19,12,47
6,35,11,8


In [4]:
# calculate effect size kruskal wallis
kruskal_eff <- kruskal_effsize(data = exons_df, formula = age_of_onset ~ exon, ci=TRUE)
kruskal_eff

Unnamed: 0_level_0,.y.,n,effsize,conf.low,conf.high,method,magnitude
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<ord>
Kruskal-Wallis chi-squared,age_of_onset,324,0.1638973,0.13,0.29,eta2[H],large


In [5]:
domain_df <- read.csv(file.path(data_dir, "age_of_onset_vs_domain.csv"))
# make domain a factor
domain_df$domain <- as.factor(domain_df$domain)
head(domain_df)

Unnamed: 0_level_0,individual_id,age_of_onset,domain
Unnamed: 0_level_1,<int>,<dbl>,<fct>
1,4,2,TM
2,5,10,TM
3,6,2,TM
4,7,10,TM
5,19,12,BSol
6,28,6,TM


In [6]:
domain_kruskal_eff <- kruskal_effsize(data = domain_df, formula = age_of_onset ~ domain,ci = TRUE)
domain_kruskal_eff

Unnamed: 0_level_0,.y.,n,effsize,conf.low,conf.high,method,magnitude
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<ord>
Kruskal-Wallis chi-squared,age_of_onset,404,0.02093855,0.0028,0.09,eta2[H],small


In [7]:
subdomain_df <- read.csv(file.path(data_dir, "age_of_onset_vs_subdomain.csv"))
# make subdomain a factor
subdomain_df$subdomain <- as.factor(subdomain_df$subdomain)
head(subdomain_df)

Unnamed: 0_level_0,individual_id,age_of_onset,subdomain
Unnamed: 0_level_1,<int>,<dbl>,<fct>
1,4,2,pVSD
2,5,10,pVSD
3,6,2,pVSD
4,7,10,pVSD
5,19,12,BSol1
6,28,6,pVSD


In [8]:
subdomain_kruskal_eff <- kruskal_effsize(data = subdomain_df, formula = age_of_onset ~ subdomain, ci = TRUE)
subdomain_kruskal_eff

Unnamed: 0_level_0,.y.,n,effsize,conf.low,conf.high,method,magnitude
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<chr>,<ord>
Kruskal-Wallis chi-squared,age_of_onset,393,0.04444228,0.02,0.12,eta2[H],small


In [10]:
d_exon <- dunn_test(age_of_onset ~ exon, data = exons_df, p.adjust.method = "bonferroni")
head(d_exon)

.y.,group1,group2,n1,n2,statistic,p,p.adj,p.adj.signif
<chr>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<dbl>,<chr>
age_of_onset,8,14,34,47,0.9161197,0.359604137,1.0,ns
age_of_onset,8,15,34,6,-0.3594223,0.719279195,1.0,ns
age_of_onset,8,37,34,9,-1.069042,0.28505076,1.0,ns
age_of_onset,8,43,34,8,0.7746816,0.4385278,1.0,ns
age_of_onset,8,44,34,13,-0.3995358,0.689498438,1.0,ns
age_of_onset,8,45,34,6,-3.4779289,0.000505304,0.1167252,ns


In [15]:
eff_size <- rank_biserial(age_of_onset ~ exon, data = exons_df)
eff_size

ERROR: Error: Grouping variable y must have exactly 2 levels.


In [None]:
# print out only significant results
d_exon_sig <- d_exon[d_exon$p.adj < 0.05,]

d_exon_sig

In [None]:
d_domain <- dunn_test(age_of_onset ~ domain, data = domain_df, p.adjust.method = "bonferroni")
d_domain$eff_size <- eff_size(d_domain)
d_domain$eff_size_label <- sapply(d_domain$eff_size, label_eff_size)

# print out only significant results
d_domain_sig <- d_domain[d_domain$p.adj < 0.05,]
d_domain_sig

In [None]:
d_subdomain <- dunn_test(age_of_onset ~ subdomain, data = subdomain_df, p.adjust.method = "bonferroni")
d_subdomain$eff_size <- eff_size(d_subdomain)
d_subdomain$eff_size_label <- sapply(d_subdomain$eff_size, label_eff_size)

# print out only significant results
d_subdomain_sig <- d_subdomain[d_subdomain$p.adj < 0.05,]
d_subdomain_sig