In [1]:
library(plyr)
library(tidyverse)
library(readxl)

── [1mAttaching core tidyverse packages[22m ─────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.4.4     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ───────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32marrange()[39m   masks [34mplyr[39m::arrange()
[31m✖[39m [34mpurrr[39m::[32mcompact()[39m   masks [34mplyr[39m::compact()
[31m✖[39m [34mdplyr[39m::[32mcount()[39m     masks [34mplyr[39m::count()
[31m✖[39m [34mdplyr[39m::[32mdesc()[39m      masks [34mplyr[39m::desc()
[31m✖[39m [34mdplyr[39m::[32mfailwith()[39m  masks [34mplyr[39m::fai

In [2]:
# Load the project's helper functions from the utils directory
# (getHumanConsensus(), used further down, is presumably defined there -- TODO confirm).
source(file = "utils/Task2_helperFunctions.r")

In [3]:
# Show the current working directory; the relative "utils/..." and "data/..."
# paths used in this notebook are resolved against it.
getwd()

In [4]:
# Collapse the four outcome labels into two coarser categories:
# "LLM" and "Tied" become "At least as good"; "GO" and "Neither" become
# "At least as bad".
mapToHigher <- list(
  "LLM" = "At least as good",
  "Tied" = "At least as good",
  "GO" = "At least as bad",
  "Neither" = "At least as bad"
)

In [5]:
# Translate the labels used in the blinded study sheet back to the method
# names: "Name B" stands for "LLM" and "Name A" for "GO"; "Neither" and "Tied"
# are passed through unchanged.
mapBlinded <- list(
  "Name B" = "LLM",
  "Name A" = "GO",
  "Neither" = "Neither",
  "Tied" = "Tied"
)

### Read in SapBERT determination

In [6]:
# Read the tab-delimited analysis table into a tibble.
sapbert_DF <- read_delim(
  "data/omics_LLM_Enrichr_simVals_analyses_DF.tsv",
  delim = "\t"
)

[1mRows: [22m[34m250[39m [1mColumns: [22m[34m28[39m
[36m──[39m [1mColumn specification[22m [36m───────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (11): Source, GeneSetID, GeneSetName, GeneList, LLM Name, LLM Analysis, ...
[32mdbl[39m (17): ...1, index, n_Genes, Score, Rank, P-value, Adjusted P-value, Orig...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


### Read in consensus analyses

In [7]:
# Read the "Merged" sheet of the blinded-study workbook.
human_consensus_DF <- read_xlsx(
  "data/Omics - blinded study.xlsx",
  sheet = "Merged"
)

In [8]:
# List the column names of the human-review sheet.
names(human_consensus_DF)

### Merge and analyse consensus

In [9]:
# Keep only the gene sets present in both tables, matching on the four
# identifying columns.
merged_DF <- sapbert_DF %>%
  inner_join(
    human_consensus_DF,
    by = c("Source", "GeneSetID", "GeneSetName", "GeneList")
  )

In [10]:
# Derive the consensus of the human reviewers for every gene set, then collapse
# both the human call and the automated `winner` call to the two-level scheme.

# Vectorised lookup in a named mapping (list or character vector). Returns a
# plain, unnamed character vector with NA for keys absent from the mapping.
# Indexing the list with `[` inside a row-wise mutate(), as was done before,
# produced list-columns rather than character columns.
lookupLabel <- function(mapping, keys) {
  unname(unlist(mapping)[as.character(keys)])
}

merged_DF <- merged_DF %>%
  # Evaluate getHumanConsensus() one row at a time; whether the helper is
  # vectorised is not visible here, so the row-wise call is preserved.
  rowwise() %>%
  mutate(
    humanConsensusBlinded = getHumanConsensus(
      Winner_human1, Winner_human2, Winner_human3, OverridingDecision
    )
  ) %>%
  # Drop the row-wise grouping so it does not leak into later operations.
  ungroup() %>%
  mutate(
    humanConsensus = lookupLabel(mapBlinded, humanConsensusBlinded),
    humanConsensus_higherLevel = lookupLabel(mapToHigher, humanConsensus),
    winner_higherLevel = lookupLabel(mapToHigher, winner)
  ) %>%
  select(all_of(c(
    "Source", "GeneSetID", "GeneSetName", "GeneList",
    "winner", "humanConsensus", "OverridingDecision",
    "humanConsensus_higherLevel", "winner_higherLevel"
  )))



In [12]:
# Convert the collapsed automated call to a factor with a fixed level order,
# so "At least as good" always comes first.
merged_DF[["winner_higherLevel"]] <- factor(
  merged_DF[["winner_higherLevel"]],
  levels = c("At least as good", "At least as bad")
)


In [13]:
# Convert the collapsed human consensus to a factor with a fixed level order,
# so "At least as good" always comes first.
merged_DF[["humanConsensus_higherLevel"]] <- factor(
  merged_DF[["humanConsensus_higherLevel"]],
  levels = c("At least as good", "At least as bad")
)


In [28]:
# Show the two collapsed columns side by side.
merged_DF[
  ,
  c("humanConsensus_higherLevel", "winner_higherLevel")
]

humanConsensus_higherLevel,winner_higherLevel
<fct>,<fct>
At least as bad,At least as bad
At least as good,At least as bad
At least as good,At least as good
At least as good,At least as good
At least as bad,At least as bad
At least as good,At least as bad
At least as good,At least as bad
At least as bad,At least as good
At least as bad,At least as bad
At least as good,At least as good


In [21]:
 # Cross-tabulate the human consensus (rows) against the automated call
 # (columns) at the collapsed two-level scheme.
 higherLevel_contingencyTable <- table(
   humanConsensus_higherLevel = merged_DF$humanConsensus_higherLevel,
   winner_higherLevel = merged_DF$winner_higherLevel
 )

In [22]:
# Display the contingency table.
higherLevel_contingencyTable

                          winner_higherLevel
humanConsensus_higherLevel At least as good At least as bad
          At least as good               22              14
          At least as bad                 5               9

In [25]:
# Proportion of gene sets where both calls agree: the diagonal cells of the
# table divided by the total count.
sum(diag(higherLevel_contingencyTable)) / sum(higherLevel_contingencyTable)

In [27]:
# Number of gene sets where both calls agree (sum of the diagonal cells).
sum(diag(higherLevel_contingencyTable))