-
Notifications
You must be signed in to change notification settings - Fork 2
/
epitope-sex.Rmd
113 lines (93 loc) · 3.27 KB
/
epitope-sex.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
---
title: "Sex"
output:
pdf_document: default
html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(data.table)
library(dplyr)
library(ggplot2)
library(reshape2)
library(scales)
library(parallel)
library(stringr)
library(knitr)
```
Load metadata
```{r}
dt.hip.stats = fread("annotations/hip_stats.txt") %>%
filter(!is.na(sex)) %>%
mutate(count_total = count, occurrences_total = diversity) %>%
select(sample_id, sex, count_total, occurrences_total)
```
Load VDJdb annotations with 1 mismatch for HIP data (time consuming, ~ 2mln clonotypes)
```{r}
dt.hip = rbindlist(mclapply(as.list(dt.hip.stats$sample_id),
function(x) fread(paste0("annotations/split_1mm/", x, ".annot.txt")) %>%
mutate(sample_id = x), mc.cores = 40)) %>%
group_by(sample_id, cdr3) %>%
summarise(count = sum(count), occurrences = n())
```
VDJdb data
```{r}
dt.vdjdb = fread("rearr_model/VDJDB_fullP_rob_ageing.txt") %>%
filter(gene == "TRB", mhc.class == "MHCI") %>%
mutate(hla_spec = str_split_fixed(mhc.a, pattern = "[:,]", 2)[,1]) %>%
select(cdr3, hla_spec, antigen.epitope, antigen.species) %>%
group_by(antigen.epitope) %>%
mutate(unique_cdrs = n()) %>%
filter(unique_cdrs > 30) %>%
select(cdr3, antigen.epitope, antigen.species, unique_cdrs)
```
Merge
```{r}
dt.hip.m = dt.hip %>%
merge(dt.hip.stats) %>%
merge(dt.vdjdb)
```
Summarise by sex
```{r}
dt.hip.s = as.data.table(dt.hip.m) %>%
group_by(sample_id, sex, antigen.epitope, antigen.species, unique_cdrs) %>%
summarise(occurrences = sum(occurrences)) %>%
group_by(sample_id) %>%
mutate(occurrences_share = occurrences / sum(occurrences) / unique_cdrs)
```
```{r}
dt.p = data.table(antigen.epitope = unique(dt.hip.s$antigen.epitope), p = 1, freq.ratio = 1) %>%
merge(dt.hip.s %>% ungroup %>% select(antigen.species, antigen.epitope) %>% unique)
for (i in 1:nrow(dt.p)) {
tmp = dt.hip.s %>% filter(antigen.epitope == dt.p$antigen.epitope[i])
dt.p$freq.ratio[i] = with(tmp, mean(occurrences_share[which(sex=="male")]) / mean(occurrences_share[which(sex=="female")]))
dt.p$p[i] = t.test(occurrences_share ~ sex, tmp)$p.value
}
dt.p$p = p.adjust(dt.p$p, method = "BH")
dt.p$len = nchar(as.character(dt.p$antigen.epitope))
kable(dt.p %>% arrange(p))
good_epi = (dt.p %>% filter(p < 0.05))$antigen.epitope
dt.hip.s = dt.hip.s %>%
mutate(antigen.epitope = ifelse(antigen.epitope %in% good_epi, paste(antigen.epitope, "(*)"),antigen.epitope))
```
```{r}
dt.hip.s.s = dt.hip.s %>%
#filter(ucb == T) %>%
group_by(antigen.epitope) %>%
summarise(freq = mean(occurrences_share[which(sex == "male")]))
dt.hip.s$antigen.epitope = factor(dt.hip.s$antigen.epitope,
levels = dt.hip.s.s$antigen.epitope[order(dt.hip.s.s$freq)])
ggplot(dt.hip.s, aes(x = antigen.epitope, group = paste(antigen.epitope,sex),
fill = sex,
y = occurrences_share)) +
geom_boxplot(color = "black") +
coord_flip() +
scale_fill_brewer(palette = "Set1") +
xlab("") + scale_y_continuous("Share of annotated rearrangements",
expand = c(0,0)) +
theme_bw() +
theme(aspect = 1.1,
panel.grid.major = element_blank(), panel.grid.minor = element_blank())
```