# Fig 5 analysis data prep
Generate expression files for trimester-specific and sex-specific isoQTL, sQTL and eQTL mapping, with outliers and relatives removed.

In [1]:
library(data.table)
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.3     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.6
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.4.0     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::between()   masks data.table::between()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::first()     masks data.table::first()
✖ dplyr::lag()       masks stats::lag()
✖ dplyr::last()      masks data.table::last()
✖ purrr::transpose() masks data.table::transpose()

In [9]:
# Sample metadata, one row per subject (expected n = 654).
# The file has a header row; `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
meta <- read.table(
  "~/project-gandalm/isoform_twas/eqtl_new/metadata_inferSex_trimester.tsv",
  header = TRUE
)
head(meta)

Unnamed: 0_level_0,Subject,Age,Sex,inferSex,trimester
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<chr>,<int>
1,HSB100,-0.4027397,F,F,2
2,HSB152,-0.460274,M,M,2
3,HSB195,-0.4219178,M,M,2
4,HSB221,-0.4027397,F,F,2
5,HSB222,-0.4823288,F,F,2
6,HSB238,-0.4794521,F,F,2


In [10]:
# IDs of related individuals to exclude (expected n = 14).
# The file has no header row, so the single ID column is named V1.
related <- read.table(
  "~/project-gandalm/isoform_twas/genotype/all_data/isec_R2_greater_than_3/ancestry/related.txt",
  header = FALSE
)
head(related)

Unnamed: 0_level_0,V1
Unnamed: 0_level_1,<chr>
1,1614
2,11602
3,Br1779
4,Br2394
5,Br2402
6,Br2411


In [11]:
# Ancestry label per subject (expected n = 654).
pop <- read.table(
  "~/project-gandalm/isoform_twas/genotype/all_data/isec_R2_greater_than_3/ancestry/ancestry_list/ancestry.tsv",
  header = TRUE
)
head(pop)
dim(pop)

Unnamed: 0_level_0,subject,ancestry
Unnamed: 0_level_1,<chr>,<chr>
1,898,eur
2,1038,eur
3,1046,eur
4,1092,eur
5,1102,eur
6,1107,eur


In [12]:
# Attach the ancestry label to every metadata row (meta$Subject == pop$subject).
# A left join returns one output row per matching row in `pop`, so a subject
# listed twice there would silently duplicate its metadata row; fail early
# instead of propagating duplicated samples into the output files.
stopifnot(anyDuplicated(pop$subject) == 0L)
meta <- meta %>% left_join(pop, by = c("Subject" = "subject"))
head(meta)
# Number of subjects that did not receive an ancestry label.
sum(is.na(meta$ancestry))

Unnamed: 0_level_0,Subject,Age,Sex,inferSex,trimester,ancestry
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<chr>,<int>,<chr>
1,HSB100,-0.4027397,F,F,2,afr
2,HSB152,-0.460274,M,M,2,eur
3,HSB195,-0.4219178,M,M,2,eur
4,HSB221,-0.4027397,F,F,2,amr
5,HSB222,-0.4823288,F,F,2,eur
6,HSB238,-0.4794521,F,F,2,amr


In [13]:
# Remove the related individuals from the metadata, then show its dimensions.
meta <- filter(meta, !(Subject %in% related[["V1"]]))
c(nrow(meta), ncol(meta))

In [None]:
# Save the relatives-removed metadata (with ancestry) as a tab-delimited file
# with a header row, no row names and no quoting.
write.table(
  meta,
  "../../eqtl_new/metadata_inferSex_trimester_ancestry_640.tsv",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

### isoQTL

In [23]:
# Expression table for the isoQTL section, read as a plain data.frame
# (data.table = FALSE) so the dplyr verbs used on it return data.frames.
expr <- fread("../data/tx.counts.scaled.normalized.bed.gz", data.table = FALSE)

In [24]:
# Subject IDs flagged as outliers for the isoQTL expression data; the IDs are
# in column V1.
outlier <- read.table(file = "../data/tx.outlier.txt")
head(outlier, n = 6L)

Unnamed: 0_level_0,V1
Unnamed: 0_level_1,<chr>
1,17111
2,17486
3,HSB194
4,HSB618
5,Br2396
6,Br2403


In [25]:
# Subjects kept for the full isoQTL data set: everything in `meta` that is
# not listed as an outlier.
all_iso <- filter(meta, !(Subject %in% outlier[["V1"]]))
c(nrow(all_iso), ncol(all_iso))

In [27]:
# Keep the four leading columns (presumably the BED coordinate/ID fields) plus
# one column per retained subject. all_of() marks the subject IDs as an
# external character vector and errors if any ID is missing from `expr`.
iso <- expr %>% select(1:4, all_of(all_iso$Subject))
write.table(
  iso,
  "../data/tx_626.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

In [19]:
# European-ancestry, non-outlier subjects split by inferred sex.
# NOTE: a later cell in this notebook marks this European-only split as
# outdated in favour of using all ancestries.
eur_m_iso <- filter(
  meta,
  !(Subject %in% outlier[["V1"]]) & inferSex == "M" & ancestry == "eur"
)
eur_f_iso <- filter(
  meta,
  !(Subject %in% outlier[["V1"]]) & inferSex == "F" & ancestry == "eur"
)
c(nrow(eur_m_iso), ncol(eur_m_iso))
c(nrow(eur_f_iso), ncol(eur_f_iso))
# Trimester composition of each sex group.
table(eur_m_iso[["trimester"]])
table(eur_f_iso[["trimester"]])


 1  2 
61 77 


 1  2  3 
75 62  2 

In [23]:
# European-only, sex-specific expression tables: the four leading columns plus
# the selected subjects. all_of() errors if a subject ID is absent from `expr`.
# CAUTION: this writes to the same two paths as the later all-ancestry
# male/female cell, so whichever cell runs last determines the file contents.
expr_m <- expr %>% select(1:4, all_of(eur_m_iso$Subject))
expr_f <- expr %>% select(1:4, all_of(eur_f_iso$Subject))
write.table(
  expr_m,
  "../data/tx.counts.scaled.normalized.M.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  expr_f,
  "../data/tx.counts.scaled.normalized.F.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

In [24]:
# Rows x columns of the male and female expression tables.
c(nrow(expr_m), ncol(expr_m))
c(nrow(expr_f), ncol(expr_f))

In [15]:
# The European-only split above is outdated: the male/female analysis uses
# non-outlier subjects of ALL ancestries.
m_iso <- filter(meta, !(Subject %in% outlier[["V1"]]) & inferSex == "M")
f_iso <- filter(meta, !(Subject %in% outlier[["V1"]]) & inferSex == "F")
c(nrow(m_iso), ncol(m_iso))
c(nrow(f_iso), ncol(f_iso))
# Trimester composition of each sex group.
table(m_iso[["trimester"]])
table(f_iso[["trimester"]])
# Ancestry composition of each sex group.
table(m_iso[["ancestry"]])
table(f_iso[["ancestry"]])


  1   2   3 
 97 231   1 


  1   2   3 
110 185   2 


afr amr  ea eur sea 
 70  91  12 138  18 


afr amr  ea eur sea 
 65  71  11 139  11 

In [16]:
# Sex-specific expression tables over all ancestries: the four leading columns
# plus the selected subjects. all_of() errors if a subject ID is absent from
# `expr` rather than treating the IDs ambiguously.
expr_m <- expr %>% select(1:4, all_of(m_iso$Subject))
expr_f <- expr %>% select(1:4, all_of(f_iso$Subject))
write.table(
  expr_m,
  "../data/tx.counts.scaled.normalized.M.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  expr_f,
  "../data/tx.counts.scaled.normalized.F.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

In [17]:
# Rows x columns of the male and female expression tables.
c(nrow(expr_m), ncol(expr_m))
c(nrow(expr_f), ncol(expr_f))

In [25]:
# European-ancestry, non-outlier subjects split by trimester.
# `trimester` is an integer column, so compare against integer literals
# instead of relying on implicit integer-to-character coercion.
eur_1_iso <- meta %>%
  filter(!Subject %in% outlier$V1, trimester == 1L, ancestry == "eur")
eur_2_iso <- meta %>%
  filter(!Subject %in% outlier$V1, trimester == 2L, ancestry == "eur")
dim(eur_1_iso)
dim(eur_2_iso)
# Sex composition of each trimester group.
table(eur_1_iso$inferSex)
table(eur_2_iso$inferSex)


 F  M 
75 61 


 F  M 
62 77 

In [26]:
# Trimester-specific (European) expression tables: the four leading columns
# plus the selected subjects. all_of() errors if a subject ID is absent from
# `expr`.
expr_1 <- expr %>% select(1:4, all_of(eur_1_iso$Subject))
expr_2 <- expr %>% select(1:4, all_of(eur_2_iso$Subject))
write.table(
  expr_1,
  "../data/eur/tx.counts.scaled.normalized.tri1.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  expr_2,
  "../data/eur/tx.counts.scaled.normalized.tri2.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

### sQTL

In [18]:
# Intron table for the European sQTL analyses, read as a plain data.frame
# (data.table = FALSE).
intron <- fread("../../sqtl_new/data/eur/lc_combat.bed.gz", data.table = FALSE)

In [28]:
# Rows x columns of the intron table.
c(nrow(intron), ncol(intron))

In [30]:
# European-ancestry subjects split by inferred sex. Unlike the isoQTL section,
# no outlier list is applied here.
eur_m_intron <- meta %>% filter(inferSex == "M", ancestry == "eur")
eur_f_intron <- meta %>% filter(inferSex == "F", ancestry == "eur")
# Four leading columns plus the selected subjects; all_of() errors if a
# subject ID is absent from `intron`.
intron_m <- intron %>% select(1:4, all_of(eur_m_intron$Subject))
intron_f <- intron %>% select(1:4, all_of(eur_f_intron$Subject))
write.table(
  intron_m,
  "../../sqtl_new/data/eur/lc.m.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  intron_f,
  "../../sqtl_new/data/eur/lc.f.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

In [31]:
# European-ancestry subjects split by trimester. `trimester` is an integer
# column, so compare against integer literals rather than strings.
eur_1_intron <- meta %>% filter(trimester == 1L, ancestry == "eur")
eur_2_intron <- meta %>% filter(trimester == 2L, ancestry == "eur")
# Four leading columns plus the selected subjects; all_of() errors if a
# subject ID is absent from `intron`.
intron_1 <- intron %>% select(1:4, all_of(eur_1_intron$Subject))
intron_2 <- intron %>% select(1:4, all_of(eur_2_intron$Subject))
write.table(
  intron_1,
  "../../sqtl_new/data/eur/lc.tri1.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  intron_2,
  "../../sqtl_new/data/eur/lc.tri2.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

In [32]:
# Trimester composition of the European male and female sQTL groups.
table(eur_m_intron[["trimester"]])
table(eur_f_intron[["trimester"]])


 1  2  3 
63 79  1 


 1  2  3 
78 64  3 

In [33]:
# Sex composition of the European trimester 1 and trimester 2 sQTL groups.
table(eur_1_intron[["inferSex"]])
table(eur_2_intron[["inferSex"]])


 F  M 
78 63 


 F  M 
64 79 

In [19]:
# Use ALL ancestries for the male/female analysis. This replaces the
# European-only `intron` table loaded earlier in the notebook.
intron <- fread(
  "~/project-gandalm/isoform_twas/sqtl_new/cluster/lc_640.bed.gz",
  data.table = FALSE
)

In [20]:
# Rows x columns of the intron table.
c(nrow(intron), ncol(intron))

In [21]:
# All-ancestry subjects split by inferred sex (no outlier list applied here).
m_intron <- meta %>% filter(inferSex == "M")
f_intron <- meta %>% filter(inferSex == "F")
# Four leading columns plus the selected subjects; all_of() errors if a
# subject ID is absent from `intron`.
intron_m <- intron %>% select(1:4, all_of(m_intron$Subject))
intron_f <- intron %>% select(1:4, all_of(f_intron$Subject))
write.table(
  intron_m,
  "../../sqtl_new/data/lc.m.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  intron_f,
  "../../sqtl_new/data/lc.f.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)

In [22]:
# Rows x columns of the male and female intron tables.
c(nrow(intron_m), ncol(intron_m))
c(nrow(intron_f), ncol(intron_f))

### eQTL

In [28]:
# Gene-level expression table for the eQTL section, read as a plain
# data.frame (data.table = FALSE). This reuses the name `expr`, replacing the
# table loaded in the isoQTL section.
expr <- fread("../../eqtl_new/data/genes.629.bed.gz", data.table = FALSE)

In [31]:
# Subject IDs flagged as outliers for the gene-level data; the IDs are in
# column V1.
out <- read.table(file = "../../eqtl_new/data/gene.outlier.txt")
head(out, n = 6L)

Unnamed: 0_level_0,V1
Unnamed: 0_level_1,<chr>
1,17111
2,HSB194
3,Br2403
4,Br2410
5,Br2411
6,Br2416


In [33]:
# Male and female subjects for the eQTL files: start from the sex groups built
# in the sQTL section (`m_intron`, `f_intron`) and drop gene-level outliers.
m_subj <- filter(m_intron, !(Subject %in% out[["V1"]]))
f_subj <- filter(f_intron, !(Subject %in% out[["V1"]]))

In [34]:
# Sex-specific gene expression tables: the four leading columns plus the
# selected subjects. all_of() errors if a subject ID is absent from `expr`.
gene_m <- expr %>% select(1:4, all_of(m_subj$Subject))
gene_f <- expr %>% select(1:4, all_of(f_subj$Subject))
dim(gene_m)
dim(gene_f)
write.table(
  gene_m,
  "../../eqtl_new/data/genes.m.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)
write.table(
  gene_f,
  "../../eqtl_new/data/genes.f.bed",
  col.names = TRUE, row.names = FALSE, quote = FALSE, sep = "\t"
)