In [1]:
library(Seurat)
library(dplyr)
library(ggplot2)
library(DirichletReg)

# Session setup: fixed seed, project working directory, notebook plot size,
# helper functions, and the colour palette used for cell types.
set.seed(47)
setwd("~/Dropbox (MIT)/Zambia/")
options(repr.plot.width = 8, repr.plot.height = 8)
source("helper_scripts/plot_cluster_meta_percentage.R")
library(RColorBrewer)
# 25 colours: Set2 (8) followed by Set1 (9) and Set3 (8).
cell_color_scheme <- c(
  brewer.pal(n = 8, name = "Set2"),
  brewer.pal(n = 9, name = "Set1"),
  brewer.pal(n = 8, name = "Set3")
)
# patient_color_scheme = readRDS("color_palette/cell_color_scheme.rdds")
# All relative paths below resolve against the re-sequencing analysis folder.
setwd("~/Dropbox (MIT)/Zambia/reseq_analysis/")


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: Formula
Loading required package: rgl


In [2]:
# Load the combined object saved under the current working directory.
combined <- readRDS(file = "combined_iterate_jan13.rds")

In [3]:
# Keep only cells whose `study` is "EE" and fix the display order of regions.
comb_zambia <- subset(combined, study %in% c("EE"))
region_levels <- c("Duodenum", "Bulb", "Jejunum")
comb_zambia$region <- factor(comb_zambia$region, levels = region_levels)

In [4]:
# Fill colours for the box plots: two for HIV status, plus the disease and
# region palettes stored alongside the analysis.
hiv_colors <- c("#9c954d", "#b067a3")
disease_colors <- readRDS(file = "disease_colors.rds")
region_colors <- readRDS(file = "region_colors.rds")

# Looking at HIV within only duodenal bulb samples

In [6]:
# Leave-one-sample-out testing of HIV status ("HIV.HTLV." == "Y") per tier4
# cell type, restricted to cells whose region is "Bulb".
Idents(comb_zambia) <- "tier4"
s_obj <- subset(comb_zambia, region == "Bulb")
s_obj$cell_types <- s_obj$tier4

all_counts <- generate_all_counts(s_obj@meta.data)

sample_all_counts <- generate_counts_by_sample(all_counts)
sample_meta <- generate_sample_meta(s_obj@meta.data)
sample_meta <- sample_meta[order(sample_meta$orig.ident), ]
# NOTE(review): `$HIV.HTLV` has no trailing dot; if the column is named
# "HIV.HTLV." this only works through `$` partial matching — confirm.
sample_all_counts <- cbind(sample_all_counts, HIV.HTLV. = sample_meta$HIV.HTLV)

# Test on all samples; its cluster column names the matrices below.
fischer_hiv <- run_fischer(s_obj, "HIV.HTLV.", "Y", s_obj$tier4)

# One row per row of sample_all_counts, one column per cell type. Cells start
# at 1, so a cluster absent from a leave-one-out run keeps adjusted p = 1 and
# a positive sign.
n_runs <- nrow(sample_all_counts)
n_cell_types <- length(unique(s_obj$cell_types))
pvals_hiv <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_hiv) <- fischer_hiv$clusters
signs_hiv <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_hiv) <- fischer_hiv$clusters

# seq_along()/seq_len() keep both loops empty when there is nothing to
# iterate over (1:0 would run twice).
for (i in seq_along(unique(s_obj$orig.ident))) {
  print(i)
  # Drop the i-th sample and repeat the test on the remaining cells.
  temp <- subset(s_obj, orig.ident != unique(s_obj$orig.ident)[i])
  # The original ran run_fischer() twice with identical arguments; one call
  # now feeds both the p-value and the sign matrix.
  # NOTE(review): assumes run_fischer() is deterministic — confirm in the
  # helper that defines it.
  pval_table_hiv <- run_fischer(temp, "HIV.HTLV.", "Y", temp$tier4)
  sign_table_hiv <- pval_table_hiv

  # Match on cluster name because a run may return clusters in another order.
  for (j in seq_len(nrow(pval_table_hiv))) {
    pvals_hiv[i, colnames(pvals_hiv) == pval_table_hiv$clusters[j]] <-
      pval_table_hiv$adj_pval[j]
    signs_hiv[i, colnames(signs_hiv) == sign_table_hiv$clusters[j]] <-
      sign_table_hiv$sign[j]
  }
}



[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11


In [5]:
# TRUE when the entries of `x` agree in direction: either every entry is
# strictly positive or none is. An empty vector counts as agreeing.
same_sign <- function(x) {
  n_positive <- sum(x > 0)
  if (n_positive == 0 || n_positive == length(x)) {
    return(TRUE)
  }
  FALSE
}
# Summarise leave-one-out results per cell type.
#
# pvals, signs: matrices with one row per leave-one-out run and one column per
#   cell type; both must list the cell types in the same column order.
# Returns a data frame with one row per cell type whose sign is consistent
#   across all runs (see same_sign()), holding the largest adjusted p-value
#   seen in any run (`max_adj_pvals`), sorted ascending. Row names are the
#   cell-type column names.
get_max_pvals <- function(pvals, signs) {
  max_pvals <- apply(pvals, 2, max)
  same_signs <- apply(signs, 2, same_sign)
  # Filter BEFORE sorting: `same_signs` is a positional mask in column order,
  # so applying it to an already re-ordered vector would keep and drop the
  # wrong cell types.
  max_pvals <- max_pvals[same_signs]
  max_pvals <- max_pvals[order(max_pvals)]
  data.frame(max_adj_pvals = max_pvals)
}

# Collapse the leave-one-out matrices and expose the row names (cell types)
# as a regular column.
max_pvals_hiv <- get_max_pvals(pvals = pvals_hiv, signs = signs_hiv)
max_pvals_hiv[["cell_types"]] <- rownames(max_pvals_hiv)


ERROR: Error in apply(pvals, 2, max): object 'pvals_hiv' not found


In [26]:
# Reload the stored bulb results and rebuild the per-sample counts for the
# bulb-only subset.
max_pvals_hiv <- readRDS(file = "composition/max_pvals_hiv_only_bulb_jan19.rds")
s_obj <- subset(comb_zambia, region == "Bulb")
s_obj$cell_types <- s_obj$tier4

all_counts <- generate_all_counts(s_obj@meta.data)

In [27]:
# Relabel two cell types in the results table:
#   "T HIV IFN"    -> "T CD8 IFI44 IFIT1"
#   "T CD4 CD69lo" -> "T CD4 CD69lo IL17A"
is_t_hiv_ifn <- max_pvals_hiv$cell_types == "T HIV IFN"
max_pvals_hiv$cell_types[is_t_hiv_ifn] <- "T CD8 IFI44 IFIT1"
is_cd4_cd69lo <- max_pvals_hiv$cell_types == "T CD4 CD69lo"
max_pvals_hiv$cell_types[is_cd4_cd69lo] <- "T CD4 CD69lo IL17A"

In [28]:
# Box plots of per-sample cell-type percentages, split by HIV status, for the
# cell types whose maximum leave-one-out adjusted p-value is below 0.05.
sig_pvals <- max_pvals_hiv %>% filter(max_adj_pvals < 0.05)

# Samples labelled "Positive"; every other sample stays "Negative".
hiv_positive_samples <- c(
  "EE_HIV_1B", "EE_HIV_1D", "EE_HIV_1J", "EE_HIV_2B",
  "EE_HIV_2D", "EE_HIV_3B", "EE_HIV_3D", "EE_HIV_3J"
)
all_counts$HIV <- "Negative"
all_counts$HIV[all_counts$orig.ident %in% hiv_positive_samples] <- "Positive"

sig_counts <- all_counts %>% filter(cell_types %in% sig_pvals$cell_types)

# Median percentage per significant cell type (read again by the table cell
# that follows the plots).
blah <- sig_counts %>%
  group_by(cell_types) %>%
  summarize(md = median(percent_of_sample))
# Disabled: order the x axis by decreasing median.
# blah = blah[order(blah$md,decreasing = T),]
# all_counts$cell_types = as.character(all_counts$cell_types)
# all_counts$cell_types = factor(all_counts$cell_types,levels=blah$cell_types)

# Layers shared by both PDFs.
hiv_boxplot <- ggplot(
  sig_counts,
  aes(x = cell_types, y = percent_of_sample, fill = HIV)
) +
  geom_boxplot() +
  theme_classic()

# Version without x-axis labels.
pdf("~/zambia_eed_figures/supp_stricture_region_hiv/bulb_x_hiv_jan19.pdf", useDingbats = FALSE)
hiv_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = hiv_colors)
dev.off()

# Version with rotated x-axis labels.
pdf("~/zambia_eed_figures/supp_stricture_region_hiv/bulb_x_hiv_jan19_xlab.pdf", useDingbats = FALSE)
hiv_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("") +
  xlab("") +
  ylab("Percentage") +
  scale_fill_manual(values = hiv_colors)
dev.off()

In [18]:
# Show the p-values of the plotted cell types in the same order as `blah`.
a <- filter(max_pvals_hiv, cell_types %in% blah$cell_types)
row_order <- order(match(a$cell_types, blah$cell_types))
a[row_order, ]

Unnamed: 0_level_0,max_adj_pvals,cell_types
Unnamed: 0_level_1,<dbl>,<chr>
9,0.01358628,T gamma delta GZMAhi
1,1.5624510000000002e-28,Plasma cells
6,0.001867119,Stem OLFM4
4,1.345684e-05,T CD8 MALAT1 XIST NKTR
2,8.372422e-12,T gamma delta CXCR4hi
10,0.01572012,Epi FABP1 CD55
5,2.998787e-05,Stem OLFM4 LGR5
7,0.002059813,T CD4 CD69hi
11,0.03098162,Monocytes CD16B
8,0.004406012,Epi FABP1 PTMA


# HIV within distal duodenal samples

In [27]:
# Leave-one-sample-out testing of HIV status ("HIV.HTLV." == "Y") per tier4
# cell type, restricted to cells whose region is "Duodenum".
Idents(comb_zambia) <- "tier4"
s_obj <- subset(comb_zambia, region == "Duodenum")
s_obj$cell_types <- s_obj$tier4

all_counts <- generate_all_counts(s_obj@meta.data)

sample_all_counts <- generate_counts_by_sample(all_counts)
sample_meta <- generate_sample_meta(s_obj@meta.data)
sample_meta <- sample_meta[order(sample_meta$orig.ident), ]
# NOTE(review): `$HIV.HTLV` has no trailing dot; if the column is named
# "HIV.HTLV." this only works through `$` partial matching — confirm.
sample_all_counts <- cbind(sample_all_counts, HIV.HTLV. = sample_meta$HIV.HTLV)

# Test on all samples; its cluster column names the matrices below.
fischer_hiv <- run_fischer(s_obj, "HIV.HTLV.", "Y", s_obj$tier4)

# One row per row of sample_all_counts, one column per cell type. Cells start
# at 1, so a cluster absent from a leave-one-out run keeps adjusted p = 1 and
# a positive sign.
n_runs <- nrow(sample_all_counts)
n_cell_types <- length(unique(s_obj$cell_types))
pvals_hiv <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_hiv) <- fischer_hiv$clusters
signs_hiv <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_hiv) <- fischer_hiv$clusters

# seq_along()/seq_len() keep both loops empty when there is nothing to
# iterate over (1:0 would run twice).
for (i in seq_along(unique(s_obj$orig.ident))) {
  print(i)
  # Drop the i-th sample and repeat the test on the remaining cells.
  temp <- subset(s_obj, orig.ident != unique(s_obj$orig.ident)[i])
  # The original ran run_fischer() twice with identical arguments; one call
  # now feeds both the p-value and the sign matrix.
  # NOTE(review): assumes run_fischer() is deterministic — confirm in the
  # helper that defines it.
  pval_table_hiv <- run_fischer(temp, "HIV.HTLV.", "Y", temp$tier4)
  sign_table_hiv <- pval_table_hiv

  # Match on cluster name because a run may return clusters in another order.
  for (j in seq_len(nrow(pval_table_hiv))) {
    pvals_hiv[i, colnames(pvals_hiv) == pval_table_hiv$clusters[j]] <-
      pval_table_hiv$adj_pval[j]
    signs_hiv[i, colnames(signs_hiv) == sign_table_hiv$clusters[j]] <-
      sign_table_hiv$sign[j]
  }
}




[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11


In [28]:
# TRUE when the entries of `x` agree in direction: either every entry is
# strictly positive or none is. An empty vector counts as agreeing.
same_sign <- function(x) {
  n_positive <- sum(x > 0)
  if (n_positive == 0 || n_positive == length(x)) {
    return(TRUE)
  }
  FALSE
}
# Summarise leave-one-out results per cell type.
#
# pvals, signs: matrices with one row per leave-one-out run and one column per
#   cell type, in the same column order.
# Returns a data frame (row names = cell-type column names) with the largest
#   adjusted p-value per column, keeping only columns whose signs agree across
#   all runs according to same_sign(). Column order is preserved.
get_max_pvals <- function(pvals, signs) {
  consistent <- apply(signs, 2, same_sign)
  worst_case <- apply(pvals, 2, max)[consistent]
  data.frame(max_adj_pvals = worst_case)
}

# Collapse the leave-one-out matrices, expose the row names (cell types) as a
# column, and store the table for the plotting cells.
max_pvals_hiv <- get_max_pvals(pvals = pvals_hiv, signs = signs_hiv)
max_pvals_hiv[["cell_types"]] <- rownames(max_pvals_hiv)
saveRDS(max_pvals_hiv, file = "composition/max_pvals_hiv_only_duodenum_jan19.rds")


In [23]:
# Reload the stored duodenum results and rebuild the per-sample counts for the
# duodenum-only subset.
max_pvals_hiv <- readRDS(file = "composition/max_pvals_hiv_only_duodenum_jan19.rds")
s_obj <- subset(comb_zambia, region == "Duodenum")
s_obj$cell_types <- s_obj$tier4

all_counts <- generate_all_counts(s_obj@meta.data)

In [24]:
# Relabel two cell types in the results table:
#   "T HIV IFN"    -> "T CD8 IFI44 IFIT1"
#   "T CD4 CD69lo" -> "T CD4 CD69lo IL17A"
is_t_hiv_ifn <- max_pvals_hiv$cell_types == "T HIV IFN"
max_pvals_hiv$cell_types[is_t_hiv_ifn] <- "T CD8 IFI44 IFIT1"
is_cd4_cd69lo <- max_pvals_hiv$cell_types == "T CD4 CD69lo"
max_pvals_hiv$cell_types[is_cd4_cd69lo] <- "T CD4 CD69lo IL17A"

In [25]:
# Box plots of per-sample cell-type percentages, split by HIV status, for the
# cell types whose maximum leave-one-out adjusted p-value is below 0.05.
sig_pvals <- max_pvals_hiv %>% filter(max_adj_pvals < 0.05)

# Samples labelled "Positive"; every other sample stays "Negative".
hiv_positive_samples <- c(
  "EE_HIV_1B", "EE_HIV_1D", "EE_HIV_1J", "EE_HIV_2B",
  "EE_HIV_2D", "EE_HIV_3B", "EE_HIV_3D", "EE_HIV_3J"
)
all_counts$HIV <- "Negative"
all_counts$HIV[all_counts$orig.ident %in% hiv_positive_samples] <- "Positive"

sig_counts <- all_counts %>% filter(cell_types %in% sig_pvals$cell_types)

# Median percentage per significant cell type.
blah <- sig_counts %>%
  group_by(cell_types) %>%
  summarize(md = median(percent_of_sample))
# Disabled: order the x axis by decreasing median.
# blah = blah[order(blah$md,decreasing = T),]
# all_counts$cell_types = as.character(all_counts$cell_types)
# all_counts$cell_types = factor(all_counts$cell_types,levels=blah$cell_types)

# Layers shared by both PDFs.
hiv_boxplot <- ggplot(
  sig_counts,
  aes(x = cell_types, y = percent_of_sample, fill = HIV)
) +
  geom_boxplot() +
  theme_classic()

# Version without x-axis labels.
pdf("~/zambia_eed_figures/supp_stricture_region_hiv/duodenum_x_hiv_jan19.pdf", useDingbats = FALSE)
hiv_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = hiv_colors)
dev.off()

# Version with rotated x-axis labels.
pdf("~/zambia_eed_figures/supp_stricture_region_hiv/duodenum_x_hiv_jan19_xlab.pdf", useDingbats = FALSE)
hiv_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("") +
  xlab("") +
  ylab("Percentage") +
  scale_fill_manual(values = hiv_colors)
dev.off()

# Within jejunum with HIV negative vs positive

In [30]:
# Leave-one-sample-out testing of HIV status ("HIV.HTLV." == "Y") per tier4
# cell type, restricted to cells whose region is "Jejunum".
Idents(comb_zambia) <- "tier4"
s_obj <- subset(comb_zambia, region == "Jejunum")
s_obj$cell_types <- s_obj$tier4

all_counts <- generate_all_counts(s_obj@meta.data)

sample_all_counts <- generate_counts_by_sample(all_counts)
sample_meta <- generate_sample_meta(s_obj@meta.data)
sample_meta <- sample_meta[order(sample_meta$orig.ident), ]
# NOTE(review): `$HIV.HTLV` has no trailing dot; if the column is named
# "HIV.HTLV." this only works through `$` partial matching — confirm.
sample_all_counts <- cbind(sample_all_counts, HIV.HTLV. = sample_meta$HIV.HTLV)

# Test on all samples; its cluster column names the matrices below.
fischer_hiv <- run_fischer(s_obj, "HIV.HTLV.", "Y", s_obj$tier4)

# One row per row of sample_all_counts, one column per cell type. Cells start
# at 1, so a cluster absent from a leave-one-out run keeps adjusted p = 1 and
# a positive sign.
n_runs <- nrow(sample_all_counts)
n_cell_types <- length(unique(s_obj$cell_types))
pvals_hiv <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_hiv) <- fischer_hiv$clusters
signs_hiv <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_hiv) <- fischer_hiv$clusters

# seq_along()/seq_len() keep both loops empty when there is nothing to
# iterate over (1:0 would run twice).
for (i in seq_along(unique(s_obj$orig.ident))) {
  print(i)
  # Drop the i-th sample and repeat the test on the remaining cells.
  temp <- subset(s_obj, orig.ident != unique(s_obj$orig.ident)[i])
  # The original ran run_fischer() twice with identical arguments; one call
  # now feeds both the p-value and the sign matrix.
  # NOTE(review): assumes run_fischer() is deterministic — confirm in the
  # helper that defines it.
  pval_table_hiv <- run_fischer(temp, "HIV.HTLV.", "Y", temp$tier4)
  sign_table_hiv <- pval_table_hiv

  # Match on cluster name because a run may return clusters in another order.
  for (j in seq_len(nrow(pval_table_hiv))) {
    pvals_hiv[i, colnames(pvals_hiv) == pval_table_hiv$clusters[j]] <-
      pval_table_hiv$adj_pval[j]
    signs_hiv[i, colnames(signs_hiv) == sign_table_hiv$clusters[j]] <-
      sign_table_hiv$sign[j]
  }
}





[1] 1
[1] 2
[1] 3
[1] 4
[1] 5


In [31]:
# TRUE when the entries of `x` agree in direction: either every entry is
# strictly positive or none is. An empty vector counts as agreeing.
same_sign <- function(x) {
  n_positive <- sum(x > 0)
  if (n_positive == 0 || n_positive == length(x)) {
    return(TRUE)
  }
  FALSE
}
# Summarise leave-one-out results per cell type.
#
# pvals, signs: matrices with one row per leave-one-out run and one column per
#   cell type; both must list the cell types in the same column order.
# Returns a data frame with one row per cell type whose sign is consistent
#   across all runs (see same_sign()), holding the largest adjusted p-value
#   seen in any run (`max_adj_pvals`), sorted ascending. Row names are the
#   cell-type column names.
get_max_pvals <- function(pvals, signs) {
  max_pvals <- apply(pvals, 2, max)
  same_signs <- apply(signs, 2, same_sign)
  # Filter BEFORE sorting: `same_signs` is a positional mask in column order,
  # so applying it to an already re-ordered vector would keep and drop the
  # wrong cell types.
  max_pvals <- max_pvals[same_signs]
  max_pvals <- max_pvals[order(max_pvals)]
  data.frame(max_adj_pvals = max_pvals)
}

# Collapse the leave-one-out matrices, expose the row names (cell types) as a
# column, and store the table for the plotting cells.
max_pvals_hiv <- get_max_pvals(pvals = pvals_hiv, signs = signs_hiv)
max_pvals_hiv[["cell_types"]] <- rownames(max_pvals_hiv)
saveRDS(max_pvals_hiv, file = "composition/max_pvals_hiv_only_jejunum_jan19.rds")



In [29]:
# Reload the stored jejunum results and rebuild the per-sample counts for the
# jejunum-only subset.
max_pvals_hiv <- readRDS(file = "composition/max_pvals_hiv_only_jejunum_jan19.rds")
s_obj <- subset(comb_zambia, region == "Jejunum")
s_obj$cell_types <- s_obj$tier4

all_counts <- generate_all_counts(s_obj@meta.data)

In [30]:
# Relabel two cell types in the results table:
#   "T HIV IFN"    -> "T CD8 IFI44 IFIT1"
#   "T CD4 CD69lo" -> "T CD4 CD69lo IL17A"
is_t_hiv_ifn <- max_pvals_hiv$cell_types == "T HIV IFN"
max_pvals_hiv$cell_types[is_t_hiv_ifn] <- "T CD8 IFI44 IFIT1"
is_cd4_cd69lo <- max_pvals_hiv$cell_types == "T CD4 CD69lo"
max_pvals_hiv$cell_types[is_cd4_cd69lo] <- "T CD4 CD69lo IL17A"

In [31]:
# Box plots of per-sample cell-type percentages, split by HIV status, for the
# cell types whose maximum leave-one-out adjusted p-value is below 0.05.
sig_pvals <- max_pvals_hiv %>% filter(max_adj_pvals < 0.05)

# Samples labelled "Positive"; every other sample stays "Negative".
hiv_positive_samples <- c(
  "EE_HIV_1B", "EE_HIV_1D", "EE_HIV_1J", "EE_HIV_2B",
  "EE_HIV_2D", "EE_HIV_3B", "EE_HIV_3D", "EE_HIV_3J"
)
all_counts$HIV <- "Negative"
all_counts$HIV[all_counts$orig.ident %in% hiv_positive_samples] <- "Positive"

sig_counts <- all_counts %>% filter(cell_types %in% sig_pvals$cell_types)

# Median percentage per significant cell type (read again by the table cell
# that follows the plots).
blah <- sig_counts %>%
  group_by(cell_types) %>%
  summarize(md = median(percent_of_sample))
# Disabled: order the x axis by decreasing median.
# blah = blah[order(blah$md,decreasing = T),]
# all_counts$cell_types = as.character(all_counts$cell_types)
# all_counts$cell_types = factor(all_counts$cell_types,levels=blah$cell_types)

# Layers shared by both PDFs.
hiv_boxplot <- ggplot(
  sig_counts,
  aes(x = cell_types, y = percent_of_sample, fill = HIV)
) +
  geom_boxplot() +
  theme_classic()

# Version without x-axis labels.
pdf("~/zambia_eed_figures/supp_stricture_region_hiv/jejunum_x_hiv_jan19.pdf", useDingbats = FALSE)
hiv_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = hiv_colors)
dev.off()

# Version with rotated x-axis labels.
pdf("~/zambia_eed_figures/supp_stricture_region_hiv/jejunum_x_hiv_jan19_xlab.pdf", useDingbats = FALSE)
hiv_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("") +
  xlab("") +
  ylab("Percentage") +
  scale_fill_manual(values = hiv_colors)
dev.off()

In [25]:
# Show the p-values of the plotted cell types in the same order as `blah`.
a <- filter(max_pvals_hiv, cell_types %in% blah$cell_types)
row_order <- order(match(a$cell_types, blah$cell_types))
a[row_order, ]

Unnamed: 0_level_0,max_adj_pvals,cell_types
Unnamed: 0_level_1,<dbl>,<chr>
1,3.616566e-08,T CD8 CCL5hi CD6hi
3,0.0001639371,T CD8 CD69hi
4,0.0008849168,Stem OLFM4
2,0.0001230825,T gamma delta GZMAhi
5,0.01878373,BEST4


# By region within HIV negative patients

In [9]:
# Leave-one-sample-out testing of region membership (Bulb / Duodenum /
# Jejunum, each tested against the rest) per tier4 cell type, restricted to
# cells with HIV.HTLV. == "N".
Idents(comb_zambia) <- "tier4"
s_obj <- subset(comb_zambia, HIV.HTLV. == "N")
s_obj$cell_types <- s_obj$tier4
all_counts <- generate_all_counts(s_obj@meta.data)

sample_all_counts <- generate_counts_by_sample(all_counts)
sample_meta <- generate_sample_meta(s_obj@meta.data)
sample_meta <- sample_meta[order(sample_meta$orig.ident), ]
sample_all_counts <- cbind(sample_all_counts, region = sample_meta$region)

# Tests on all samples; their cluster columns name the matrices below.
fischer_bulb <- run_fischer(s_obj, "region", "Bulb", s_obj$tier4)
fischer_duodenum <- run_fischer(s_obj, "region", "Duodenum", s_obj$tier4)
fischer_jejunem <- run_fischer(s_obj, "region", "Jejunum", s_obj$tier4)

# One row per row of sample_all_counts, one column per cell type. Cells start
# at 1, so a cluster absent from a leave-one-out run keeps adjusted p = 1 and
# a positive sign.
n_runs <- nrow(sample_all_counts)
n_cell_types <- length(unique(s_obj$cell_types))

pvals_bulb <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_bulb) <- fischer_bulb$clusters
pvals_duodenum <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_duodenum) <- fischer_duodenum$clusters
pvals_jejunem <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_jejunem) <- fischer_jejunem$clusters

signs_bulb <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_bulb) <- fischer_bulb$clusters
signs_duodenum <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_duodenum) <- fischer_duodenum$clusters
signs_jejunem <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_jejunem) <- fischer_jejunem$clusters

# seq_along()/seq_len() keep both loops empty when there is nothing to
# iterate over (1:0 would run twice).
for (i in seq_along(unique(s_obj$orig.ident))) {
  print(i)
  # Drop the i-th sample and repeat the tests on the remaining cells.
  temp <- subset(s_obj, orig.ident != unique(s_obj$orig.ident)[i])

  # The original ran run_fischer() twice per region with identical arguments;
  # one call per region now feeds both the p-value and the sign matrix.
  # NOTE(review): assumes run_fischer() is deterministic — confirm in the
  # helper that defines it.
  pval_table_bulb <- run_fischer(temp, "region", "Bulb", temp$tier4)
  pval_table_duodenum <- run_fischer(temp, "region", "Duodenum", temp$tier4)
  pval_table_jejunem <- run_fischer(temp, "region", "Jejunum", temp$tier4)

  sign_table_bulb <- pval_table_bulb
  sign_table_duodenum <- pval_table_duodenum
  sign_table_jejunem <- pval_table_jejunem

  # NOTE(review): the row count of the bulb table drives all three fills, as
  # in the original; this presumes the three tables have equally many rows.
  for (j in seq_len(nrow(pval_table_bulb))) {
    pvals_bulb[i, colnames(pvals_bulb) == pval_table_bulb$clusters[j]] <-
      pval_table_bulb$adj_pval[j]
    pvals_duodenum[i, colnames(pvals_duodenum) == pval_table_duodenum$clusters[j]] <-
      pval_table_duodenum$adj_pval[j]
    pvals_jejunem[i, colnames(pvals_jejunem) == pval_table_jejunem$clusters[j]] <-
      pval_table_jejunem$adj_pval[j]

    signs_bulb[i, colnames(signs_bulb) == sign_table_bulb$clusters[j]] <-
      sign_table_bulb$sign[j]
    signs_duodenum[i, colnames(signs_duodenum) == sign_table_duodenum$clusters[j]] <-
      sign_table_duodenum$sign[j]
    signs_jejunem[i, colnames(signs_jejunem) == sign_table_jejunem$clusters[j]] <-
      sign_table_jejunem$sign[j]
  }
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19


In [10]:
# TRUE when the entries of `x` agree in direction: either every entry is
# strictly positive or none is. An empty vector counts as agreeing.
same_sign <- function(x) {
  n_positive <- sum(x > 0)
  if (n_positive == 0 || n_positive == length(x)) {
    return(TRUE)
  }
  FALSE
}
# Summarise leave-one-out results per cell type.
#
# pvals, signs: matrices with one row per leave-one-out run and one column per
#   cell type, in the same column order.
# Returns a data frame (row names = cell-type column names) with the largest
#   adjusted p-value per column, keeping only columns whose signs agree across
#   all runs according to same_sign(). Column order is preserved.
get_max_pvals <- function(pvals, signs) {
  consistent <- apply(signs, 2, same_sign)
  worst_case <- apply(pvals, 2, max)[consistent]
  data.frame(max_adj_pvals = worst_case)
}

# Collapse each region's leave-one-out matrices and expose the row names
# (cell types) as a regular column.
max_pvals_bulb <- get_max_pvals(pvals = pvals_bulb, signs = signs_bulb)
max_pvals_bulb[["cell_types"]] <- rownames(max_pvals_bulb)

max_pvals_duodenum <- get_max_pvals(pvals = pvals_duodenum, signs = signs_duodenum)
max_pvals_duodenum[["cell_types"]] <- rownames(max_pvals_duodenum)

max_pvals_jejunum <- get_max_pvals(pvals = pvals_jejunem, signs = signs_jejunem)
max_pvals_jejunum[["cell_types"]] <- rownames(max_pvals_jejunum)

In [7]:
# Persist the three per-region result tables.
saveRDS(max_pvals_bulb, file = "composition/hiv_neg_only_bulb_jan19.rds")
saveRDS(max_pvals_duodenum, file = "composition/hiv_neg_only_duodenum_jan19.rds")
saveRDS(max_pvals_jejunum, file = "composition/hiv_neg_only_jejunum_jan19.rds")



In [5]:
# Reload the three per-region result tables.
max_pvals_bulb <- readRDS(file = "composition/hiv_neg_only_bulb_jan19.rds")
max_pvals_duodenum <- readRDS(file = "composition/hiv_neg_only_duodenum_jan19.rds")
max_pvals_jejunum <- readRDS(file = "composition/hiv_neg_only_jejunum_jan19.rds")




In [11]:
# Unique (tier1, tier4) label pairs present in the metadata.
distinct_tier4 <- comb_zambia@meta.data %>% distinct_at(vars(tier1, tier4))

In [12]:
# Relabel three cell types in each per-region results table:
#   "Ent HIV IFN"  -> "Ent ISG15 IFI6"
#   "T HIV IFN"    -> "T CD8 IFI44 IFIT1"
#   "T CD4 CD69lo" -> "T CD4 CD69lo IL17A"
# Each table is indexed by its OWN cell_types column. The original indexed the
# duodenum and jejunum tables with `max_pvals_bulb$duodenum` /
# `max_pvals_bulb$jejunum`, which do not exist, so the "Ent HIV IFN" relabel
# was silently skipped for those two tables.
max_pvals_bulb$cell_types[max_pvals_bulb$cell_types == "Ent HIV IFN"] <- "Ent ISG15 IFI6"
max_pvals_duodenum$cell_types[max_pvals_duodenum$cell_types == "Ent HIV IFN"] <- "Ent ISG15 IFI6"
max_pvals_jejunum$cell_types[max_pvals_jejunum$cell_types == "Ent HIV IFN"] <- "Ent ISG15 IFI6"

max_pvals_bulb$cell_types[max_pvals_bulb$cell_types == "T HIV IFN"] <- "T CD8 IFI44 IFIT1"
max_pvals_duodenum$cell_types[max_pvals_duodenum$cell_types == "T HIV IFN"] <- "T CD8 IFI44 IFIT1"
max_pvals_jejunum$cell_types[max_pvals_jejunum$cell_types == "T HIV IFN"] <- "T CD8 IFI44 IFIT1"

max_pvals_bulb$cell_types[max_pvals_bulb$cell_types == "T CD4 CD69lo"] <- "T CD4 CD69lo IL17A"
max_pvals_duodenum$cell_types[max_pvals_duodenum$cell_types == "T CD4 CD69lo"] <- "T CD4 CD69lo IL17A"
max_pvals_jejunum$cell_types[max_pvals_jejunum$cell_types == "T CD4 CD69lo"] <- "T CD4 CD69lo IL17A"

In [13]:
# For each region: keep cell types with max adjusted p < 0.05, attach the
# full-data test table by cell type, and keep rows whose `sign` is 1.
fischer_jejunum <- fischer_jejunem

sig_bulb <- filter(max_pvals_bulb, max_adj_pvals < 0.05)
fischer_bulb$cell_types <- fischer_bulb$clusters
enriched_bulb <- filter(left_join(sig_bulb, fischer_bulb), sign == 1)

sig_duodenum <- filter(max_pvals_duodenum, max_adj_pvals < 0.05)
fischer_duodenum$cell_types <- fischer_duodenum$clusters
enriched_duodenum <- filter(left_join(sig_duodenum, fischer_duodenum), sign == 1)

sig_jejunum <- filter(max_pvals_jejunum, max_adj_pvals < 0.05)
fischer_jejunum$cell_types <- fischer_jejunum$clusters
enriched_jejunum <- filter(left_join(sig_jejunum, fischer_jejunum), sign == 1)

# Map sample identifiers to a region name using their last character:
# "B" -> "Bulb", "D" -> "Duodenum", "J" -> "Jejunum".
#
# orig: character vector (or factor) of sample identifiers; a single string
#   works as before.
# Returns an unnamed character vector of the same length as `orig`, with
#   NA_character_ where the last character is not B, D or J. The original
#   returned NULL in that case, which made sapply() produce a list.
get_region <- function(orig) {
  orig <- as.character(orig)
  last_pos <- nchar(orig)
  suffix <- substr(orig, last_pos, last_pos)
  region_by_suffix <- c(B = "Bulb", D = "Duodenum", J = "Jejunum")
  # Name lookup yields NA for unknown, empty or missing suffixes.
  unname(region_by_suffix[suffix])
}
# Annotate each count row with the region derived from its sample identifier.
regions <- sapply(all_counts$orig.ident, get_region)
all_counts$region <- regions

bulb_cells <- enriched_bulb$cell_types
duodenum_cells <- enriched_duodenum$cell_types
jejunum_cells <- enriched_jejunum$cell_types

# Cell types enriched in exactly one region (not in either of the other two).
not_bulb <- c(duodenum_cells, jejunum_cells)
not_duodenum <- c(bulb_cells, jejunum_cells)
not_jejunum <- c(bulb_cells, duodenum_cells)
bulb_cells_unique <- bulb_cells[!(bulb_cells %in% not_bulb)]
duodenum_cells_unique <- duodenum_cells[!(duodenum_cells %in% not_duodenum)]
jejunum_cells_unique <- jejunum_cells[!(jejunum_cells %in% not_jejunum)]

“Column `cell_types` joining character vector and factor, coercing into character vector”

In [14]:
# Significant rows for cell types enriched only in the bulb.
filter(sig_bulb, cell_types %in% bulb_cells_unique)

max_adj_pvals,cell_types
<dbl>,<chr>
0.04833588,Endothelial
0.0184411,Epi DPCR1 TFF1
1.030953e-05,Epi FABP1 CD55
2.373984e-05,Epi LYZ TFF2
4.744092e-06,T CD4 CD69lo IL17A
1.125749e-07,T gamma delta CXCR4hi
0.0001077651,T gamma delta GZMAhi


In [15]:
# Significant rows for cell types enriched only in the duodenum.
filter(sig_duodenum, cell_types %in% duodenum_cells_unique)

max_adj_pvals,cell_types
<dbl>,<chr>
0.001162336,Ent APOA ALPI
0.04723192,Ent ISG15 IFI6
2.118611e-07,Ent SI
0.03938856,Goblet
1.169468e-06,Stem OLFM4


In [16]:
# Significant rows for cell types enriched only in the jejunum.
filter(sig_jejunum, cell_types %in% jejunum_cells_unique)

max_adj_pvals,cell_types
<dbl>,<chr>


In [17]:
# Box plots of per-sample cell-type percentages, filled by region, for the
# cell types enriched in exactly one region. Each region gets two PDFs: one
# without x-axis labels and one with labels rotated 45 degrees.
# A dot-plot overlay was tried and left disabled:
# geom_dotplot(binaxis='y', stackdir='center', dotsize=0.3,position=position_dodge(0.8))

# ---- Duodenal bulb ----
bulb_boxplot <- ggplot(
  all_counts %>% filter(cell_types %in% bulb_cells_unique),
  aes(x = cell_types, y = percent_of_sample, fill = region)
) +
  geom_boxplot() +
  theme_classic()

pdf("~/zambia_eed_figures/supp_stricture_region_hiv/hiv_neg_region_bulb_enriched.pdf", useDingbats = FALSE, height = 6)

bulb_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("Duodenal bulb enriched cell types") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = region_colors)

dev.off()

pdf("~/zambia_eed_figures/supp_stricture_region_hiv/hiv_neg_region_bulb_enrichedxlab.pdf", useDingbats = FALSE, height = 6)

bulb_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("Duodenal bulb enriched cell types") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = region_colors)

dev.off()

# ---- Distal duodenum ----
duodenum_boxplot <- ggplot(
  all_counts %>% filter(cell_types %in% duodenum_cells_unique),
  aes(x = cell_types, y = percent_of_sample, fill = region)
) +
  geom_boxplot() +
  theme_classic()

pdf("~/zambia_eed_figures/supp_stricture_region_hiv/hiv_neg_region_duodenum_enriched.pdf", useDingbats = FALSE, height = 6)

duodenum_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("Distal duodenum enriched cell types") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = region_colors)

dev.off()

pdf("~/zambia_eed_figures/supp_stricture_region_hiv/hiv_neg_region_duodenum_enrichedxlab.pdf", useDingbats = FALSE, height = 6)

duodenum_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("Distal duodenum enriched cell types") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = region_colors)

dev.off()

# ---- Jejunum ----
jejunum_boxplot <- ggplot(
  all_counts %>% filter(cell_types %in% jejunum_cells_unique),
  aes(x = cell_types, y = percent_of_sample, fill = region)
) +
  geom_boxplot() +
  theme_classic()

pdf("~/zambia_eed_figures/supp_stricture_region_hiv/hiv_neg_region_jejunum_enriched.pdf", useDingbats = FALSE, height = 6)

jejunum_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("Jejunum enriched cell types") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = region_colors)

dev.off()

pdf("~/zambia_eed_figures/supp_stricture_region_hiv/hiv_neg_region_jejunum_enrichedxlab.pdf", useDingbats = FALSE, height = 6)

jejunum_boxplot +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "top",
    panel.background = element_blank()
  ) +
  ggtitle("Jejunum enriched cell types") +
  xlab("") +
  ylab("Fraction of all cells in sample") +
  scale_fill_manual(values = region_colors)

dev.off()

# By region within HIV positive patients

In [32]:
# Region comparison restricted to cells with HIV.HTLV. == "Y": build the
# per-sample counts and run the three full-data region tests.
Idents(comb_zambia) <- "tier4"
s_obj <- subset(comb_zambia, HIV.HTLV. == "Y")
s_obj$cell_types <- s_obj$tier4
all_counts <- generate_all_counts(s_obj@meta.data)

sample_all_counts <- generate_counts_by_sample(all_counts)
sample_meta <- generate_sample_meta(s_obj@meta.data)
sample_meta <- sample_meta[order(sample_meta$orig.ident), ]
sample_all_counts <- cbind(sample_all_counts, region = sample_meta$region)

fischer_bulb <- run_fischer(s_obj, "region", "Bulb", s_obj$tier4)
fischer_duodenum <- run_fischer(s_obj, "region", "Duodenum", s_obj$tier4)
fischer_jejunem <- run_fischer(s_obj, "region", "Jejunum", s_obj$tier4)



In [33]:
# Leave-one-sample-out testing of region membership (Bulb / Duodenum /
# Jejunum, each tested against the rest) per tier4 cell type, restricted to
# cells with HIV.HTLV. == "Y".
Idents(comb_zambia) <- "tier4"
s_obj <- subset(comb_zambia, HIV.HTLV. == "Y")
s_obj$cell_types <- s_obj$tier4
all_counts <- generate_all_counts(s_obj@meta.data)

sample_all_counts <- generate_counts_by_sample(all_counts)
sample_meta <- generate_sample_meta(s_obj@meta.data)
sample_meta <- sample_meta[order(sample_meta$orig.ident), ]
sample_all_counts <- cbind(sample_all_counts, region = sample_meta$region)

# Tests on all samples; their cluster columns name the matrices below.
fischer_bulb <- run_fischer(s_obj, "region", "Bulb", s_obj$tier4)
fischer_duodenum <- run_fischer(s_obj, "region", "Duodenum", s_obj$tier4)
fischer_jejunem <- run_fischer(s_obj, "region", "Jejunum", s_obj$tier4)

# One row per row of sample_all_counts, one column per cell type. Cells start
# at 1, so a cluster absent from a leave-one-out run keeps adjusted p = 1 and
# a positive sign.
n_runs <- nrow(sample_all_counts)
n_cell_types <- length(unique(s_obj$cell_types))

pvals_bulb <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_bulb) <- fischer_bulb$clusters
pvals_duodenum <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_duodenum) <- fischer_duodenum$clusters
pvals_jejunem <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(pvals_jejunem) <- fischer_jejunem$clusters

signs_bulb <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_bulb) <- fischer_bulb$clusters
signs_duodenum <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_duodenum) <- fischer_duodenum$clusters
signs_jejunem <- matrix(1, nrow = n_runs, ncol = n_cell_types)
colnames(signs_jejunem) <- fischer_jejunem$clusters

# seq_along()/seq_len() keep both loops empty when there is nothing to
# iterate over (1:0 would run twice).
for (i in seq_along(unique(s_obj$orig.ident))) {
  print(i)
  # Drop the i-th sample and repeat the tests on the remaining cells.
  temp <- subset(s_obj, orig.ident != unique(s_obj$orig.ident)[i])

  # The original ran run_fischer() twice per region with identical arguments;
  # one call per region now feeds both the p-value and the sign matrix.
  # NOTE(review): assumes run_fischer() is deterministic — confirm in the
  # helper that defines it.
  pval_table_bulb <- run_fischer(temp, "region", "Bulb", temp$tier4)
  pval_table_duodenum <- run_fischer(temp, "region", "Duodenum", temp$tier4)
  pval_table_jejunem <- run_fischer(temp, "region", "Jejunum", temp$tier4)

  sign_table_bulb <- pval_table_bulb
  sign_table_duodenum <- pval_table_duodenum
  sign_table_jejunem <- pval_table_jejunem

  # NOTE(review): the row count of the bulb table drives all three fills, as
  # in the original; this presumes the three tables have equally many rows.
  for (j in seq_len(nrow(pval_table_bulb))) {
    pvals_bulb[i, colnames(pvals_bulb) == pval_table_bulb$clusters[j]] <-
      pval_table_bulb$adj_pval[j]
    pvals_duodenum[i, colnames(pvals_duodenum) == pval_table_duodenum$clusters[j]] <-
      pval_table_duodenum$adj_pval[j]
    pvals_jejunem[i, colnames(pvals_jejunem) == pval_table_jejunem$clusters[j]] <-
      pval_table_jejunem$adj_pval[j]

    signs_bulb[i, colnames(signs_bulb) == sign_table_bulb$clusters[j]] <-
      sign_table_bulb$sign[j]
    signs_duodenum[i, colnames(signs_duodenum) == sign_table_duodenum$clusters[j]] <-
      sign_table_duodenum$sign[j]
    signs_jejunem[i, colnames(signs_jejunem) == sign_table_jejunem$clusters[j]] <-
      sign_table_jejunem$sign[j]
  }
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8


In [34]:
# Report whether the entries of x agree in sign.
#
# x: numeric vector.
# Returns TRUE when either none or all of the entries are strictly positive,
# FALSE when positives and non-positives are mixed.
same_sign = function(x){
    n_positive = sum(x > 0)
    all_or_none = n_positive == 0 || n_positive == length(x)
    if (all_or_none) TRUE else FALSE
}
# Summarise the leave-one-out matrices column by column.
#
# pvals: matrix of adjusted p-values (rows = runs, columns = cell types).
# signs: matrix of signs with the same layout.
# Returns a data.frame with a single column max_adj_pvals holding the largest
# p-value of each column, restricted to the columns for which same_sign()
# is TRUE.
get_max_pvals = function(pvals,signs){
    column_max = apply(pvals, 2, max)
    consistent = apply(signs, 2, same_sign)
    data.frame(max_adj_pvals = column_max[consistent])
}

# Per-region summary tables; the row names are copied into an explicit
# cell_types column.
max_pvals_bulb <- get_max_pvals(pvals = pvals_bulb, signs = signs_bulb)
max_pvals_bulb[["cell_types"]] <- rownames(max_pvals_bulb)

max_pvals_duodenum <- get_max_pvals(pvals = pvals_duodenum, signs = signs_duodenum)
max_pvals_duodenum[["cell_types"]] <- rownames(max_pvals_duodenum)

max_pvals_jejunum <- get_max_pvals(pvals = pvals_jejunem, signs = signs_jejunem)
max_pvals_jejunum[["cell_types"]] <- rownames(max_pvals_jejunum)

In [32]:
# Unique (tier1, tier4) combinations present in the cell metadata.
distinct_tier4 <- comb_zambia@meta.data %>%
    distinct_at(vars(tier1, tier4))

In [14]:
# Persist the three summary tables.
saveRDS(object = max_pvals_bulb, file = "composition/hiv_pos_only_bulb_jan19.rds")
saveRDS(object = max_pvals_duodenum, file = "composition/hiv_pos_only_duodenum_jan19.rds")
saveRDS(object = max_pvals_jejunum, file = "composition/hiv_pos_only_jejunum_jan19.rds")




In [20]:
# Reload the saved summary tables.
max_pvals_bulb <- readRDS(file = "composition/hiv_pos_only_bulb_jan19.rds")
max_pvals_duodenum <- readRDS(file = "composition/hiv_pos_only_duodenum_jan19.rds")
max_pvals_jejunum <- readRDS(file = "composition/hiv_pos_only_jejunum_jan19.rds")

In [35]:
# Replace three older cell type labels with their current names in each
# summary table. The rules do not feed into one another, so they are grouped
# per table here.

# Bulb
max_pvals_bulb$cell_types[which(max_pvals_bulb$cell_types == "Ent HIV IFN")] <- "Ent ISG15 IFI6"
max_pvals_bulb$cell_types[which(max_pvals_bulb$cell_types == "T HIV IFN")] <- "T CD8 IFI44 IFIT1"
max_pvals_bulb$cell_types[which(max_pvals_bulb$cell_types == "T CD4 CD69lo")] <- "T CD4 CD69lo IL17A"

# Duodenum
max_pvals_duodenum$cell_types[which(max_pvals_duodenum$cell_types == "Ent HIV IFN")] <- "Ent ISG15 IFI6"
max_pvals_duodenum$cell_types[which(max_pvals_duodenum$cell_types == "T HIV IFN")] <- "T CD8 IFI44 IFIT1"
max_pvals_duodenum$cell_types[which(max_pvals_duodenum$cell_types == "T CD4 CD69lo")] <- "T CD4 CD69lo IL17A"

# Jejunum
max_pvals_jejunum$cell_types[which(max_pvals_jejunum$cell_types == "Ent HIV IFN")] <- "Ent ISG15 IFI6"
max_pvals_jejunum$cell_types[which(max_pvals_jejunum$cell_types == "T HIV IFN")] <- "T CD8 IFI44 IFIT1"
max_pvals_jejunum$cell_types[which(max_pvals_jejunum$cell_types == "T CD4 CD69lo")] <- "T CD4 CD69lo IL17A"

In [36]:
# Alias with the corrected spelling so the jejunum code below matches the
# naming of the other regions.
fischer_jejunum = fischer_jejunem

# For each region: keep cell types whose largest leave-one-out adjusted
# p-value is below 0.05, attach the full-data test table, and keep the rows
# with sign == 1.
# The join key is stored as character and named explicitly so the join does
# not depend on whichever columns the two tables happen to share, and no
# factor/character coercion is needed.
sig_bulb = max_pvals_bulb %>% filter(max_adj_pvals < 0.05)
fischer_bulb$cell_types = as.character(fischer_bulb$clusters)
enriched_bulb = left_join(sig_bulb, fischer_bulb, by = "cell_types") %>% filter(sign==1)

sig_duodenum = max_pvals_duodenum %>% filter(max_adj_pvals < 0.05)
fischer_duodenum$cell_types = as.character(fischer_duodenum$clusters)
enriched_duodenum = left_join(sig_duodenum, fischer_duodenum, by = "cell_types") %>% filter(sign==1)

sig_jejunum = max_pvals_jejunum %>% filter(max_adj_pvals < 0.05)
fischer_jejunum$cell_types = as.character(fischer_jejunum$clusters)
enriched_jejunum = left_join(sig_jejunum, fischer_jejunum, by = "cell_types") %>% filter(sign==1)

# Map a sample identifier to its region using the identifier's last character.
#
# orig: a single sample identifier (character, or coercible to character).
# Returns "Bulb", "Duodenum" or "Jejunum" for identifiers ending in "B", "D"
# or "J". Any other identifier (including "" and NA) yields NA_character_,
# so the result is always a length-one character value and callers that map
# this over many identifiers get a plain character vector back.
get_region = function(orig){
    region_by_suffix = c(B = "Bulb", D = "Duodenum", J = "Jejunum")
    orig = as.character(orig)
    suffix = substr(orig, nchar(orig), nchar(orig))
    if (is.na(suffix) || !(suffix %in% names(region_by_suffix))) {
        return(NA_character_)
    }
    region_by_suffix[[suffix]]
}
# Annotate every row of all_counts with the region derived from its sample id.
# vapply guarantees a character vector and stops with an error if any
# identifier fails to produce exactly one region value, instead of silently
# storing a list column.
regions = vapply(all_counts$orig.ident, get_region, character(1))
all_counts$region = regions

bulb_cells = enriched_bulb$cell_types
duodenum_cells = enriched_duodenum$cell_types
jejunum_cells = enriched_jejunum$cell_types

# Cell types enriched in exactly one region: drop any that also appear in
# either of the other two regions' enriched lists.
bulb_cells_unique = bulb_cells[!(bulb_cells %in% c(duodenum_cells, jejunum_cells))]
duodenum_cells_unique = duodenum_cells[!(duodenum_cells %in% c(bulb_cells, jejunum_cells))]
jejunum_cells_unique = jejunum_cells[!(jejunum_cells %in% c(bulb_cells, duodenum_cells))]

“Column `cell_types` joining character vector and factor, coercing into character vector”

In [37]:
# Significant cell types enriched only in the bulb.
filter(sig_bulb, cell_types %in% bulb_cells_unique)

max_adj_pvals,cell_types
<dbl>,<chr>
0.04378627,Epi DPCR1 TFF1
8.178355e-31,Epi FABP1 CD55
0.04378627,Epi FABP1 PTMA
0.001759443,ILC3
0.0001582102,Monocytes CD16B
0.03464477,T CD4 CD69lo IL17A
1.517172e-18,T gamma delta CXCR4hi
1.546804e-06,T gamma delta GZMAhi


In [38]:
# Significant cell types enriched only in the duodenum.
filter(sig_duodenum, cell_types %in% duodenum_cells_unique)

max_adj_pvals,cell_types
<dbl>,<chr>
8.089528999999999e-19,Ent ISG15 IFI6
0.01144208,Stem OLFM4
5.631109e-20,T CD8 IFI44 IFIT1


In [39]:
# Significant cell types enriched only in the jejunum.
filter(sig_jejunum, cell_types %in% jejunum_cells_unique)

max_adj_pvals,cell_types
<dbl>,<chr>
0.0002602743,BEST4
6.47085e-06,Ent SI
1.154437e-14,Plasma cells
0.001684237,Stem cycling
1.154437e-14,T CD8 CCL5hi CD6hi
1.453466e-08,T CD8 MALAT1 XIST NKTR


In [40]:
# Write one boxplot of percent_of_sample per cell type, filled by region, to a
# PDF file.
#
# cells:         cell type names to include (matched against counts$cell_types).
# title:         plot title.
# file:          path of the PDF to write.
# show_x_labels: FALSE blanks the x tick labels; TRUE draws them rotated by
#                45 degrees.
# counts:        table with cell_types, percent_of_sample and region columns.
# fill_colors:   colours handed to scale_fill_manual(values = ...).
#
# Returns `file` invisibly. The PDF device is closed even when drawing fails.
save_region_enriched_pdf = function(cells, title, file, show_x_labels = FALSE,
                                    counts = all_counts, fill_colors = region_colors){
    x_text = if (show_x_labels) element_text(angle = 45, hjust = 1) else element_blank()
    plot_data = counts[counts$cell_types %in% cells, , drop = FALSE]

    p = ggplot(plot_data, aes(x=cell_types,y=percent_of_sample,fill=region)) +
        geom_boxplot() +
        theme_classic() +
        theme(text = element_text(size=20),
              axis.text.x = x_text,
              panel.border = element_blank(),
              # Remove panel grid lines
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              legend.position="top",
              # Remove panel background
              panel.background = element_blank()) +
        ggtitle(title) + xlab("") + ylab("Fraction of all cells in sample") +
        scale_fill_manual(values=fill_colors)

    pdf(file, useDingbats = FALSE, height=6)
    pdf_device = dev.cur()
    # Close exactly the device opened above, whether or not printing succeeds.
    on.exit(dev.off(pdf_device), add = TRUE)
    # Explicit print so the plot is drawn even when not evaluated at top level.
    print(p)
    invisible(file)
}

# Each region gets two files: one without x tick labels and an "xlab" variant
# with rotated labels. All six share the same y-axis label.
save_region_enriched_pdf(bulb_cells_unique, "Duodenal bulb enriched cell types",
    "~/zambia_eed_figures/supp_stricture_region_hiv/hiv_pos_region_bulb_enriched.pdf")
save_region_enriched_pdf(bulb_cells_unique, "Duodenal bulb enriched cell types",
    "~/zambia_eed_figures/supp_stricture_region_hiv/hiv_pos_region_bulb_enrichedxlab.pdf",
    show_x_labels = TRUE)

save_region_enriched_pdf(duodenum_cells_unique, "Distal duodenum enriched cell types",
    "~/zambia_eed_figures/supp_stricture_region_hiv/hiv_pos_region_duodenum_enriched.pdf")
save_region_enriched_pdf(duodenum_cells_unique, "Distal duodenum enriched cell types",
    "~/zambia_eed_figures/supp_stricture_region_hiv/hiv_pos_region_duodenum_enrichedxlab.pdf",
    show_x_labels = TRUE)

save_region_enriched_pdf(jejunum_cells_unique, "Jejunum enriched cell types",
    "~/zambia_eed_figures/supp_stricture_region_hiv/hiv_pos_region_jejunum_enriched.pdf")
save_region_enriched_pdf(jejunum_cells_unique, "Jejunum enriched cell types",
    "~/zambia_eed_figures/supp_stricture_region_hiv/hiv_pos_region_jejunum_enrichedxlab.pdf",
    show_x_labels = TRUE)

In [35]:
# Print a fixed marker string.
print(x = "done")

[1] "done"
