-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinRace_compare.Rmd
195 lines (165 loc) · 8.15 KB
/
LinRace_compare.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
---
title: "LinRace_compare"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{LinRace_compare}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```
```{r setup}
#.libPaths("/nethome/xpan78/LinRace/Rlib")
library(devtools)
#load_all("/project/xpan78/LinRace-temp")
library(LinRace)
library(TedSim)
library(umap)
library(ggplot2)
library(phangorn)
library(DCLEAR)
library(TedSim)
library("TreeDist")
library("slingshot")
```
```{r}
#the zebrafish count data is from the scGESTALT paper
count_ZF <- read.csv("./inst/extdata/data_zebrafish/ZF1_F3_DrImpute.txt",sep = "\t")
expr_ZF <- count_ZF[,4:ncol(count_ZF)]
cluster_indent <- count_ZF[,2]
cm_ZF <- read.csv(file = "./inst/extdata/data_zebrafish/cm_ZF1_F3.txt",sep = "\t",row.names = 1,stringsAsFactors = FALSE)
rownames(cm_ZF) <- paste("cell",1:(nrow(cm_ZF)),sep = "_")
rownames(expr_ZF) <- paste("cell",1:(nrow(cm_ZF)),sep = "_")
color_assign <- read.csv("./inst/extdata/data_zebrafish/color_ZF1_F3.txt",sep = "\t")
pca_res <- prcomp(expr_ZF,scale. = TRUE)
cluster_id <- kmeans(pca_res$x[,1:50], centers = 20)$cluster
custom.config = umap.defaults
custom.config$min_dist = 0.3
umap_res <-umap(pca_res$x[,1:50],custom.config)
#p <- ggplot() + geom_point(aes(x = umap_res$layout[,1],y = umap_res$layout[,2],col = as.factor(cluster_id)))
df <- data.frame(dim1 = umap_res$layout[,1], dim2 = umap_res$layout[,2], color = as.factor(cluster_indent))
p <- ggplot(df) + geom_point(aes(x = dim1,y = dim2,col = color)) + labs(col = "Cluster ID") + xlab("UMAP 1") + ylab("UMAP 2")
p <- p + scale_color_manual(breaks = color_assign$cluster,values = paste0("#",color_assign$color))
p <- p + theme(axis.text = element_text(size = 15), axis.title = element_text(size = 20),legend.title = element_text(size = 15),legend.text = element_text(size = 15))
p
```
```{r}
sce <- slingshot(data = umap_res$layout,clusterLabels = cluster_id)
#state_lineages <- slingLineages(sce)
plot(umap_res$layout, col = cluster_id, pch = 16, cex = 1, main="Slingshot State Lineages",
xlab="UMAP 1", ylab="UMAP 2",)
lines(SlingshotDataSet(sce),col = "black", lwd = 2, type = 'lineages')
print("tree building start.")
colnames(cm_ZF) <- paste0("V",1:ncol(cm_ZF))
tree_linrace <- NJ_asym_Dif(as.data.frame(cm_ZF),cluster_id,expr_ZF,state_lineages,max_Iter = 500)
tip_label <- count_ZF$X[match(rownames(cm_ZF),tree_linrace$tip.label)]
tree_linrace$tip.label <- tip_label
#write.tree(tree_linrace,"/project/xpan78/data_zebrafish/tree_LinRace_ZF1_F3.newick")
```
```{r}
tree_Lintimat <- read.tree("./inst/extdata/data_zebrafish/tree_LinTIMaT_ZF1_F3.newick")
tree_Lintimat <- drop.tip(tree_Lintimat,"normal")
tree_Lintimat$edge.length <- NULL
tree_cas <- read.tree("./inst/extdata/data_zebrafish/tree_cassiopeia_ZF1_F3.processed.newick")
tree_cas$edge.length <- NULL
tree_dclear <- read.tree("./inst/extdata/data_zebrafish/tree_DCLEAR_nj_ZF1_F3.newick")
tree_dclear$edge.length <- NULL
tree_linrace <- read.tree("./inst/extdata/data_zebrafish/tree_LinRace_ZF1_F3.newick")
tree_cas <- multi2di(tree_cas)
tree_list <- list(tree_linrace,tree_Lintimat,tree_dclear)
```
```{r}
library(reshape2)
Nye_mutual <- matrix(0,3,3)
rownames(Nye_mutual) <- c("linrace","LinTIMaT","DCLEAR")
colnames(Nye_mutual) <- c("linrace","LinTIMaT","DCLEAR")
for(i in 1:3){
for(j in 1:3){
#rf_mutual[i,j] <- RF.dist(tree_list[[i]], tree_list[[j]], normalize = TRUE)
Nye_mutual[i,j] <- NyeSimilarity(tree_list[[i]], tree_list[[j]], normalize = TRUE)
}
}
melted_Nye_mutual <- melt(Nye_mutual)
p <- ggplot(data = melted_Nye_mutual, aes(x=Var1, y=Var2, fill=value)) +
geom_tile()
p <- p + geom_text(aes(label = value))
p
```
```{r}
library(ggtree)
color_assign <- read.csv("./inst/extdata/data_zebrafish/color_ZF1_F3.txt",sep = "\t")
color_map <- color_assign$color[match(cluster_indent,color_assign$cluster)]
color_map <- color_map[match(count_ZF$X,tree_linrace$tip.label)]
color <-c(color_map,rep("000000",749))
color <- paste0("#",color)
p <- ggtree(tree_linrace, branch.length='none',layout = "circular", aes(color=I(color))) +
geom_point(color=color, size=1)
p <- ggtree(tree_linrace, branch.length='none',layout = "circular")
cell_type <- color_assign$type[match(cluster_indent,color_assign$cluster)]
cell_type <- cell_type[match(count_ZF$X,tree_linrace$tip.label)]
genotype <- data.frame(type = cell_type)
rownames(genotype) <- tree_linrace$tip.label
p <- gheatmap(p, genotype, offset=.8, width=.2,
colnames=FALSE, legend_title="type") + scale_fill_manual(breaks=c("Blood", "Forebrain", "Hindbrain", "Midbrain", "Mixed", "Progenitor", "Unassigned"),
values=c("#9c00e5", "#c68b00", "#006f24", "#113d9d", "black", "#e560d8", "white"), name="type")
p <- p + theme(legend.title = element_text(size = 20),legend.text = element_text(size = 15))
p
```
```{r}
color_map <- color_assign$color[match(cluster_indent,color_assign$cluster)]
color_map <- color_map[match(count_ZF$X,tree_Lintimat$tip.label)]
color <-c(color_map,rep("000000",length(tree_Lintimat$node.label)))
color <- paste0("#",color)
p <- ggtree(tree_Lintimat, branch.length='none',layout = "circular", aes(color=I(color))) +
geom_point(color=color, size=1)
p <- ggtree(tree_Lintimat, branch.length='none',layout = "circular")
cell_type <- color_assign$type[match(cluster_indent,color_assign$cluster)]
cell_type <- cell_type[match(count_ZF$X,tree_Lintimat$tip.label)]
genotype <- data.frame(type = cell_type)
rownames(genotype) <- tree_Lintimat$tip.label
p <- gheatmap(p, genotype, offset=.8, width=.2,
colnames=FALSE, legend_title="type") + scale_fill_manual(breaks=c("Blood", "Forebrain", "Hindbrain", "Midbrain", "Mixed", "Progenitor", "Unassigned"),
values=c("#9c00e5", "#c68b00", "#006f24", "#113d9d", "black", "#e560d8", "white"), name="type")
p <- p + theme(legend.title = element_text(size = 20),legend.text = element_text(size = 15))
p
```
```{r}
color_map <- color_assign$color[match(cluster_indent,color_assign$cluster)]
color_map <- color_map[match(count_ZF$X,tree_dclear$tip.label)]
color <-c(color_map,rep("000000",tree_dclear$Nnode))
color <- paste0("#",color)
p <- ggtree(tree_dclear, branch.length='none',layout = "circular", aes(color=I(color))) +
geom_point(color=color, size=1)
p <- ggtree(tree_dclear, branch.length='none',layout = "circular")
cell_type <- color_assign$type[match(cluster_indent,color_assign$cluster)]
cell_type <- cell_type[match(count_ZF$X,tree_dclear$tip.label)]
genotype <- data.frame(type = cell_type)
rownames(genotype) <- tree_dclear$tip.label
p <- gheatmap(p, genotype, offset=.8, width=.2,
colnames=FALSE, legend_title="type") + scale_fill_manual(breaks=c("Blood", "Forebrain", "Hindbrain", "Midbrain", "Mixed", "Progenitor", "Unassigned"),
values=c("#9c00e5", "#c68b00", "#006f24", "#113d9d", "black", "#e560d8", "white"), name="type")
p <- p + theme(legend.title = element_text(size = 20),legend.text = element_text(size = 15))
p
```
```{r}
color_map <- color_assign$color[match(cluster_indent,color_assign$cluster)]
color_map <- color_map[match(count_ZF$X,tree_cas$tip.label)]
color <-c(color_map,rep("000000",tree_cas$Nnode))
color <- paste0("#",color)
p <- ggtree(tree_cas, branch.length='none',layout = "circular", aes(color=I(color))) +
geom_point(color=color, size=1)
p <- ggtree(tree_cas, branch.length='none',layout = "circular")
cell_type <- color_assign$type[match(cluster_indent,color_assign$cluster)]
cell_type <- cell_type[match(count_ZF$X,tree_cas$tip.label)]
genotype <- data.frame(type = cell_type)
rownames(genotype) <- tree_cas$tip.label
p <- gheatmap(p, genotype, offset=.8, width=.2,
colnames=FALSE, legend_title="type") + scale_fill_manual(breaks=c("Blood", "Forebrain", "Hindbrain", "Midbrain", "Mixed", "Progenitor", "Unassigned"),
values=c("#9c00e5", "#c68b00", "#006f24", "#113d9d", "black", "#e560d8", "white"), name="type")
p <- p + theme(legend.title = element_text(size = 20),legend.text = element_text(size = 15))
p
```