-
Notifications
You must be signed in to change notification settings - Fork 0
/
sankeyplot.R
174 lines (127 loc) · 6.42 KB
/
sankeyplot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
library(RColorBrewer)
library(ggplot2)
library(dbscan)
cutoff <- function(num,u_lim){
if(num > u_lim){
return(u_lim)
}else return(num)
}
n_imp = 10
cols <-brewer.pal(3, "Set1")[c(2,1)]
original_data <- read.table("pji-blood-extract - cut_c4_fin+CRP_20191108.csv",sep=",",header=T)
original_data <- original_data[rowSums(is.na(original_data)) < 5, ]
original_data_preope <- original_data[original_data$Time_original=="1m_ago",]
original_data_postope <- original_data[original_data$Time_original!="1m_ago",]
all_list <- 1:nrow(original_data_preope)
train_list <- all_list[all_list %% 3 != 0]
test_list <- all_list[all_list %% 3 == 0]
original_data_train <- original_data_preope[train_list,]
original_data_train$dataset <- rep("train",nrow(original_data_train))
original_data_test <- original_data_preope[test_list,]
original_data_test$dataset <- rep("test",nrow(original_data_test))
original_data_postope$dataset <- rep("postope",nrow(original_data_postope))
UMAPdata_train <- read.table("YCU_PJI_preope_train_SRFdist_UMAP_embedding.txt",sep="\t",header=T,row.names=1)
UMAPdata_train <- data.frame(UMAP1=UMAPdata_train[,1],UMAP2=UMAPdata_train[,2],original_data_train)
UMAPdata_test <- read.table("YCU_PJI_preope_test_SRFdist_UMAP_embedding.txt",sep="\t",header=T,row.names=1)
UMAPdata_test <- data.frame(UMAP1=UMAPdata_test[,1],UMAP2=UMAPdata_test[,2],original_data_test)
UMAPdata_postope <- read.table("YCU_PJI_postope_SRFdist_UMAP_embedding.txt",sep="\t",header=T,row.names=1)
UMAPdata_postope <- data.frame(UMAP1=UMAPdata_postope[,1],UMAP2=UMAPdata_postope[,2],original_data_postope)
UMAPdata_preope <- rbind(UMAPdata_train,UMAPdata_test)
UMAPdata_all <- rbind(UMAPdata_preope,UMAPdata_postope)
# res <- dbscan(x = UMAPdata_all[,c("UMAP1","UMAP2")], eps = 0.8, minPts = 10)
res <- kmeans(UMAPdata_all[,c("UMAP1","UMAP2")],2)
clu <- res$cluster
UMAPdata_all$cluster <- clu
UMAPdata_preope$PJI <- as.factor(UMAPdata_preope$PJI)
UMAPdata_all$PJI <- as.factor(UMAPdata_all$PJI)
UMAPdata_all$Time_original <- factor(UMAPdata_all$Time_original, levels=c("1m_ago","3m_later","6m_later","12m_later"))
UMAPdata_all_wider <- UMAPdata_all[,c("ID","PJI","Time_original","cluster")] %>% pivot_wider(id_cols=c(ID,PJI),names_from=Time_original,values_from=cluster)
UMAPdata_all_wider$`1m_ago` <- UMAPdata_all_wider$`1m_ago` + 2
UMAPdata_all_wider$`3m_later` <- UMAPdata_all_wider$`3m_later` + 4
UMAPdata_all_wider$`6m_later` <- UMAPdata_all_wider$`6m_later` + 6
UMAPdata_all_wider$`12m_later` <- UMAPdata_all_wider$`12m_later` + 8
edge_1mago <- UMAPdata_all_wider %>% dplyr::count(PJI,`1m_ago`) %>% na.omit() %>% as_tbl_graph()
edge_1mago_to_3mlater <- UMAPdata_all_wider %>% dplyr::count(`1m_ago`, `3m_later`) %>% na.omit() %>% as_tbl_graph()
edge_3mlater_to_6mlater <- UMAPdata_all_wider %>% dplyr::count(`3m_later`, `6m_later`) %>% na.omit() %>% as_tbl_graph()
edge_6mlater_to_12mlater <- UMAPdata_all_wider %>% dplyr::count(`6m_later`, `12m_later`) %>% na.omit() %>% as_tbl_graph()
g <- edge_1mago %>% graph_join(edge_1mago_to_3mlater) %>% graph_join(edge_3mlater_to_6mlater) %>% graph_join(edge_6mlater_to_12mlater)
edge <- g %>% activate(edges) %>% as_tibble() %>% mutate(from=from-1,to=to-1)
node <- g %>% activate(nodes) %>% as_tibble()
p2 <- sankeyNetwork(
Links = edge,
Nodes = node,
Source = "from",
Target = "to",
Value = "n",
NodeID = "name",
fontSize = 12,
nodeWidth = 30,
iterations = 0
)
saveNetwork(p2, file="PJI_state_transition_sankeyplot.html")
UMAPdata_PJI <- UMAPdata_all[UMAPdata_all$PJI=="1",]
UMAPdata_PJI_wider <- UMAPdata_PJI[,c("ID","PJI","Time_original","cluster")] %>% pivot_wider(id_cols=c(ID,PJI),names_from=Time_original,values_from=cluster)
UMAPdata_PJI_wider$`1m_ago` <- UMAPdata_PJI_wider$`1m_ago` + 2
UMAPdata_PJI_wider$`3m_later` <- UMAPdata_PJI_wider$`3m_later` + 4
UMAPdata_PJI_wider$`6m_later` <- UMAPdata_PJI_wider$`6m_later` + 6
UMAPdata_PJI_wider$`12m_later` <- UMAPdata_PJI_wider$`12m_later` + 8
PJI_1mago_edge <- UMAPdata_PJI_wider %>%
dplyr::count(PJI,`1m_ago`) %>% na.omit() %>%
as_tbl_graph()
PJI_1mago_to_3mlater_edge <- UMAPdata_PJI_wider %>%
dplyr::count(`1m_ago`, `3m_later`) %>% na.omit() %>%
as_tbl_graph()
PJI_3mlater_to_6mlater_edge <- UMAPdata_PJI_wider %>%
dplyr::count(`3m_later`, `6m_later`) %>% na.omit() %>%
as_tbl_graph()
PJI_6mlater_to_12mlater_edge <- UMAPdata_PJI_wider %>%
dplyr::count(`6m_later`, `12m_later`) %>% na.omit() %>%
as_tbl_graph()
g <- PJI_1mago_edge %>% graph_join(PJI_1mago_to_3mlater_edge) %>% graph_join(PJI_3mlater_to_6mlater_edge) %>% graph_join(PJI_6mlater_to_12mlater_edge)
edge <- g %>% activate(edges) %>% as_tibble() %>% mutate(from=from-1,to=to-1)
node <- g %>% activate(nodes) %>% as_tibble()
p2 <- sankeyNetwork(
Links = edge,
Nodes = node,
Source = "from",
Target = "to",
Value = "n",
NodeID = "name",
fontSize = 12,
nodeWidth = 30,
iterations = 0
)
saveNetwork(p2, file="PJI_state_transition_sankeyplot_PJI.html")
UMAPdata_nonPJI <- UMAPdata_all[UMAPdata_all$PJI=="0",]
UMAPdata_nonPJI_wider <- UMAPdata_nonPJI[,c("ID","PJI","Time_original","cluster")] %>% pivot_wider(id_cols=c(ID,PJI),names_from=Time_original,values_from=cluster)
UMAPdata_nonPJI_wider$`1m_ago` <- UMAPdata_nonPJI_wider$`1m_ago` + 2
UMAPdata_nonPJI_wider$`3m_later` <- UMAPdata_nonPJI_wider$`3m_later` + 4
UMAPdata_nonPJI_wider$`6m_later` <- UMAPdata_nonPJI_wider$`6m_later` + 6
UMAPdata_nonPJI_wider$`12m_later` <- UMAPdata_nonPJI_wider$`12m_later` + 8
nonPJI_1mago_edge <- UMAPdata_nonPJI_wider %>%
dplyr::count(PJI,`1m_ago`) %>% na.omit() %>%
as_tbl_graph()
nonPJI_1mago_to_3mlater_edge <- UMAPdata_nonPJI_wider %>%
dplyr::count(`1m_ago`, `3m_later`) %>% na.omit() %>%
as_tbl_graph()
nonPJI_3mlater_to_6mlater_edge <- UMAPdata_nonPJI_wider %>%
dplyr::count(`3m_later`, `6m_later`) %>% na.omit() %>%
as_tbl_graph()
nonPJI_6mlater_to_12mlater_edge <- UMAPdata_nonPJI_wider %>%
dplyr::count(`6m_later`, `12m_later`) %>% na.omit() %>%
as_tbl_graph()
g <- nonPJI_1mago_edge %>% graph_join(nonPJI_1mago_to_3mlater_edge) %>% graph_join(nonPJI_3mlater_to_6mlater_edge) %>% graph_join(nonPJI_6mlater_to_12mlater_edge)
edge <- g %>% activate(edges) %>% as_tibble() %>% mutate(from=from-1,to=to-1)
node <- g %>% activate(nodes) %>% as_tibble()
p2 <- sankeyNetwork(
Links = edge,
Nodes = node,
Source = "from",
Target = "to",
Value = "n",
NodeID = "name",
fontSize = 12,
nodeWidth = 30,
iterations = 0
)
saveNetwork(p2, file="PJI_state_transition_sankeyplot_nonPJI.html")