-
Notifications
You must be signed in to change notification settings - Fork 0
/
external_scRNA.Rmd
160 lines (131 loc) · 10.1 KB
/
external_scRNA.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
---
title: "external_scRNA"
author: "Anthony Hung"
date: "2021-01-19"
output: workflowr::wflow_html
editor_options:
chunk_output_type: console
---
## Load in datasets and merge them
Load in datasets and create Seurat objects for each of them. Add metadata about the cell type they were assigned in their original contexts. Merge the Seurat object into one large object.
```{r Loading in data matrices}
if (file.exists("data/external_scRNA/Combined_singlecell_data_first.rds") & file.exists("data/external_scRNA/Combined_singlecell_data_allGenes.rds")){
Combined.common <- readRDS("data/external_scRNA/Combined_singlecell_data_first.rds")
Combined.common.allGenes <- readRDS("data/external_scRNA/Combined_singlecell_data_allGenes.rds")
} else {
#Pilot data
ANT1.2 <- readRDS("data/ANT1_2.rds")
ANT1.2 <- Seurat::AddMetaData(ANT1.2, "iPSC-Chondrocyte", col.name = "Cell.Type")
dim(ANT1.2)
#Chondrocytes from OA patients (https://www.ncbi.nlm.nih.gov/pubmed/30026257) run through STRT protocol
chondro <- read.table("data/external_scRNA/GSE104782_allcells_UMI_count.txt.gz", header = T)
chondro_genes <- chondro[,1]
chondro_data <- chondro[,-1]
chondro <- chondro_data
rownames(chondro) <- chondro_genes
#load chondro cluster data from paper
chondro_metadata <- read_csv("data/external_scRNA/GSE104782_Table_Cell_quality_information_and_clustering_information.csv")
#subset data to include only those that were assigned to a chondrocyte cluster in the paper
chondro.filtered <- chondro[,!is.na(chondro_metadata$Cluster)]
cluster.ident <- paste0("Chondro ", chondro_metadata$Cluster[!is.na(chondro_metadata$Cluster)])
#Create seurat object from chondrocytes
chondro.seurat <- CreateSeuratObject(chondro.filtered, project="Chondrocytes")
chondro.seurat <- Seurat::AddMetaData(chondro.seurat, cluster.ident, col.name = "Cell.Type")
dim(chondro.seurat)
#iPSC chondrogenic cells from teratoma (human iPSC injected into mouse); https://github.com/yanwu2014/teratoma-analysis-code
teratoma_data <- readRDS("/project2/gilad/anthonyhung/Projects/OAStrain_project/OAStrain/data/external_scRNA/McDonaldWu_etal/dm-ter-human-merged_clustering_v3.seurat.Robj") #they accidentally saved this as robj not rds
teratoma_metadata <- read.table("/project2/gilad/anthonyhung/Projects/OAStrain_project/OAStrain/data/external_scRNA/McDonaldWu_etal/dm-ter-human-merged_metadata.tsv", header = T, sep = "\t")
teratoma_data <- AddMetaData(teratoma_data, metadata = teratoma_metadata$cluster, col.name = "Cell.Type")
teratoma_chond <- subset(teratoma_data, Cell.Type == "Chondrogenic MSC/Fib")#subset to chondrogenic MSC/fib
DefaultAssay(object = teratoma_chond) <- "RNA"
dim(teratoma_chond)
#iPSC chondrogenic cells from directed differentiation (28 days of pellet) should have both mesenchymal like and chondrocyte like cells (without contaminants they claim) https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE160625
count_data_dicks_d7 <- readMM("data/external_scRNA/dicks_etal/GSM4876136_C59_D7_matrix.mtx.gz")
genes_dicks_d7 <- read_tsv("data/external_scRNA/dicks_etal/GSM4876136_C59_D7_genes.tsv.gz", col_names = F)
barcodes_dicks_d7 <- as.data.frame(read_tsv( "data/external_scRNA/dicks_etal/GSM4876136_C59_D7_barcodes.tsv.gz", col_names = F))
rownames(count_data_dicks_d7) <- genes_dicks_d7$X2
colnames(count_data_dicks_d7) <- barcodes_dicks_d7$X1
dicks_d7_seurat <- CreateSeuratObject(counts = count_data_dicks_d7, project = "d7")
dicks_d7_seurat <- Seurat::AddMetaData(dicks_d7_seurat, "dicks_d7", col.name = "Cell.Type")
count_data_dicks_d14 <- readMM("data/external_scRNA/dicks_etal/GSM4876137_C59_D14_matrix.mtx.gz")
genes_dicks_d14 <- read_tsv("data/external_scRNA/dicks_etal/GSM4876137_C59_D14_genes.tsv.gz", col_names = F)
barcodes_dicks_d14 <- as.data.frame(read_tsv( "data/external_scRNA/dicks_etal/GSM4876137_C59_D14_barcodes.tsv.gz", col_names = F))
rownames(count_data_dicks_d14) <- genes_dicks_d14$X2
colnames(count_data_dicks_d14) <- barcodes_dicks_d14$X1
dicks_d14_seurat <- CreateSeuratObject(counts = count_data_dicks_d14, project = "d14")
dicks_d14_seurat <- Seurat::AddMetaData(dicks_d14_seurat, "dicks_d14", col.name = "Cell.Type")
count_data_dicks_d28 <- readMM("data/external_scRNA/dicks_etal/GSM4876138_C59_D28_matrix.mtx.gz")
genes_dicks_d28 <- read_tsv("data/external_scRNA/dicks_etal/GSM4876138_C59_D28_genes.tsv.gz", col_names = F)
barcodes_dicks_d28 <- as.data.frame(read_tsv( "data/external_scRNA/dicks_etal/GSM4876138_C59_D28_barcodes.tsv.gz", col_names = F))
rownames(count_data_dicks_d28) <- genes_dicks_d28$X2
colnames(count_data_dicks_d28) <- barcodes_dicks_d28$X1
dicks_d28_seurat <- CreateSeuratObject(counts = count_data_dicks_d28, project = "d28")
dicks_d28_seurat <- Seurat::AddMetaData(dicks_d28_seurat, "dicks_d28", col.name = "Cell.Type")
count_data_dicks_d42 <- readMM("data/external_scRNA/dicks_etal/GSM4876139_C59_D42_matrix.mtx.gz")
genes_dicks_d42 <- read_tsv("data/external_scRNA/dicks_etal/GSM4876139_C59_D42_genes.tsv.gz", col_names = F)
barcodes_dicks_d42 <- as.data.frame(read_tsv( "data/external_scRNA/dicks_etal/GSM4876139_C59_D42_barcodes.tsv.gz", col_names = F))
rownames(count_data_dicks_d42) <- genes_dicks_d42$X2
colnames(count_data_dicks_d42) <- barcodes_dicks_d42$X1
dicks_d42_seurat <- CreateSeuratObject(counts = count_data_dicks_d42, project = "d42")
dicks_d42_seurat <- Seurat::AddMetaData(dicks_d42_seurat, "dicks_d42", col.name = "Cell.Type")
dicks_combined <- merge(dicks_d7_seurat, y = c(dicks_d14_seurat, dicks_d28_seurat, dicks_d42_seurat), project = "Combined.common")
dicks_combined[["percent.mt"]] <- PercentageFeatureSet(dicks_combined, pattern = "^MT-")
dicks_combined <- subset(dicks_combined, subset = percent.mt < 25)
dicks_combined <- NormalizeData(object = dicks_combined, normalization.method = "LogNormalize",
scale.factor = 10000)
dim(dicks_combined)
#Chondrocytes from healthy oLT of a male human with OA (outer lateral tibia) https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM4626766
## link to directories containing data files (count matrices)
chond_dir <- "data/external_scRNA/Chou_et_al/"
library(Matrix)
#read in data
chond_counts <- readMM(paste0(chond_dir, "GSM4626766_OA_oLT_113.matrix.mtx.gz"))
chond_genes <- read_tsv(paste0(chond_dir, "GSM4626766_OA_oLT_113.genes.tsv.gz"), col_names = F)
chond_barcodes <- as.data.frame(read_tsv(paste0(chond_dir, "GSM4626766_OA_oLT_113.barcodes.tsv.gz"), col_names = F))
#make Seurat object
rownames(chond_counts) <- chond_genes$X2
colnames(chond_counts) <- chond_barcodes$X1
Chond_seurat <- CreateSeuratObject(counts = chond_counts, project = "Chondrocytes") %>%
AddMetaData("Chondrocyte_", col.name = "Cell.Type")
dim(Chond_seurat)
#iPSCs/MSCs/osteoblasts from Human/chimp panel. Combine all the datasets and subset for humans
ghousman_data_dir <- ("/project2/gilad/ghousman/skeletal-human-chimp/human-chimp-skeletal-scRNA/data/cellranger-data-full/")
gh_ipsc_1.1 <- readRDS(paste0(ghousman_data_dir, "h.c.dat.spp.GHO-1.rds"))
gh_ipsc_1.1 <- Seurat::AddMetaData(gh_ipsc_1.1, "iPSC", col.name = "Cell.Type")
gh_ipsc_1.1[["percent.mt"]] <- PercentageFeatureSet(gh_ipsc_1.1, pattern = "^MT-")
gh_msc_1.1 <- readRDS(paste0(ghousman_data_dir, "h.c.dat.spp.GHO-2.rds"))
gh_msc_1.1 <- Seurat::AddMetaData(gh_msc_1.1, "iPSC-MSC", col.name = "Cell.Type")
gh_msc_1.1[["percent.mt"]] <- PercentageFeatureSet(gh_msc_1.1, pattern = "^MT-")
gh_chondro <- readRDS(paste0(ghousman_data_dir, "h.c.dat.spp.GHO-3.rds"))
gh_chondro <- Seurat::AddMetaData(gh_chondro, "iPSC-Chondro_GAH", col.name = "Cell.Type")
gh_chondro[["percent.mt"]] <- PercentageFeatureSet(gh_chondro, pattern = "^MT-")
gh_osteo <- readRDS(paste0(ghousman_data_dir, "h.c.dat.spp.GHO-4.rds"))
gh_osteo <- Seurat::AddMetaData(gh_osteo, "iPSC-Osteo", col.name = "Cell.Type")
gh_osteo[["percent.mt"]] <- PercentageFeatureSet(gh_osteo, pattern = "^MT-")
gh_big <- merge(gh_ipsc_1.1, y = c(gh_msc_1.1, gh_chondro, gh_osteo),
add.cell.ids = c("ipsc1.1", "msc1.1", "chondrogh", "osteo"), project = "ghousman")
gh_big_humans <- subset(gh_big, Species == "Human")
gh_big_humans <- subset(gh_big_humans, subset = percent.mt < 25)
gh_big_humans <- Seurat::AddMetaData(gh_big_humans, "Ghousman", col.name = "orig.ident")
dim(gh_big_humans)
#Human Liver samples run through 10x (https://doi.org/10.1038/s41467-018-06318-7). This is a subset of the entire dataset, which is from whole liver homogenate. I used scViz to subset the data for clusters that were called hepatocytes in the original study (3490 cells)
liver <- readRDS("data/external_scRNA/humanLiverSubset_hepatocytes.rds")
liver <- Seurat::AddMetaData(liver, "Hepatocyte", col.name = "Cell.Type")
dim(liver)
# Merge Datasets
# Merge the seurat objects into one large one, keeping metadata about which study they originated in as well as their assigned cell type.
# merge based on common genes between ALL datasets
common.features <- Reduce(intersect, list(rownames(ANT1.2), rownames(chondro.seurat), rownames(Chond_seurat), rownames(gh_big_humans), rownames(liver), rownames(teratoma_chond), rownames(dicks_combined)))
length(x = common.features)
Combined.common <- merge(ANT1.2[common.features, ], y = c(chondro.seurat[common.features, ], Chond_seurat[common.features, ], gh_big_humans[common.features, ], liver[common.features, ], teratoma_chond[common.features, ]), project = "Combined.common")
Combined.common <- merge(Combined.common[common.features, ], dicks_combined[common.features, ], project = "Combined.common")
#merge keeping all genes
Combined.common.allGenes <- merge(ANT1.2, y = c(chondro.seurat, Chond_seurat, gh_big_humans, liver, teratoma_chond), project = "Combined.common")
Combined.common.allGenes <- merge(Combined.common.allGenes, dicks_combined, project = "Combined.common")
Combined.common.allGenes_noliver <- merge(ANT1.2, y = c(chondro.seurat, Chond_seurat, gh_big_humans, teratoma_chond), project = "Combined.common.noliver")
Combined.common.allGenes_noliver <- merge(Combined.common.allGenes_noliver, dicks_combined, project = "Combined.common.noliver")
saveRDS(Combined.common, "data/external_scRNA/Combined_singlecell_data_first.rds")
saveRDS(Combined.common.allGenes, "data/external_scRNA/Combined_singlecell_data_allGenes.rds")
saveRDS(Combined.common.allGenes_noliver, "data/external_scRNA/Combined_singlecell_data_allGenes_noliver.rds")
}
```