-
Notifications
You must be signed in to change notification settings - Fork 0
/
external_scRNA.Rmd
177 lines (141 loc) · 11.5 KB
/
external_scRNA.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
---
title: "external_scRNA"
author: "Anthony Hung"
date: "2021-01-19"
output: workflowr::wflow_html
editor_options:
  chunk_output_type: console
---
# Load in datasets for topic model and merge them into one seurat object
## Download datasets from GEO
Some of the data included in this model come from publicly available datasets from published studies, housed in GEO.
Run the following code from the home directory of this repo to download the necessary datasets.
```{bash eval=FALSE}
# Download every external dataset into the locations that the R chunk in this
# document reads from. Run from the home directory of this repo.
data_dir="data/external_scRNA"
geo_ftp="ftp://ftp.ncbi.nlm.nih.gov/geo"

# -p creates missing parent directories and does not fail when the directories
# already exist, so the downloads can safely be re-run.
mkdir -p "$data_dir/Wuetal2021" "$data_dir/Chou_et_al"

#download Ji et al 2018 data of Chondrocytes from OA patients run through STRT single cell RNA protocol from GEO (Series GSE104782)
# The R chunk reads both of these files directly from data/external_scRNA/.
ji_url="$geo_ftp/series/GSE104nnn/GSE104782/suppl"
wget -P "$data_dir" "$ji_url/GSE104782_allcells_UMI_count.txt.gz" #raw count data
wget -P "$data_dir" "$ji_url/GSE104782_Table_Cell_quality_information_and_clustering_information.xlsx" # clustering information for cells in the raw data
# NOTE: the R chunk reads the clustering table as
# GSE104782_Table_Cell_quality_information_and_clustering_information.csv, so
# export the downloaded .xlsx to a CSV with that name in the same directory
# before running the R chunk.

#download Wu et al 2021 data of iPSC-Chondrocyte chondrogenic pellet differentiation time course from GEO (Series GSE160625). Data include barcodes, genes, and count matrices from samples taken from iPSCs and samples from 7, 14, 28, and 42 day chondrogenic pellets treated with chondrogenic media containing a WNT and MITF inhibitor
# Each entry is "<GSM accession>_<sample label>": iPSCs, then day 7, 14, 28 and 42 pellets.
for sample in GSM4876128_hiPSC GSM4876136_C59_D7 GSM4876137_C59_D14 GSM4876138_C59_D28 GSM4876139_C59_D42; do
  gsm="${sample%%_*}" # accession = everything before the first underscore
  for part in barcodes.tsv genes.tsv matrix.mtx; do # barcodes, genes, count matrix
    wget -P "$data_dir/Wuetal2021" "$geo_ftp/samples/GSM4876nnn/$gsm/suppl/${sample}_${part}.gz"
  done
done

#download data for Chou et al 2020 from GEO (Sample GSM4626766) for chondrocytes from Minimally damaged outer lateral tibial (oLT) plateau cartilage isolated from one male individual
for part in barcodes.tsv genes.tsv matrix.mtx; do # barcodes, genes, count matrix
  wget -P "$data_dir/Chou_et_al" "$geo_ftp/samples/GSM4626nnn/GSM4626766/suppl/GSM4626766_OA_oLT_113.${part}.gz"
done
```
Load in datasets and create Seurat objects for each of them. Add metadata about the cell type they were assigned in their original contexts. Merge the Seurat objects into one large object.
```{r Loading in data matrices}
library(Matrix)
# iPSC-derived chondrocytes generated in the current study. The saved object
# is read back and tagged with the Cell.Type label "iPSC-Chondrocyte".
ANT1.2 <- Seurat::AddMetaData(
  object = readRDS("data/ANT1_2.rds"),
  metadata = "iPSC-Chondrocyte",
  col.name = "Cell.Type"
)
# Report the dimensions of the loaded object
dim(ANT1.2)
#Chondrocytes from OA patients (https://www.ncbi.nlm.nih.gov/pubmed/30026257) run through STRT protocol
# Load the raw UMI count table. Its first column holds the gene identifiers;
# they are moved into the row names so that only count columns remain.
ji_chondro <- read.table(
  "data/external_scRNA/GSE104782_allcells_UMI_count.txt.gz",
  header = TRUE
)
ji_chondro_genes <- ji_chondro[, 1]
ji_chondro <- ji_chondro[, -1]
rownames(ji_chondro) <- ji_chondro_genes

# Load the clustering assignments published with the paper. readr is called
# through its namespace because this document only attaches Matrix.
ji_chondro_metadata <- readr::read_csv(
  "data/external_scRNA/GSE104782_Table_Cell_quality_information_and_clustering_information.csv"
)

# Keep only the cells that were assigned to a cluster in the original paper.
# NOTE(review): the metadata rows are matched to the count columns purely by
# position -- confirm that both files list the cells in the same order.
has_cluster <- !is.na(ji_chondro_metadata$Cluster)
ji_chondro.filtered <- ji_chondro[, has_cluster]
cluster.ident <- paste0("Chondro ", ji_chondro_metadata$Cluster[has_cluster])

# Build the Seurat object and store the published cluster label as Cell.Type
ji_chondro.seurat <- Seurat::CreateSeuratObject(
  ji_chondro.filtered,
  project = "Chondrocytes"
)
ji_chondro.seurat <- Seurat::AddMetaData(
  ji_chondro.seurat,
  cluster.ident,
  col.name = "Cell.Type"
)
dim(ji_chondro.seurat)
#iPSC chondrogenic cells from directed differentiation time course from iPSC chondrogenic pellet culture. https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE160625

# Read one Wu et al. sample (count matrix, genes and barcodes files) and
# return it as a Seurat object labelled with its cell type.
#
# prefix:    file name prefix shared by the three files of the sample,
#            e.g. "GSM4876128_hiPSC"
# project:   project name handed to CreateSeuratObject()
# cell_type: value stored in the Cell.Type metadata column
# dir:       directory that holds the downloaded files
#
# Package functions are namespace-qualified because this document only
# attaches Matrix.
read_wu_sample <- function(prefix, project, cell_type,
                           dir = "data/external_scRNA/Wuetal2021") {
  sample_file <- function(part) {
    file.path(dir, paste0(prefix, "_", part, ".gz"))
  }
  counts <- Matrix::readMM(sample_file("matrix.mtx"))
  genes <- readr::read_tsv(sample_file("genes.tsv"), col_names = FALSE)
  barcodes <- as.data.frame(
    readr::read_tsv(sample_file("barcodes.tsv"), col_names = FALSE)
  )
  # Second column of the genes file labels the features, first column of the
  # barcodes file labels the cells
  rownames(counts) <- genes$X2
  colnames(counts) <- barcodes$X1
  seurat_obj <- Seurat::CreateSeuratObject(counts = counts, project = project)
  Seurat::AddMetaData(seurat_obj, cell_type, col.name = "Cell.Type")
}

# iPSCs followed by the day 7, 14, 28 and 42 chondrogenic pellets
wu_iPSC_seurat <- read_wu_sample("GSM4876128_hiPSC", "iPSC", "Wu_iPSC")
wu_d7_seurat <- read_wu_sample(
  "GSM4876136_C59_D7", "d7", "Wu_chondrogenic_pellet_d7"
)
wu_d14_seurat <- read_wu_sample(
  "GSM4876137_C59_D14", "d14", "Wu_chondrogenic_pellet_d14"
)
wu_d28_seurat <- read_wu_sample(
  "GSM4876138_C59_D28", "d28", "Wu_chondrogenic_pellet_d28"
)
wu_d42_seurat <- read_wu_sample(
  "GSM4876139_C59_D42", "d42", "Wu_chondrogenic_pellet_d42"
)

# Combine the time course into one object
wu_combined <- merge(
  wu_iPSC_seurat,
  y = c(wu_d7_seurat, wu_d14_seurat, wu_d28_seurat, wu_d42_seurat),
  project = "Combined.common"
)
# Compute the percentage of counts from features matching "^MT-" and keep
# only the cells below 25%
wu_combined[["percent.mt"]] <- Seurat::PercentageFeatureSet(
  wu_combined,
  pattern = "^MT-"
)
wu_combined <- subset(wu_combined, subset = percent.mt < 25)
dim(wu_combined)
#Chondrocytes from healthy oLT of a male human with OA (outer lateral tibia) https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM4626766
# Read the count matrix together with its genes and barcodes files. Package
# functions are namespace-qualified because this document only attaches Matrix.
chou_chondro_counts <- Matrix::readMM(
  "data/external_scRNA/Chou_et_al/GSM4626766_OA_oLT_113.matrix.mtx.gz"
)
chou_chondro_genes <- readr::read_tsv(
  "data/external_scRNA/Chou_et_al/GSM4626766_OA_oLT_113.genes.tsv.gz",
  col_names = FALSE
)
chou_chondro_barcodes <- as.data.frame(readr::read_tsv(
  "data/external_scRNA/Chou_et_al/GSM4626766_OA_oLT_113.barcodes.tsv.gz",
  col_names = FALSE
))
#make Seurat object
# Second column of the genes file labels the features, first column of the
# barcodes file labels the cells
rownames(chou_chondro_counts) <- chou_chondro_genes$X2
colnames(chou_chondro_counts) <- chou_chondro_barcodes$X1
# Two plain assignments replace the original `%>%` pipeline, which needs an
# attached package that provides the pipe operator.
chou_chondro_seurat <- Seurat::CreateSeuratObject(
  counts = chou_chondro_counts,
  project = "Chondrocytes"
)
# NOTE(review): the label keeps its trailing underscore ("Chondrocyte_") as in
# the original code -- confirm that this is the intended Cell.Type value.
chou_chondro_seurat <- Seurat::AddMetaData(
  chou_chondro_seurat,
  "Chondrocyte_",
  col.name = "Cell.Type"
)
dim(chou_chondro_seurat)
# Cells from Genevieve Housman: iPSCs, MSCs, iPSC-chondrocytes and
# iPSC-osteoblasts, all derived from the same human individual from which the
# iPSCs were created. These are 10x single cell RNA sequencing data that were
# already filtered for barcodes with fewer than 25% reads coming from
# mitochondria.
GH_iPSC <- readRDS("data/external_scRNA/GH_cells/gh_ipsc.rds")
GH_iPSCMSC <- readRDS("data/external_scRNA/GH_cells/gh_msc.rds")
GH_iPSCChond <- readRDS("data/external_scRNA/GH_cells/gh_ipscChond.rds")
GH_iPSCOsteo <- readRDS("data/external_scRNA/GH_cells/gh_ipscOsteo.rds")

# Combine the four objects into one; the cell ids are listed in the same
# order as the objects being merged
gh_remaining <- c(GH_iPSCMSC, GH_iPSCChond, GH_iPSCOsteo)
gh_cell_ids <- c("ipsc", "msc", "chondrogh", "osteo")
GH_merged <- merge(
  GH_iPSC,
  y = gh_remaining,
  add.cell.ids = gh_cell_ids,
  project = "ghousman"
)

# Record the source of these cells in the orig.ident metadata column
GH_merged <- Seurat::AddMetaData(
  object = GH_merged,
  metadata = "Ghousman",
  col.name = "orig.ident"
)
dim(GH_merged)
# Human liver samples run through 10x
# (https://doi.org/10.1038/s41467-018-06318-7). The full dataset comes from
# whole liver homogenate; following the example from the original paper,
# scViz was used to keep only the clusters called hepatocytes in that study
# (the subset contains a total of 3490 cells).
liver_data <- Seurat::AddMetaData(
  object = readRDS("data/external_scRNA/humanLiverSubset_hepatocytes.rds"),
  metadata = "Hepatocyte",
  col.name = "Cell.Type"
)
dim(liver_data)
# Merge Datasets
# Merge the seurat objects into one large one, keeping metadata about which study they originated in as well as their assigned cell type.
Merged_external_data <- merge(
  ANT1.2,
  y = c(ji_chondro.seurat, chou_chondro_seurat, GH_merged, liver_data),
  project = "Combined.common"
)
# The Wu et al. time course is merged in a second step, as in the original
# analysis
Merged_external_data <- merge(
  Merged_external_data,
  wu_combined,
  project = "Combined.common"
)
# Report the size of the merged object. The original called dim() without an
# argument, which raises an error and stops the chunk before the object is
# saved.
dim(Merged_external_data)
saveRDS(Merged_external_data, "data/external_scRNA/merged_external_scRNA.rds")
```