In [None]:
# Project root; setwd() makes it the base for the relative paths used later on.
project_dir <- "~/ben_lustre/current_projects/kidney_glomTI_response"
setwd(project_dir)

# Load the shared helper script, hiding package start-up messages.
suppressPackageStartupMessages(
  source("https://raw.githubusercontent.com/bjstewart1/helpful_singlecell/main/functions.R")
)

# Python modules bound through import().
ad <- import("anndata")
sc <- import("scanpy")
scv <- import("scvelo")
sp <- import("scipy")

# theme_bw() with axis text and axis titles forced to black.
clean_theme <- theme_bw() +
  theme(
    axis.text = element_text(color = "black"),
    axis.title = element_text(color = "black")
  )

In [None]:
# Show the Python configuration in use (py_config() is presumably reticulate's,
# made available by the sourced helper script -- confirm).
py_config()

In [None]:
library(Seurat)

# Build an AnnData object from the RNA counts of a Seurat object.
#   seurat_subset: Seurat object holding an "RNA" assay with a @counts slot.
#   gene_info:     data frame passed to AnnData as `var`.
# Returns the AnnData object, with obs_names set to the column names of the
# count matrix. The matrix is transposed so those columns become the rows of X.
counts_to_adata <- function(seurat_subset, gene_info) {
  counts <- seurat_subset[["RNA"]]@counts
  adata <- ad$AnnData(X = t(counts), var = gene_info)
  adata$obs_names <- colnames(counts)
  adata
}

# Reorder a metadata table to follow `barcodes` (looked up by row name).
#   metadata: data frame whose row names are expected to be barcodes.
#   barcodes: character vector giving the required row order.
#   label:    short description used in the warning message.
# Returns metadata[barcodes, ]. Barcodes without a matching row yield all-NA
# rows, exactly as before; the only addition is a warning so that this no
# longer passes unnoticed.
align_metadata <- function(metadata, barcodes, label) {
  unmatched <- sum(!barcodes %in% rownames(metadata))
  if (unmatched > 0) {
    warning(
      unmatched, " of ", length(barcodes), " ", label,
      " barcodes have no exact match in the metadata row names",
      call. = FALSE
    )
  }
  metadata[barcodes, ]
}

lakedir <- "~/ben_lustre/current_projects/kidney_glomTI_response/data/public_data/lake_2023"
sobj <- readRDS(file.path(lakedir, "cxg_lake_2023.rds"))

# Gene annotation shared by both AnnData objects: keep the original row names
# in `ID`, then index the table by feature_name and mirror it into `Symbol`.
var_df <- sobj[["RNA"]]@meta.features
var_df$ID <- rownames(var_df)
rownames(var_df) <- var_df$feature_name
var_df$Symbol <- var_df$feature_name

# Nuclei: observations whose suspension_type is "nucleus".
nucleus_sobj <- sobj[, sobj@meta.data$suspension_type %in% "nucleus"]
nuclei_adata <- counts_to_adata(nucleus_sobj, var_df)
nuclei_metadata <- read.table(
  file.path(lakedir, "sn_rnaseq", "GSE183277_Kidney_Healthy-Injury_Cell_Atlas_snCv3_Metadata_03282022.txt")
)
nuclei_metadata <- align_metadata(nuclei_metadata, nuclei_adata$obs_names$values, "nuclei")
nuclei_adata$obs <- nuclei_metadata
# Keep a copy of X in a "counts" layer.
nuclei_adata$layers <- list("counts" = nuclei_adata$X)

# Cells: observations whose suspension_type is "cell".
cell_sobj <- sobj[, sobj@meta.data$suspension_type %in% "cell"]
cell_adata <- counts_to_adata(cell_sobj, var_df)
cell_metadata <- read.table(
  file.path(lakedir, "sc_rnaseq", "GSE183276_Kidney_Healthy-Injury_Cell_Atlas_scCv3_Metadata_03282022.txt")
)
cell_metadata <- align_metadata(cell_metadata, cell_adata$obs_names$values, "cell")
cell_adata$obs <- cell_metadata
cell_adata$layers <- list("counts" = cell_adata$X)

In [None]:
# Write both AnnData objects to .h5ad files; the paths are relative, so they
# resolve against the current working directory.
nuclei_adata$write_h5ad('data/public_data/lake_2023/lake_SNrnaseq.h5ad')
cell_adata$write_h5ad('data/public_data/lake_2023/lake_SCrnaseq.h5ad')

In [None]:
# ATAC data: attach the analysis packages, then load the peak-count object.
library(Signac)
library(Seurat)

library(Pando)

# Genome infrastructure packages.
library(GenomicRanges)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v86)
library(rtracklayer)

atac <- readRDS(
  "./data/public_data/lake_2023/atac/GSE183273_Kidney_Healthy-Injury_Cell_Atlas_SNARE2-AC_Peak-Counts_03282022.RDS"
)

In [None]:
# Gene annotation taken from EnsDb.Hsapiens.v86, with sequence level style set to "UCSC".
message("getting genome annotation")
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotation) <- "UCSC"

# Combine the peak counts, the fragment file and the annotation into a chromatin assay.
# Peak names are split on ":" and "-".
message("making chromatin assay")
chrom_assay <- CreateChromatinAssay(
  counts = atac,
  fragments = "./data/public_data/lake_2023/atac/GSE183273_BUKMAP.fragments.sort.tsv.gz",
  annotation = annotation,
  genome = "hg38",
  sep = c(":", "-")
)

In [None]:
# Wrap the chromatin assay in a Seurat object under the assay name "peaks".
atac_sobj <- CreateSeuratObject(
  counts = chrom_assay,
  assay = "peaks"
)

In [None]:
# Print the object for a quick visual check.
atac_sobj

In [None]:
# Metadata table accompanying the ATAC data.
atac_meta <- read.table(
  "./data/public_data/lake_2023/atac/GSE183273_Kidney_Healthy-Injury_Cell_Atlas_SNARE2-RNA-AC_Metadata_03282022.txt"
)

In [None]:
# Append the external metadata columns to the Seurat object's metadata.
# cbind() pairs rows purely by position, so a metadata table stored in a
# different order would be attached to the wrong cells. When every cell name
# is present among the row names of atac_meta, reorder the table to the
# object's cell order first. Otherwise keep the positional pairing (the
# previous behaviour) and warn that the alignment could not be verified.
atac_cells <- rownames(atac_sobj@meta.data)
if (all(atac_cells %in% rownames(atac_meta))) {
  atac_sobj@meta.data <- cbind(
    atac_sobj@meta.data,
    atac_meta[atac_cells, , drop = FALSE]
  )
} else {
  warning(
    "not all cells of atac_sobj are present in rownames(atac_meta); ",
    "metadata attached by position without checking alignment",
    call. = FALSE
  )
  atac_sobj@meta.data <- cbind(atac_sobj@meta.data, atac_meta)
}

In [None]:
# Copy the subclass.full annotation into a cell_type column.
atac_sobj@meta.data$cell_type <- atac_sobj@meta.data$subclass.full

In [None]:
# Use the cell_type column as the object's identities.
Idents(atac_sobj) <- atac_sobj@meta.data$cell_type

In [None]:
# Save the annotated ATAC Seurat object to disk (path relative to the working directory).
saveRDS(atac_sobj, "./data/public_data/lake_2023/lake_atac.RDS")

In [None]:
# Slide-seq data.
# Sample names are the part of each file name before the first underscore,
# de-duplicated across all files in ss_dir.
ss_dir <- "~/ben_lustre/current_projects/kidney_glomTI_response/data/public_data/lake_2023/slideseq"
ss_samples <- unique(unlist(lapply(
  strsplit(list.files(ss_dir), "_"),
  `[`,
  1
)))

In [None]:
# Read one sample's MappedDGEForR table.
mapped <- read.csv(
  file.path(ss_dir, "GSM6128258_Puck_210412_02.MappedDGEForR.csv")
)

In [None]:
# Placeholder loop over the Slide-seq samples: the function body is empty, so
# this currently returns a list with one NULL per sample.
lapply(ss_samples, function(gsm){
    
    
})

In [None]:
# Load the raw files of a single Slide-seq sample from ss_dir.
gsm <- "GSM6128229"
message(gsm)

# Files belonging to this sample. The patterns are literal file-name fragments,
# so match them with fixed = TRUE instead of as regular expressions.
gsm_files <- grep(gsm, list.files(ss_dir), value = TRUE, fixed = TRUE)
bc_file <- grep("barcode_matching", gsm_files, value = TRUE, fixed = TRUE)
matrix_file <- grep("matrix.mtx", gsm_files, value = TRUE, fixed = TRUE)
feature_file <- grep("expression_features", gsm_files, value = TRUE, fixed = TRUE)

# Use a BeadLocationsForR file for the coordinates when the sample has one;
# otherwise fall back to the barcode_matching file. spatial_type records which
# source was used.
spatial_file <- grep("BeadLocationsForR", gsm_files, value = TRUE, fixed = TRUE)
spatial_type <- "bead_loc"
if (length(spatial_file) == 0) {
  spatial_file <- bc_file
  spatial_type <- "bc_match"
}

# Each lookup must resolve to exactly one file; fail here with a clear message
# rather than inside the readers.
stopifnot(
  length(bc_file) == 1,
  length(matrix_file) == 1,
  length(feature_file) == 1,
  length(spatial_file) == 1
)

# The matrix is read once (it was previously read twice with identical arguments).
expression_matrix <- Matrix::readMM(file.path(ss_dir, matrix_file))
# NOTE(review): barcodes is read from the barcode_matching file -- confirm this
# is the intended source of the matrix barcodes.
barcodes <- read.csv(file.path(ss_dir, bc_file), sep = "\t", header = FALSE)
features <- read.csv(file.path(ss_dir, feature_file), sep = "\t", header = FALSE)
locations <- read.csv(file.path(ss_dir, spatial_file), header = FALSE, sep = "\t")

In [None]:
#generate an adata
# NOTE(review): unfinished call -- no value is supplied for X yet.
ad$AnnData(X = )

In [None]:
# Check the dimensions of the loaded expression matrix.
dim(expression_matrix)

In [None]:
# Switch to another sample accession.
gsm <- "GSM6128237"

In [None]:
# Locate this sample's files in ss_dir, then pick out the barcode-matching
# file and the matrix file by name.
gsm_files <- grep(
  gsm,
  list.files(ss_dir),
  value = TRUE
)
bc_file <- grep("barcode_matching", gsm_files, value = TRUE)
matrix_file <- grep("matrix.mtx", gsm_files, value = TRUE)

In [None]:
# Read the sample's matrix file with Matrix::readMM().
expression_matrix <- Matrix::readMM(
  file.path(ss_dir, matrix_file)
)

In [None]:
# Now process the Slide-seq data.

In [None]:
# Barcode-matching table for this puck; the first column becomes the row names.
bc_map <- read.table(
  file.path(ss_dir, "GSM6128237_Puck_210113_35_barcode_matching.txt"),
  row.names = 1
)



In [None]:
# Read the matched digital expression matrix for this puck as a plain table.
expression_matrix <- read.table(
  file.path(ss_dir, "GSM6128237_Puck_210113_35.matched.digital_expression_matrix.tsv")
)

In [None]:
# Print the table that was just read, for inspection.
expression_matrix

In [None]:
# File names noted for sample GSM6128237:
# GSM6128237_Puck_210113_35_barcode_matching.txt
# GSM6128237_Puck_210113_35.matched.digital_expression_barcodes.tsv
# GSM6128237_Puck_210113_35.matched.digital_expression_features.tsv
# GSM6128237_Puck_210113_35.matched.digital_expression_matrix.mtx

In [None]:
# Print the barcode-matching table for inspection.
bc_map

In [None]:
# Single-cell data: load the scCv3 counts object.
sc_data <- readRDS(
  "./data/public_data/lake_2023/sc_rnaseq/GSE183276_Kidney_Healthy-Injury_Cell_Atlas_scCv3_Counts_03282022.RDS"
)

In [None]:
# Take a small column subset of sc_data for quick checks: the first 500
# columns, or all of them when there are fewer. seq_len(min(...)) avoids the
# subscript-out-of-bounds error that 1:500 raises on a narrower object, and
# drop = FALSE keeps the result two-dimensional even for a single column.
sc_data_small <- sc_data[, seq_len(min(500L, ncol(sc_data))), drop = FALSE]

In [None]:
# Bind the Python numpy module.
np <- import("numpy")

In [None]:
# Element-wise exp(x) - 1 of the dense subset.
# Base R's expm1() computes this directly and accurately for values near zero,
# and avoids copying the matrix to Python and back. Unlike the numpy round
# trip, the result keeps the row and column names of the input.
scaled_data <- expm1(as.matrix(sc_data_small))

In [None]:
# Per-row mean across the columns of the subset.
rowMeans(sc_data_small)

In [None]:
# Tabulate the values found in columns 50 and 51 of the subset, pooled together.
table(c(sc_data_small[, 50], sc_data_small[, 51]))