In [1]:
library(Seurat)
library(SeuratDisk)
library(anndata)
library(reticulate)
# use_python("/usr/bin/python")

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t


Registered S3 method overwritten by 'SeuratDisk':
  method            from  
  as.sparse.H5Group Seurat


Attaching package: ‘anndata’


The following object is masked from ‘package:SeuratObject’:

    Layers




In [2]:
#' Regularise dataframe
#'
#' Optionally drops columns that hold only a single distinct value and/or
#' columns that are entirely `NA`. Whenever the table has (or ends up with) no
#' columns, a `name` column holding the row names is added so the result is
#' never column-less.
#'
#' @param df Input data frame, usually cell metadata table (data.frame-like
#'   object)
#' @param drop_single_values Drop columns with only a single value (logical)
#' @param drop_na_values Drop columns whose values are all `NA` (logical)
#'
#' @return Dataframe
.regularise_df <- function(df, drop_single_values=FALSE, drop_na_values=TRUE) {
  if (ncol(df) == 0) df[["name"]] <- rownames(df)
  if (drop_single_values) {
    # vapply() guarantees a logical vector regardless of the column types.
    k_singular <- vapply(df, function(x) length(unique(x)) == 1, logical(1))
    if (any(k_singular)) {
      warning(
        "Dropping single category variables: ",
        paste(colnames(df)[k_singular], collapse = ", ")
      )
    }
    df <- df[, !k_singular, drop = FALSE]
    if (ncol(df) == 0) df[["name"]] <- rownames(df)
  }
  if (drop_na_values) {
    k_na <- vapply(df, function(x) all(is.na(x)), logical(1))
    if (any(k_na)) {
      warning(
        "Dropping NA category variables: ",
        paste(colnames(df)[k_na], collapse = ", ")
      )
    }
    df <- df[, !k_na, drop = FALSE]
    if (ncol(df) == 0) df[["name"]] <- rownames(df)
  }
  df
}


#' Prepare cell metadata
#'
#' Renames scanpy-style QC columns of an AnnData.obs table. The `n_counts`
#' and `n_genes` patterns are substituted in every column name; the
#' `pct_counts_*` patterns are substituted only when a column with exactly
#' that name is present.
#'
#' @param obs_pd Input AnnData.obs dataframe
#' @param assay Assay name used as suffix of the renamed count columns,
#'   default "RNA" (str)
#'
#' @return The input data frame with renamed columns
#'
#' @import reticulate
.obs2metadata <- function(obs_pd, assay = "RNA") {
  # .regularise_df() is intentionally not applied here: all columns are kept.
  meta <- obs_pd

  # Substitutions applied unconditionally (first match within each name).
  always <- c(
    n_counts = paste0("nCounts_", assay),
    n_genes = paste0("nFeatures_", assay)
  )
  for (pattern in names(always)) {
    colnames(meta) <- sub(pattern, always[[pattern]], colnames(meta))
  }

  # Substitutions applied only if the exact source column exists.
  guarded <- c(
    pct_counts_mt = "percent.mt",
    pct_counts_rb = "percent.rb",
    pct_counts_hb = "percent.hb"
  )
  for (pattern in names(guarded)) {
    if (pattern %in% colnames(meta)) {
      colnames(meta) <- sub(pattern, guarded[[pattern]], colnames(meta))
    }
  }

  meta
}


#' Prepare feature metadata
#'
#' Renames scanpy-style columns of an AnnData.var table. Substitutions are
#' applied in a fixed order, so `dispersions_norm` is handled before the
#' shorter `dispersions` pattern.
#'
#' @param var_pd Input AnnData.var dataframe
#'
#' @return The input data frame with renamed columns
#'
#' @import reticulate
.var2feature_metadata <- function(var_pd) {
  # .regularise_df() is intentionally not applied here: all columns are kept.
  feature_meta <- var_pd

  # Ordered (pattern, replacement) pairs; order matters for overlapping names.
  renames <- list(
    c("dispersions_norm", "mvp.dispersion.scaled"),
    c("dispersions", "mvp.dispersion"),
    c("means", "mvp.mean"),
    c("highly_variable", "highly.variable")
  )
  for (pair in renames) {
    colnames(feature_meta) <- sub(pair[1], pair[2], colnames(feature_meta))
  }

  feature_meta
}


#' Collect selected AnnData `uns` entries
#'
#' Looks up each requested key that is also reported by `ad$uns_keys()` and
#' returns the corresponding `ad$uns[key]` values in a list named by key.
#'
#' @param ad AnnData-like object exposing `uns` and `uns_keys()`
#' @param target_uns_keys Keys to extract (list or character vector); keys not
#'   present in `ad` are ignored
#'
#' @return Named list with one element per matched key (empty if none match)
.uns2misc <- function(ad, target_uns_keys = list()) {
  # Flatten both sides to character: sapply() only names its result after X
  # when X is a character vector, so list-typed keys gave an unnamed list.
  wanted <- as.character(unlist(target_uns_keys))
  available <- as.character(unlist(ad$uns_keys()))
  uns_keys <- intersect(wanted, available)
  misc <- sapply(uns_keys, function(x) ad$uns[x], simplify = FALSE, USE.NAMES = TRUE)
  misc
}

In [3]:
#' Convert an AnnData object to a Seurat object
#'
#' Builds a Seurat object from `adata$X` (transposed, loaded as counts) with
#' `adata$obs` as cell metadata, adds every AnnData layer except `scale.data`
#' as an extra assay, converts each `obsm` entry to a dimensional reduction,
#' and copies the requested `uns` entries into the `misc` slot.
#'
#' @param adata AnnData object
#' @param outFile Optional path; when not NULL the result is written there
#'   with `saveRDS()`
#' @param main_layer One of "counts", "data", "scale.data". Validated for
#'   backward compatibility but currently not used: X is always loaded as
#'   counts.
#' @param assay Name of the main assay, default "RNA" (str)
#' @param project_name Seurat project name
#' @param target_uns_keys Keys of `adata$uns` to copy into `misc`
#'
#' @return Seurat object
AnndataToSeurat <- function(adata, outFile = NULL, main_layer = "counts", assay = "RNA", project_name = "Seurat Project", target_uns_keys = list()) {
  main_layer <- match.arg(main_layer, c("counts", "data", "scale.data"))

  # Forward `assay` so the renamed count columns match the assay being built.
  obs_df <- .obs2metadata(adata$obs, assay = assay)
  var_df <- .var2feature_metadata(adata$var)
  X <- t(adata$X)
  colnames(X) <- rownames(obs_df)
  rownames(X) <- rownames(var_df)

  # Create the main assay under the requested name; otherwise the
  # DefaultAssay() call below fails for any assay other than "RNA".
  srat <- CreateSeuratObject(
    counts = X,
    project = project_name,
    assay = assay,
    meta.data = obs_df
  )
  message("X -> counts")

  # Add AnnData layers to assays
  for (layer in names(adata$layers)) {
    if (layer != "scale.data") {
      srat[[layer]] <- CreateAssayObject(data = t(adata$layers[layer]))
    }
    message("Adding AnnData layers to Seurat assays")
  }

  DefaultAssay(srat) <- assay

  # Add dimension reductions
  embed_names <- unlist(adata$obsm_keys())
  if (length(embed_names) > 0) {
    cell_names <- colnames(srat[[assay]])
    for (name in embed_names) {
      embed <- as.matrix(adata$obsm[[name]])
      rownames(embed) <- cell_names
      # Well-known embeddings get conventional keys; anything else is derived
      # from its name: drop a leading "X_", upper-case the first part and
      # lower-case everything after the first underscore.
      key <- switch(name,
        "X_pca" = "PC",
        "X_tsne" = "tSNE",
        "X_umap" = "UMAP",
        sub("_(.*)", "\\L\\1", sub("^X_", "", toupper(name)), perl = TRUE)
      )
      # seq_len() stays empty for a zero-column embedding (seq(0) is c(1, 0)).
      colnames(embed) <- paste0(key, "_", seq_len(ncol(embed)))
      # Strip only a leading "X_" so names merely containing "X_" are kept.
      reduction_name <- sub("^X_", "", name)
      srat[[reduction_name]] <- Seurat::CreateDimReducObject(
        embeddings = embed,
        loadings = new("matrix"),
        assay = assay,
        stdev = numeric(0L),
        key = paste0(key, "_")
      )
      message("Adding AnnData embeddings to Seurat assays")
    }
  }

  srat@misc <- .uns2misc(adata, target_uns_keys = target_uns_keys)

  # if (!is.null(outFile)) SaveH5Seurat(srat, filename = outFile, overwrite = TRUE, verbose = FALSE)
  if (!is.null(outFile)) saveRDS(object = srat, file = outFile)
  srat
}


In [3]:
# Read the .h5ad file into an AnnData object. The path is hard-coded and
# relative to the notebook's working directory.
ad <- read_h5ad('../../../oscb/user_storage/Benchmarks/facs-Bladder_1751302627486/QC/results/313b1738828fdf0d5157af2b12a71be6/facs_Bladder_MAGIC_imputation.h5ad')

In [4]:
ad

AnnData object with n_obs × n_vars = 1378 × 2000
    obs: 'orig.ident', 'n_counts', 'n_genes', 'nReads', 'plate.barcode', 'mouse.id', 'tissue', 'subtissue', 'FACS.selection', 'mouse.sex', 'percent.ercc', 'free_annotation', 'cell_ontology_class', 'percent.ribo', 'res.0.4', 'cluster.ids', 'cell_ontology_id', 'pct_counts_mt', 'pct_counts_rb', 'pct_counts_hb', 'percent.plat', 'RNA_snn_res.0.5', 'seurat_clusters', 'doublet_score', 'doublet_class', 'leiden', 'louvain', 'MAGIC_leiden', 'MAGIC_louvain'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'highly_variable'
    uns: 'MAGIC_leiden', 'MAGIC_louvain', 'leiden', 'louvain', 'neighbors', 'pca'
    obsm: 'MAGIC_pca', 'MAGIC_tsne', 'MAGIC_tsne_3D', 'MAGIC_umap', 'MAGIC_umap_3D', 'X_pca', 'X_tsne', 'X_umap', 'X_umap_3D'
    varm: 'PCs'
    layers: 'MAGIC', 'raw_counts', 'scale.data'
    obsp: 'connectivities', 'distances'

In [15]:
as.matrix(ad$X)

Unnamed: 0,Cyp2e1,Cxcl10,Adm,Cyp1a1,Reg3g,Gadd45g,Sprr2f,Rbp4,Sprr2g,Car3,⋯,Upb1,Plod1,Lpin3,Mettl7a1,Tmem184a,Prune2,Psmb11,D14Ertd668e,Xlr,C2cd4a
A1.B000610.3_56_F.1.1,36,546,0,0,0,1875,0,425,0,1549,⋯,0,0,0,0,0,0,0,0,0,0
A1.B002764.3_38_F.1.1,0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
A1.B002771.3_39_F.1.1,0,0,0,0,0,514,0,242,0,4125,⋯,0,25,0,64,0,0,0,173,0,0
A1.D041914.3_8_M.1.1,150,0,0,0,0,994,0,6,0,19,⋯,0,296,0,0,0,0,0,1,0,0
A1.D042253.3_9_M.1.1,159,390,0,0,0,136,0,18,0,88,⋯,0,12,0,0,0,0,0,0,0,0
A1.MAA000487.3_10_M.1.1,0,0,158,0,0,60,0,10,0,0,⋯,0,235,0,0,0,0,0,0,0,0
A10.B000610.3_56_F.1.1,0,0,111,0,0,0,0,0,0,0,⋯,0,0,164,13,0,0,0,0,0,0
A10.B002764.3_38_F.1.1,0,0,1,0,471,0,0,0,0,0,⋯,0,0,102,0,0,0,0,571,0,0
A10.B002771.3_39_F.1.1,0,0,0,0,997,0,0,121,69,0,⋯,0,0,0,0,0,0,0,0,0,0
A10.D041914.3_8_M.1.1,0,0,22,0,0,1567,0,243,0,5569,⋯,0,338,32,45,0,0,0,0,0,0


In [16]:
typeof(ad$X) == 'S4'

In [4]:
ad$obs

Unnamed: 0_level_0,orig.ident,n_counts,n_genes,nReads,plate.barcode,mouse.id,tissue,subtissue,FACS.selection,mouse.sex,⋯,pct_counts_hb,percent.plat,RNA_snn_res.0.5,seurat_clusters,doublet_score,doublet_class,leiden,louvain,MAGIC_leiden,MAGIC_louvain
Unnamed: 0_level_1,<fct>,<dbl>,<int>,<dbl>,<fct>,<fct>,<fct>,<fct>,<fct>,<fct>,⋯,<dbl>,<dbl>,<fct>,<fct>,<dbl>,<fct>,<fct>,<fct>,<fct>,<fct>
A1.B000610.3_56_F.1.1,Bladder,119565,364,610727,B000610,3_56_F,Bladder,,Multiple,F,⋯,0,0,0,0,0.06060606,Singlet,1,5,11,7
A1.B002764.3_38_F.1.1,Bladder,3184,90,320035,B002764,3_38_F,Bladder,,Multiple,F,⋯,0,0,4,4,0.09090909,Singlet,3,2,24,3
A1.B002771.3_39_F.1.1,Bladder,197586,489,1044981,B002771,3_39_F,Bladder,,Multiple,F,⋯,0,0,0,0,0.06666667,Singlet,1,5,14,4
A1.D041914.3_8_M.1.1,Bladder,70714,405,447232,D041914,3_8_M,Bladder,,Multiple,M,⋯,0,0,0,0,0.03030303,Singlet,0,5,1,5
A1.D042253.3_9_M.1.1,Bladder,51411,528,330249,D042253,3_9_M,Bladder,,Multiple,M,⋯,0,0,0,0,0.11515152,Singlet,1,0,6,6
A1.MAA000487.3_10_M.1.1,Bladder,159625,519,748761,MAA000487,3_10_M,Bladder,,Multiple,M,⋯,0,0,2,2,0.13333333,Singlet,0,7,9,11
A10.B000610.3_56_F.1.1,Bladder,274845,459,1486054,B000610,3_56_F,Bladder,,Multiple,F,⋯,0,0,1,1,0.06666667,Singlet,2,3,7,15
A10.B002764.3_38_F.1.1,Bladder,878122,375,3783292,B002764,3_38_F,Bladder,,Multiple,F,⋯,0,0,1,1,0.10303030,Singlet,2,3,20,21
A10.B002771.3_39_F.1.1,Bladder,147336,346,947995,B002771,3_39_F,Bladder,,Multiple,F,⋯,0,0,1,1,0.08484848,Singlet,2,3,15,18
A10.D041914.3_8_M.1.1,Bladder,144910,615,966858,D041914,3_8_M,Bladder,,Multiple,M,⋯,0,0,0,0,0.18787879,Singlet,1,5,1,20


In [5]:
# Build a Seurat object directly from X (densified and transposed) with the
# renamed obs table as cell metadata, then print its summary.
# NOTE(review): project is "Lung" although the input path refers to a
# bladder dataset -- confirm this label is intended.
obs_df <- .obs2metadata(ad$obs)

srat <- CreateSeuratObject(
  counts = t(as.matrix(ad$X)),
  project = "Lung",
  meta.data = obs_df
)
srat

“Data is of class matrix. Coercing to dgCMatrix.”


An object of class Seurat 
2000 features across 1378 samples within 1 assay 
Active assay: RNA (2000 features, 0 variable features)
 1 layer present: counts

In [18]:
srat$RNA$counts

  [[ suppressing 32 column names ‘A1.B000610.3_56_F.1.1’, ‘A1.B002764.3_38_F.1.1’, ‘A1.B002771.3_39_F.1.1’ ... ]]



2000 x 1378 sparse Matrix of class "dgCMatrix"
                                                                             
Cyp2e1           36    .     .   150  159     .     .     .     .     .     .
Cxcl10          546    .     .     .  390     .     .     .     .     .     .
Adm               .    .     .     .    .   158   111     1     .    22     .
Cyp1a1            .    .     .     .    .     .     .     .     .     .     .
Reg3g             .    .     .     .    .     .     .   471   997     .     .
Gadd45g        1875    .   514   994  136    60     .     .     .  1567   162
Sprr2f            .    .     .     .    .     .     .     .     .     .     .
Rbp4            425    .   242     6   18    10     .     .   121   243     .
Sprr2g            .    .     .     .    .     .     .     .    69     .     .
Car3           1549    .  4125    19   88     .     .     .     .  5569     .
Has1              .    .     .     .    .  2833     1     .     .  1076    57
Spon2          21

In [9]:
# Copy the counts layer into the data layer as-is (no normalisation is
# applied by this assignment), then display it.
srat$RNA$data <- srat$RNA$counts
srat$RNA$data

  [[ suppressing 32 column names ‘CCACCTTGTTCGTCCT-1-WT_uninfected’, ‘ATGGGATAGGTTGCAC-1-WT_uninfected’, ‘GCGTTTCCACAATCCC-1-WT_uninfected’ ... ]]

  [[ suppressing 32 column names ‘CCACCTTGTTCGTCCT-1-WT_uninfected’, ‘ATGGGATAGGTTGCAC-1-WT_uninfected’, ‘GCGTTTCCACAATCCC-1-WT_uninfected’ ... ]]

  [[ suppressing 32 column names ‘CCACCTTGTTCGTCCT-1-WT_uninfected’, ‘ATGGGATAGGTTGCAC-1-WT_uninfected’, ‘GCGTTTCCACAATCCC-1-WT_uninfected’ ... ]]



33696 x 25814 sparse Matrix of class "dgCMatrix"
                                                                                        
Xkr4               .         .         .         .         .         .         .        
Gm1992             .         .         .         .         .         .         .        
Gm19938            .         .         .         .         .         .         .        
Gm37381            .         .         .         .         .         .         .        
Rp1                .         .         .         .         .         .         .        
Sox17              .         .         .         .         .         .         .        
Gm37587            .         .         .         .         .         .         .        
Gm37323            .         .         .         .         .         .         .        
Mrpl15             0.3115620 0.9212925 0.5042464 0.5445502 0.1485443 0.5945919 0.5099012
A930006A01Rik      .         .         .         .         . 

In [6]:
# Seurat workflow; each step overwrites `srat` with its result.
# Normalise, select variable features (vst, up to 3000) and scale.
srat <- NormalizeData(object = srat, verbose = FALSE)
srat <- FindVariableFeatures(object = srat, nfeatures = 3000, verbose = FALSE, selection.method = 'vst')
srat <- ScaleData(srat, verbose = FALSE)
# PCA with 20 components; all 20 are used for neighbours, clustering and UMAP.
srat <- RunPCA(srat, npcs = 20, verbose = FALSE)
srat <- FindNeighbors(srat, dims = 1:20)
srat <- FindClusters(srat, resolution = 0.3)
srat <- RunUMAP(srat, reduction = "pca", dims = 1:20)

Computing nearest neighbor graph

Computing SNN



Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1378
Number of edges: 47849

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8855
Number of communities: 6
Elapsed time: 0 seconds


“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
22:04:51 UMAP embedding parameters a = 0.9922 b = 1.112

22:04:51 Read 1378 rows and found 20 numeric columns

22:04:51 Using Annoy for neighbor search, n_neighbors = 30

22:04:51 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

22:04:52 Writing NN index file to temp file /tmp/Rtmp2Ao971/file26a41287e9bbf

22:04:52 Searching Annoy index using 1 thread, search_k = 3000

22:04:52 Annoy recall = 100%

22:04:53 Commencing smooth kNN distance calibration using 1 thread
 with target n_neighbors = 30

22:04:54 In

In [1]:
library(SingleR)
library(scater)
# ref <- celldex::MouseRNAseqData()
packageVersion("SingleR")

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges

[1] ‘2.0.0’

In [1]:
library(celldex)
packageVersion("celldex")

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: ‘MatrixGenerics’


The following objects are masked from ‘package:matrixStats’:

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges

[1] ‘1.8.0’

In [21]:
# devtools::install_version("dbplyr", version = "2.3.4")

Downloading package from url: https://cloud.r-project.org/src/contrib/Archive/dbplyr/dbplyr_2.3.4.tar.gz






Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [10]:
ref <- celldex::MouseRNAseqData()

snapshotDate(): 2022-10-31

see ?celldex and browseVignettes('celldex') for documentation

loading from cache

see ?celldex and browseVignettes('celldex') for documentation

loading from cache



In [11]:
ref

class: SummarizedExperiment 
dim: 21214 358 
metadata(0):
assays(1): logcounts
rownames(21214): Xkr4 Rp1 ... LOC100039574 LOC100039753
rowData names(0):
colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
  SRR1044044Aligned
colData names(3): label.main label.fine label.ont

In [7]:
ref <- celldex::BlueprintEncodeData()

snapshotDate(): 2022-10-31

see ?celldex and browseVignettes('celldex') for documentation

downloading 1 resources

retrieving 1 resource

loading from cache

see ?celldex and browseVignettes('celldex') for documentation

downloading 1 resources

retrieving 1 resource

loading from cache



In [12]:
# Name of the celldex reference dataset to annotate against.
reference <- "MouseRNAseqData"

# switch() evaluates only the matching branch, so only the selected dataset
# is fetched. An unrecognised name stops here instead of silently leaving
# `ref` as NULL.
ref <- switch(
    reference,
    "MouseRNAseqData" = celldex::MouseRNAseqData(),
    "HumanPrimaryCellAtlasData" = celldex::HumanPrimaryCellAtlasData(),
    "DatabaseImmuneCellExpressionData" = celldex::DatabaseImmuneCellExpressionData(),
    "BlueprintEncodeData" = celldex::BlueprintEncodeData(),
    "ImmGenData" = celldex::ImmGenData(),
    "MonacoImmuneData" = celldex::MonacoImmuneData(),
    "NovershternHematopoieticData" = celldex::NovershternHematopoieticData(),
    stop("Unknown celldex reference: ", reference, call. = FALSE)
)

ref

snapshotDate(): 2022-10-31

see ?celldex and browseVignettes('celldex') for documentation

loading from cache

see ?celldex and browseVignettes('celldex') for documentation

loading from cache



class: SummarizedExperiment 
dim: 21214 358 
metadata(0):
assays(1): logcounts
rownames(21214): Xkr4 Rp1 ... LOC100039574 LOC100039753
rowData names(0):
colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
  SRR1044044Aligned
colData names(3): label.main label.fine label.ont

In [4]:
library(Seurat)
packageVersion("Seurat")

Attaching SeuratObject

Seurat v4 was just loaded with SeuratObject v5; disabling v5 assays and
validation routines, and ensuring assays work in strict v3/v4
compatibility mode


Attaching package: ‘Seurat’


The following object is masked from ‘package:SummarizedExperiment’:

    Assays




[1] ‘4.4.0’

In [12]:
# Annotate cells with SingleR against `ref`, using its `label.main` labels.
# The Seurat object is converted to a SingleCellExperiment for the call.
results_main <- SingleR(test = as.SingleCellExperiment(srat), ref = ref, labels = ref$label.main)

In [13]:
results_main

DataFrame with 25814 rows and 4 columns
                                                                               scores
                                                                             <matrix>
CCACCTTGTTCGTCCT-1-WT_uninfected                       0.386220:0.198203:0.546598:...
ATGGGATAGGTTGCAC-1-WT_uninfected                       0.265245:0.146874:0.621986:...
GCGTTTCCACAATCCC-1-WT_uninfected                       0.277770:0.161960:0.598258:...
TGAGCTATCACGGACT-1-WT_uninfected                       0.242818:0.133347:0.642578:...
GCTCAGTAGGGCTATC-1-WT_uninfected                       0.373790:0.181760:0.513400:...
...                                                                               ...
ATAGGGTAGGCTGTAC-1-Shh_SPL_KO_infected_(PR8_virus) 0.0108234:0.02278774:0.1819194:...
TCATGTTCATGATCGG-1-Shh_SPL_KO_infected_(PR8_virus) 0.0829226:0.04059531:0.1813933:...
GTAAGGGTCTATGAGT-1-Shh_SPL_KO_infected_(PR8_virus) 0.1336956:0.08891221:0.1391265:...
CACCAATGTCTAAG

In [14]:
# Annotate cells with SingleR against `ref`, using its `label.fine` labels.
# The Seurat object is converted to a SingleCellExperiment for the call.
results_fine <- SingleR(test = as.SingleCellExperiment(srat), ref = ref, labels = ref$label.fine)

In [15]:
results_fine

DataFrame with 25814 rows and 4 columns
                                                                                scores
                                                                              <matrix>
CCACCTTGTTCGTCCT-1-WT_uninfected                       0.436366:0.247826:0.1722562:...
ATGGGATAGGTTGCAC-1-WT_uninfected                       0.343968:0.217236:0.0919264:...
GCGTTTCCACAATCCC-1-WT_uninfected                       0.347286:0.234990:0.1291938:...
TGAGCTATCACGGACT-1-WT_uninfected                       0.319053:0.203985:0.0783220:...
GCTCAGTAGGGCTATC-1-WT_uninfected                       0.440541:0.256997:0.1932526:...
...                                                                                ...
ATAGGGTAGGCTGTAC-1-Shh_SPL_KO_infected_(PR8_virus) 0.0310565:0.0344130: 0.04293734:...
TCATGTTCATGATCGG-1-Shh_SPL_KO_infected_(PR8_virus) 0.0713431:0.0243252: 0.03576507:...
GTAAGGGTCTATGAGT-1-Shh_SPL_KO_infected_(PR8_virus) 0.1180813:0.0766747: 0.07257764:...
CAC

In [13]:
library(ExperimentHub)

Loading required package: AnnotationHub

Loading required package: BiocFileCache

Loading required package: dbplyr


Attaching package: ‘AnnotationHub’


The following object is masked from ‘package:Biobase’:

    cache




In [18]:
# Open ExperimentHub and list records matching the search term.
# NOTE(review): the term is "homo Sapiens" while the references used
# elsewhere in this notebook are mouse datasets -- confirm this is intended.
eh <- ExperimentHub()
query(eh, "homo Sapiens")

snapshotDate(): 2022-10-31



ExperimentHub with 4008 records
# snapshotDate(): 2022-10-31
# $dataprovider: Eli and Edythe L. Broad Institute of Harvard and MIT, NA, G...
# $species: Homo sapiens, Mus musculus
# $rdataclass: ExpressionSet, SummarizedExperiment, matrix, list, RaggedExpe...
# additional mcols(): taxonomyid, genome, description,
#   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
#   rdatapath, sourceurl, sourcetype 
# retrieve records with, e.g., 'object[["EH1"]]' 

           title                                                              
  EH1    | RNA-Sequencing and clinical data for 7706 tumor samples from The...
  EH166  | ERR188297                                                          
  EH167  | ERR188088                                                          
  EH168  | ERR188204                                                          
  EH169  | ERR188317                                                          
  ...      ...                                   

In [19]:
# Fetch ExperimentHub record EH1618, then keep only the columns whose
# `tissue` is 'Lung' and whose `cell_ontology_class` is not NA.
# NOTE(review): presumably `tissue` itself has no NA values; an NA there
# would yield an NA column index -- verify.
lung_ref <- eh[['EH1618']]
lung_ref <- lung_ref[,lung_ref$tissue == 'Lung']
lung_ref <- lung_ref[,!is.na(lung_ref$cell_ontology_class)]

Bioconductor version 3.16 (BiocManager 1.30.22), R 4.2.1 (2022-06-23)

Installing package(s) 'TabulaMurisData'

Old packages: 'abind', 'acepack', 'anndata', 'ape', 'aplot', 'askpass',
  'backports', 'BH', 'biglm', 'BiocManager', 'bit', 'bit64', 'bitops',
  'bookdown', 'brio', 'broom', 'bslib', 'cachem', 'callr', 'car', 'caTools',
  'checkmate', 'classInt', 'clue', 'cluster', 'collections', 'colorspace',
  'commonmark', 'corrplot', 'crayon', 'credentials', 'cubature', 'curl',
  'data.table', 'dbplyr', 'dbscan', 'DEoptimR', 'diffobj', 'digest', 'diptest',
  'distr', 'distributional', 'docopt', 'doRNG', 'dotCall64', 'downlit',
  'dqrng', 'DT', 'e1071', 'enrichR', 'evaluate', 'farver', 'fastDummies',
  'fastICA', 'fastmap', 'fastmatch', 'fields', 'fitdistrplus', 'flexmix',
  'FNN', 'fontawesome', 'fpc', 'fs', 'future', 'future.apply', 'gert',
  'ggforce', 'ggfortify', 'ggfun', 'ggplot.multistats', 'ggplot2', 'ggpubr',
  'ggrepel', 'ggsci', 'gh', 'glmnet', 'globals', 'googleVis', 'gplots',


In [22]:
# Drop columns whose `cell_ontology_class` is NA.
lung_ref <- lung_ref[,!is.na(lung_ref[['cell_ontology_class']])]

In [19]:
lung_ref

class: SingleCellExperiment 
dim: 23341 5404 
metadata(0):
assays(1): counts
rownames(23341): 0610005C13Rik 0610007C21Rik ... Zzef1 Zzz3
rowData names(2): ID Symbol
colnames(5404): 10X_P7_8_AAACGGGAGGATATAC 10X_P7_8_AAACGGGTCTCGTATT ...
  10X_P8_13_TTTGTCACATATGAGA 10X_P8_13_TTTGTCAGTGGTCCGT
colData names(10): cell channel ... cell_ontology_id free_annotation
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):

In [23]:
library(scuttle)

# Replace `lung_ref` with the result of logNormCounts() before it is used
# as a SingleR reference.
lung_ref <- logNormCounts(lung_ref)

In [21]:
# Annotate cells with SingleR against `lung_ref`, using its
# `cell_ontology_class` values as labels.
results_TabulaMuris <- SingleR(test = as.SingleCellExperiment(srat), ref = lung_ref, labels = lung_ref$cell_ontology_class)

In [22]:
results_TabulaMuris

DataFrame with 25814 rows and 4 columns
                                                                                scores
                                                                              <matrix>
CCACCTTGTTCGTCCT-1-WT_uninfected                        0.380173:0.402019:0.634546:...
ATGGGATAGGTTGCAC-1-WT_uninfected                        0.520593:0.611979:0.541157:...
GCGTTTCCACAATCCC-1-WT_uninfected                        0.512907:0.489966:0.495608:...
TGAGCTATCACGGACT-1-WT_uninfected                        0.509969:0.599434:0.515085:...
GCTCAGTAGGGCTATC-1-WT_uninfected                        0.394008:0.419387:0.670021:...
...                                                                                ...
ATAGGGTAGGCTGTAC-1-Shh_SPL_KO_infected_(PR8_virus) 0.0796567:0.0580774:-0.00363303:...
TCATGTTCATGATCGG-1-Shh_SPL_KO_infected_(PR8_virus) 0.1087346:0.1025455: 0.15871453:...
GTAAGGGTCTATGAGT-1-Shh_SPL_KO_infected_(PR8_virus) 0.1061420:0.1079087: 0.18174847:...
CAC

In [23]:
# Write the three SingleR result tables to CSV files in the current working
# directory.
write.csv(results_main, "results_main.csv")
write.csv(results_fine, "results_fine.csv")
write.csv(results_TabulaMuris, "results_TabulaMuris.csv")