diff --git a/README.markdown b/README.markdown index 474453e..5ffa81b 100644 --- a/README.markdown +++ b/README.markdown @@ -1,7 +1,7 @@ # Hackathon Single Cell Notebook Clone this repo to your computer. -```git clone https://github.com/raybueno/UCSF_HACKATHON_PRIMER.git``` +```git clone git@github.com:UCSF-DSCOLAB/hackathon_primer.git``` This repo contains the Dockerfile and sample data with tutorial used to create the hackathon containers for single-cell analysis for both python and R-studio. @@ -13,13 +13,17 @@ Sample Data with tutorial also included. Users can start analyzing single cell sequencing data with Scanpy in python or Seurat in R. ## Requirements: -Docker +- Docker + +Note: The easiest way to install and use Docker is via Docker Desktop: https://www.docker.com/products/docker-desktop/ + +Be sure to select the appropriate installation for your Macbook machine. # Running Python Container ## Pull image from Docker hub After installing docker, pull the repository from docker hub. -```docker pull drbueno/single-cell-nb:latest ``` +```docker pull drbueno/single-cell-nb:latest``` ## How to Run Using Mac or Ubuntu Change directory to ```python-container```. @@ -30,9 +34,20 @@ Run (recommended to run under screen) You will be prompted to set your working directory. This is the directory where the data lives. ALL work must be done in this directory. It will be mounted inside the container in ```/home/data``` -After entering the path of your local work directory, follow instructions to copy and paste link with -IP address to a web browser. +An example of working directory: `/Users/hackathon-user/UCSF_HACKATHON_PRIMER/python-container/data` + +After entering the path of your local work directory, follow the instructions to copy and paste the link with the IP address into a web browser. 
+ +### If you are using an M1/M2 Mac +Be sure to have: + +- The latest version of Docker + +And in Docker Settings (using docker desktop): + +- General -> Use Virtualization Framework -> ON +- Features in development -> Use Rosetta for x86/amd64 emulation on Apple Silicon -> ON ## Running Scanpy Tutorial @@ -68,7 +83,7 @@ analyzing your data. ## Pull image from Docker hub After installing docker, pull the repository from docker hub. -```docker pull drbueno/rstudio-single-cell:latest ``` +```docker pull drbueno/rstudio-single-cell:latest``` ## How to Run Using Mac or Ubuntu Change directory to ```rstudio-container```. @@ -78,6 +93,8 @@ Run ```./start.sh``` You will be prompted to set your working directory. This is the directory where the data lives. ALL work must be done in this directory. It will be mounted inside the container in ```/home``` +An example of working directory: `/Users/hackathon-user/UCSF_HACKATHON_PRIMER/rstudio-container/data` + Go to a web browser and visit ```localhost:8787``` diff --git a/python-container/python-tutorial.ipynb b/python-container/python-tutorial.ipynb index 2e892b2..c586101 100644 --- a/python-container/python-tutorial.ipynb +++ b/python-container/python-tutorial.ipynb @@ -8,8 +8,11 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", - "import scanpy as sc" - ] + "import scanpy as sc\n", + "import warnings \n", + "warnings.filterwarnings('ignore')" + ], + "id": "5c6c1ceedd895a4b" }, { "cell_type": "markdown", @@ -17,7 +20,8 @@ "source": [ "# This is a snippet from the scanpy tutorial \n", "go to https://scanpy-tutorials.readthedocs.io/en/latest/pbmc3k.html for full tutorial" - ] + ], + "id": "362bd1fab9d2d378" }, { "cell_type": "code", @@ -28,7 +32,8 @@ "adata = sc.read_10x_mtx('/home/tutorial/filtered_gene_bc_matrices/hg19/',\n", " var_names = 'gene_symbols',\n", " cache = True)" - ] + ], + "id": "c2c289451f4dc1d6" }, { "cell_type": "code", @@ -37,7 +42,8 @@ "outputs": [], "source": [ 
"adata.var_names_make_unique()" - ] + ], + "id": "4896dbeb0cd80db6" }, { "cell_type": "code", @@ -58,14 +64,16 @@ ], "source": [ "adata" - ] + ], + "id": "c05434bdae362ad5" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Preprocessing " - ] + ], + "id": "c5300883059bb7c6" }, { "cell_type": "code", @@ -87,14 +95,16 @@ ], "source": [ "sc.pl.highest_expr_genes(adata, n_top=20, )" - ] + ], + "id": "ee7cbcef69e1769c" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# basic filtering " - ] + ], + "id": "a3440c6def535ed8" }, { "cell_type": "code", @@ -113,14 +123,16 @@ "source": [ "sc.pp.filter_cells(adata, min_genes=200)\n", "sc.pp.filter_genes(adata, min_cells=3)" - ] + ], + "id": "fa2438c494490c99" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Mitochondrial gene info" - ] + ], + "id": "89eccd900b66e4c0" }, { "cell_type": "code", @@ -130,7 +142,8 @@ "source": [ "adata.var['mt'] = adata.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'\n", "sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)" - ] + ], + "id": "cb7ab975c0d1d881" }, { "cell_type": "code", @@ -164,7 +177,8 @@ ], "source": [ "sc.pl.violin(adata, ['n_genes_by_counts'])" - ] + ], + "id": "ab003857e6f90661" }, { "cell_type": "code", @@ -198,7 +212,8 @@ ], "source": [ "sc.pl.violin(adata, ['total_counts'])" - ] + ], + "id": "4e3878fe0e0022b8" }, { "cell_type": "code", @@ -232,14 +247,16 @@ ], "source": [ "sc.pl.violin(adata, ['pct_counts_mt'])" - ] + ], + "id": "a4c4f0134defc5af" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remove cells that have too many mitochondrial genes expressed or too many total counts:" - ] + ], + "id": "10a3446931d36e99" }, { "cell_type": "code", @@ -282,7 +299,8 @@ "source": [ "sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')\n", "sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')" - ] + ], + "id": "1fd866cdffb1d51b" }, { "cell_type": 
"code", @@ -303,14 +321,16 @@ "source": [ "adata = adata[adata.obs.n_genes_by_counts < 2500, :]\n", "adata = adata[adata.obs.pct_counts_mt < 5, :]" - ] + ], + "id": "35764747925638fd" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Total-count normalize (library-size correct) the data matrix 𝐗 to 10,000 reads per cell, so that counts become comparable among cells." - ] + ], + "id": "ff147ea1d55f6715" }, { "cell_type": "code", @@ -328,14 +348,16 @@ ], "source": [ "sc.pp.normalize_total(adata, target_sum=1e4)" - ] + ], + "id": "783eab52740584d2" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Logarithmize the data:" - ] + ], + "id": "38567a51e40de51" }, { "cell_type": "code", @@ -344,14 +366,16 @@ "outputs": [], "source": [ "sc.pp.log1p(adata)\n" - ] + ], + "id": "b95b45c162ab0ea8" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Identify highly-variable genes." - ] + ], + "id": "c8c481b23616631a" }, { "cell_type": "code", @@ -360,7 +384,8 @@ "outputs": [], "source": [ "sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)" - ] + ], + "id": "33df7a6068495b98" }, { "cell_type": "code", @@ -382,7 +407,8 @@ ], "source": [ "sc.pl.highly_variable_genes(adata)" - ] + ], + "id": "30de4bdb6e50dfcd" }, { "cell_type": "code", @@ -400,7 +426,8 @@ ], "source": [ "adata = adata[:, adata.var.highly_variable]" - ] + ], + "id": "6bcbd4da5241e276" }, { "cell_type": "code", @@ -418,14 +445,16 @@ ], "source": [ "sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])" - ] + ], + "id": "e4cc1fc68c6111dd" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# PCA " - ] + ], + "id": "2572ff75f065049a" }, { "cell_type": "code", @@ -434,7 +463,8 @@ "outputs": [], "source": [ "sc.pp.scale(adata, max_value=10)" - ] + ], + "id": "24723dfede31c1dc" }, { "cell_type": "code", @@ -452,7 +482,8 @@ ], "source": [ "sc.tl.pca(adata, svd_solver='arpack')" - ] + ], + "id": "79cf216fdf1d7332" }, { "cell_type": "code", @@ -482,14 
+513,16 @@ ], "source": [ "sc.pl.pca(adata, color='CST3')" - ] + ], + "id": "1f7e9dc799df2e2f" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Computing the neighborhood graph" - ] + ], + "id": "fa0c299b54fc6435" }, { "cell_type": "code", @@ -498,14 +531,16 @@ "outputs": [], "source": [ "sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)" - ] + ], + "id": "89e4e3a0de7f1b2a" }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Embedding the neighborhood graph" - ] + ], + "id": "afb1c32550a3da81" }, { "cell_type": "code", @@ -514,7 +549,8 @@ "outputs": [], "source": [ "sc.tl.umap(adata)" - ] + ], + "id": "57df5c2af6923ea2" }, { "cell_type": "code", @@ -523,7 +559,8 @@ "outputs": [], "source": [ "sc.pl.umap(adata, color=['CST3', 'NKG7', 'PPBP'])" - ] + ], + "id": "acb62c0457d9c0ff" } ], "metadata": { diff --git a/rstudio-container/single-cell-tutorial.Rmd b/rstudio-container/single-cell-tutorial.Rmd index 73c0a87..42bd6c8 100644 --- a/rstudio-container/single-cell-tutorial.Rmd +++ b/rstudio-container/single-cell-tutorial.Rmd @@ -84,4 +84,5 @@ head(Idents(pbmc), 5) pbmc <- RunUMAP(pbmc, dims = 1:10) -DimPlot(pbmc, reduction = "umap") \ No newline at end of file +DimPlot(pbmc, reduction = "umap") +``` \ No newline at end of file diff --git a/rstudio-container/start.sh b/rstudio-container/start.sh index caf0ab1..41fd7c9 100755 --- a/rstudio-container/start.sh +++ b/rstudio-container/start.sh @@ -3,4 +3,21 @@ echo Set your workding directory. This is the directory where all your data live Please enter full path. 
read dir -docker run -d -p 8787:8787 -e PASSWORD=@hackathon2021 -v $dir:/home drbueno/rstudio-single-cell \ No newline at end of file +architecture=$(uname -m) +# On arm64 hosts (e.g. M1/M2 Macs) request the x86_64 image explicitly; all other hosts use the default platform. +if [ "$architecture" = "arm64" ]; then + docker run -d --rm \ + -p 8787:8787 \ + -e PASSWORD=@hackathon2021 \ + -v "$dir":/home \ + --platform linux/x86_64 \ + drbueno/rstudio-single-cell + echo "please navigate to: http://localhost:8787/" +else + docker run -d --rm \ + -p 8787:8787 \ + -e PASSWORD=@hackathon2021 \ + -v "$dir":/home \ + drbueno/rstudio-single-cell + echo "please navigate to: http://localhost:8787/" +fi \ No newline at end of file