# Single Cell Analysis of D7, D14 and D21 RGC-iNs

In [2]:
import scanpy as sc
import pandas as pd

sc.settings.verbosity = 3 # scanpy logging verbosity; level 3 is the "hints" level per the original note

### Data collection

In [1]:
### Read each timepoint's 10x matrix directory into its own AnnData object
### (this step may take a few minutes the first time; cache=True is passed to the reader) ###

def _read_timepoint(day):
    """Read the 10x matrix directory of one timepoint into an AnnData object.

    Parameters
    ----------
    day : str
        Sub-directory name under ./../../sc_data/ (e.g. 'd7').

    Returns
    -------
    The object returned by sc.read_10x_mtx, with variables named by gene symbol.
    """
    return sc.read_10x_mtx(
        f'./../../sc_data/{day}/',
        var_names='gene_symbols',
        cache=True
    )


adata_7 = _read_timepoint('d7')
adata_14 = _read_timepoint('d14')
adata_21 = _read_timepoint('d21')

data = [adata_7, adata_14, adata_21]

# The loop variable is deliberately NOT called `adata`: that name is used for the
# merged object further down, and leaking the last timepoint under it would let
# later cells run on d21 alone if the concatenation cell were skipped.
for timepoint_adata in data:
    timepoint_adata.var_names_make_unique()
    display(timepoint_adata)

NameError: name 'sc' is not defined

In [None]:
# Tag every cell with its timepoint in obs['sample'] so the origin of each cell
# can still be told apart once the three objects are merged.
for labelled_adata, sample_label in ((adata_7, 'd7'), (adata_14, 'd14'), (adata_21, 'd21')):
    labelled_adata.obs['sample'] = sample_label

# Merge the three timepoints into a single AnnData and show its summary
adata = adata_7.concatenate(adata_14, adata_21)
display(adata)

### Preprocessing

In [None]:
# basic filtering: cells need min_genes=200, genes need min_cells=3
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# flag genes whose symbol starts with 'MT-' in var['mt'] (mitochondrial genes),
# so they can be used as a QC variable below
is_mito = adata.var_names.str.startswith('MT-')
adata.var['mt'] = is_mito

# compute QC metrics in place, using 'mt' as the QC variable
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# violin plots of the three QC columns, one panel each
qc_keys = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt']
sc.pl.violin(adata, qc_keys, jitter=0.4, multi_panel=True)

# scatter plots of total_counts against the other two QC columns
for y_key in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(adata, x='total_counts', y=y_key)

In [None]:
# Keep only cells with total_counts < 200000 (drops cells with an abnormal number
# of total counts) and pct_counts_mt < 5 (drops cells with high mitochondrial
# gene presence). Both conditions are applied in a single boolean mask.
keep_cell = (adata.obs.total_counts < 200000) & (adata.obs.pct_counts_mt < 5)

# Slicing an AnnData yields a view of the parent object; .copy() materialises the
# subset so the in-place normalisation below writes to a real object instead of
# relying on the view being converted implicitly.
adata = adata[keep_cell, :].copy()

# normalize each cell to target_sum=1e4 and then logarithmize the data
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

display(adata)

### Clustering and Dimensionality Reduction

In [None]:
# Compute principal components with scanpy (ARPACK solver, use_highly_variable=False),
# then visualize PC1/PC2 and the variance ratios (log scale).
sc.tl.pca(adata, use_highly_variable=False, svd_solver='arpack')
sc.pl.pca(adata)
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Step 1: neighborhood graph
sc.pp.neighbors(adata)

# Step 2: Louvain clustering (the plotting cells read the result from 'louvain')
sc.tl.louvain(adata)

# Step 3: UMAP dimensionality reduction
# NOTE(review): the embedding is computed with 3 components, while the plotting
# cell in this notebook only calls the default sc.pl.umap — confirm 3 is intended.
n_umap_components = 3
sc.tl.umap(adata, n_components=n_umap_components)

### UMAP Visualization

In [None]:
# Draw the UMAP twice — colored by Louvain cluster, then by timepoint ('sample') —
# with legends placed on the data; each call is given a PDF name via `save`.
umap_panels = [
    dict(color=['louvain'], save='_all_timepoints_louvain.pdf'),
    dict(color=['sample'], palette='Set3', save='_all_timepoints.pdf'),
]
for panel_kwargs in umap_panels:
    sc.pl.umap(adata, legend_loc='on data', **panel_kwargs)

### Cell Type Analysis

In [None]:
# Marker genes grouped by the cell type they are used to identify; the dotplot
# below shows their expression cluster by cluster.
subtypes = dict([
    ('HKG', ['GAPDH', 'ACTB']),
    ('Pluri.', ['NANOG', 'DNMT3B']),
    ('Pan-Neuronal', ['TUBB3', 'SYT1', 'MAP2', 'DLG4', 'SYP']),
    ('RGC', ['DCX', 'NEFL', 'INA', 'STMN2', 'SNCG', 'GAP43', 'SOX4', 'SOX11', 'SOX12', 'RBPMS']),
    ('Photoreceptor', ['CRX', 'ARR3', 'RHO', 'NRL', 'LHX4']),
    ('Glial', ['S100B', 'PDGFRA', 'OLIG2', 'OLIG3', 'GFAP', 'RLBP1', 'AQP4']),
    ('Amacrine', ['GAD1', 'PAX6', 'TFAP2A', 'ZNF697', 'SLC17A8', 'SLC18A3', 'GJD2']),
    ('Bipolar', ['NETO1', 'VSX2', 'OTX2', 'GRM6', 'CABP5']),
    ('Horizontal', ['LHX1', 'CALB1', 'PTF1A', 'GBX2']),
])

# Row order of the Louvain clusters in the plot.
# NOTE(review): this list is hard-coded for 17 clusters labelled '0'..'16' — it
# presumably matches one specific clustering result; re-check it whenever the
# clustering is re-run.
cluster_order = ['4', '11', '1', '7', '3', '0', '9', '10', '6', '12', '8', '16', '13', '15', '5', '2', '14']

sc.pl.dotplot(
    adata,
    subtypes,
    groupby='louvain',
    categories_order=cluster_order,
    mean_only_expressed=True,
    vmax=1,
    dot_max=1,
    var_group_rotation=0,
    title='Cell Type Markers',
    save='d7-21_celltype_dotplot.pdf',
)

Written by Manan Chopra (m1chopra@ucsd.edu) @ Wahlin Lab  
Last updated on May 22, 2023