In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib import colors
from scipy.sparse import csr_matrix
from scipy.spatial import distance
import os
import warnings; warnings.simplefilter('ignore')
import anndata
import scrublet as scr

import seaborn as sns
# Plotting style: plain white seaborn background for every figure.
sns.set_style(style='white')

# Seed NumPy's global RNG so any NumPy-driven randomness is repeatable.
np.random.seed(seed=0)
# Scanpy logging level 3 and figure defaults (scanpy styling, 80 dpi).
sc.settings.verbosity = 3
sc.set_figure_params(dpi=80, scanpy=True)

In [2]:
# Print the versions of scanpy and its key dependencies so the run can be reproduced.
sc.logging.print_versions()

scanpy==1.4.5 anndata==0.7.5 umap==0.3.10 numpy==1.17.2 scipy==1.5.2 pandas==1.1.2 scikit-learn==0.21.3 statsmodels==0.10.1 python-igraph==0.7.1 louvain==0.6.1


### 6A — UMAP of membrane and secreted immunoglobulin isoform expression

In [3]:
# Load the dataset from an .h5ad file; the path is relative to the
# notebook's working directory.
adata = sc.read('veatch_b_all20.h5ad')

In [4]:
# Start by flagging every gene as "highly variable"; the PCA further down is
# run with use_highly_variable=True, so this column decides which genes it uses.
adata.var['highly_variable'] = True

In [5]:
# Exclude IGKC and IGLC3 from the genes used by the PCA.
#
# The assignment goes through a single .loc call on the DataFrame itself.
# The chained form `adata.var['highly_variable'].loc[gene] = False` writes into
# an intermediate Series and is not guaranteed to propagate back to adata.var
# (under pandas Copy-on-Write it is a silent no-op).
# Genes that are not present in adata.var_names are simply skipped.
excluded_genes = ['IGKC', 'IGLC3']
adata.var.loc[adata.var_names.isin(excluded_genes), 'highly_variable'] = False

In [6]:
# Dimensionality reduction -> neighbour graph -> embedding -> clustering.
# Every stochastic step is pinned to the same random_state (1010).

# PCA restricted to the genes flagged in adata.var['highly_variable'].
sc.tl.pca(
    adata,
    svd_solver='arpack',
    use_highly_variable=True,
    random_state=1010,
)

# k-nearest-neighbour graph: 20 neighbours, built on the first 20 PCs.
sc.pp.neighbors(
    adata,
    n_neighbors=20,
    n_pcs=20,
    random_state=1010,
)

# 2-D UMAP embedding computed from the neighbour graph.
sc.tl.umap(
    adata,
    min_dist=0.2,
    random_state=1010,
)

# Leiden clustering; labels are written to adata.obs['leiden'].
sc.tl.leiden(
    adata,
    resolution=0.7,
    random_state=1010,
)

computing PCA with n_comps = 50
    on highly variable genes
    finished (0:00:00)
computing neighbors
    using 'X_pca' with n_pcs = 20


  if adata.isview:  # we shouldn't need this here...


    finished: added to `.uns['neighbors']`
    'distances', distances for each pair of neighbors
    'connectivities', weighted adjacency matrix (0:00:05)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:14)
running Leiden clustering
    finished: found 9 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:02)


In [7]:
# Features to colour the UMAP by, in panel order:
# membrane-bound isoforms first, then secreted isoforms.
genes = [
    'IGHD_membrane',
    'IGHM_membrane',
    'IGHG1_membrane',
    'IGHM_secreted',
    'IGHG1_secreted',
    'IGHA1_secreted',
]

In [8]:
# Re-apply the figure defaults (scanpy styling, 80 dpi). This repeats the call
# made in the setup cell with identical arguments.
# NOTE(review): looks redundant unless the defaults were changed interactively — confirm.
sc.set_figure_params(scanpy=True, dpi=80)

In [9]:
# One UMAP panel per entry in `genes`, three panels per row.
# vmax=20 caps the colour scale at the same upper limit in every panel.
sc.pl.umap(
    adata,
    color=genes,
    ncols=3,
    vmax=20,
    cmap='Spectral_r',
)

### 6B — UMAP coloured by annotated cell-type labels (Naive / Memory / Plasma)

In [10]:
# Annotate every cell with a cell-type label derived from its Leiden cluster.
#
# The column is built in one assignment from a cluster -> label mapping.
# The chained form `adata.obs['labels'][mask] = ...` assigns into an
# intermediate Series, which triggers SettingWithCopyWarning and is a silent
# no-op under pandas Copy-on-Write.
#
# Clusters that are not listed here fall back to 'Memory'.
# NOTE(review): the recorded Leiden run above reports 9 clusters ('0'..'8'),
# so clusters '9' and '10' match no cells in that run and 'Plasma IgA' is never
# assigned. The mapping is kept as written — confirm the cluster ids against
# the current clustering.
cluster_to_label = {
    '0': 'Naive',
    '2': 'Naive',
    '3': 'Naive',
    '5': 'Plasma IgG',
    '7': 'Plasma IgG',
    '10': 'Plasma IgG',
    '8': 'Plasma IgM',
    '9': 'Plasma IgA',
}
adata.obs['labels'] = (
    adata.obs['leiden']
    .astype(str)              # categorical -> plain strings so .map returns an object Series
    .map(cluster_to_label)    # unmapped clusters become NaN ...
    .fillna('Memory')         # ... and receive the default label
)

In [11]:
# UMAP coloured by the annotated label of each cell, using the first 20
# colours of the "tab20" palette.
label_palette = sns.color_palette("tab20", 20)
sc.pl.umap(adata, color='labels', palette=label_palette)