In [1]:
# Enable IPython's macOS ("osx") GUI event-loop integration and load the py5
# IPython extension.
# NOTE(review): presumably both are required before the interactive SCIViewer
# window is opened further down; "osx" is macOS-specific — confirm the
# equivalent setting for other platforms.
%gui osx
%load_ext py5

In [2]:
import pandas as pd
import numpy as np
from sciviewer import SCIViewer
import scanpy as sc

In [3]:
# Fetch the three example PBMC 3k files from Dropbox into ../data
# (the directory is created first if it does not exist):
#   - pbmc3k_umap_20210420.tsv                  2D UMAP embedding
#   - pbmc3k_expression_log2TP10K_20210420.tsv  gene expression matrix
#   - pbmc3k_20210420.h5ad                      AnnData object
# Requires the `mkdir`, `wget` and `ls` shell commands to be available.
# Each download is written to an explicit path with -O; the final `ls`
# lists the directory so the result can be checked.
print('DOWNLOADING AND EXTRACTING EXAMPLE DATA')
! mkdir -p ../data
! wget https://www.dropbox.com/s/llw9i7i0gum090z/pbmc3k_umap_20210420.tsv -O ../data/pbmc3k_umap_20210420.tsv
! wget https://www.dropbox.com/s/rijo547isu3dhel/pbmc3k_expression_log2TP10K_20210420.tsv -O ../data/pbmc3k_expression_log2TP10K_20210420.tsv
! wget https://www.dropbox.com/s/3309a3tydsnqwi0/pbmc3k_20210420.h5ad -O ../data/pbmc3k_20210420.h5ad
! ls ../data

DOWNLOADING AND EXTRACTING EXAMPLE DATA
--2022-09-05 18:45:42--  https://www.dropbox.com/s/gx9y42m4knyi1cl/pbmc3k_umap_20210420.tsv
Resolving www.dropbox.com (www.dropbox.com)... 162.125.4.18
Connecting to www.dropbox.com (www.dropbox.com)|162.125.4.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/gx9y42m4knyi1cl/pbmc3k_umap_20210420.tsv [following]
--2022-09-05 18:45:42--  https://www.dropbox.com/s/raw/gx9y42m4knyi1cl/pbmc3k_umap_20210420.tsv
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc57c397ce94eaa8c1cabfbd62ae.dl.dropboxusercontent.com/cd/0/inline/BsbTglqCA0hzt8CLPmd83dXjL1TmgkUAKAV4qxQw9NFw49abk0i0SKDcqnBeFIVOlxYqejNXc9AJ2XcczdiXETHZAYFXmS6UTv983kvKyZP1qufFlfPMV9dSNm3rQyBrrgm2zl3vx69DE49kneFQfMehNA3_tsXBXGIB0a7J5h_-Bg/file# [following]
--2022-09-05 18:45:42--  https://uc57c397ce94eaa8c1cabfbd62ae.dl.dropboxusercontent.com/cd/0/inline/BsbTglqCA0hzt8CLPmd83dXjL1TmgkUA

## Here we illustrate how to run Sciviewer with several potential input types.
- [passing in the 2D embedding data and the gene expression data as Pandas DataFrames](#dataframe)
- [passing a Scanpy AnnData object with dense data](#denseadata)
- [passing a Scanpy AnnData object with sparse data](#sparseadata)
- [passing a sparse csc_matrix](#sparse_matrix)


<a id="dataframe"></a>

## Dense Pandas DataFrame

Perhaps the simplest way to pass the data is with Pandas DataFrames. The cell names are learned from the rows and the gene names are learned from the columns. The ordering of the cells in the 2D embedding and in the gene expression matrix is assumed to match.

In [4]:
# Read the tab-separated 2D embedding; the first column of the file is used
# as the row index, then preview the first rows.
print("LOADING UMAP DATA...")

umap = pd.read_csv(
    '../data/pbmc3k_umap_20210420.tsv',
    sep='\t',
    index_col=0,
)
umap.head()

LOADING UMAP DATA...


Unnamed: 0,UMAP_1,UMAP_2
AAACATACAACCAC-1,9.728817,4.212151
AAACATTGAGCTAC-1,3.799765,10.181845
AAACATTGATCAGC-1,7.023628,4.829623
AAACCGTGCTTCCG-1,-0.298523,2.024061
AAACCGTGTATGCG-1,8.018503,-0.300426


In [5]:
# Read the tab-separated expression matrix; the first column of the file is
# used as the row index. Only the top-left 5x5 corner is displayed because
# the full table is too wide to show.
print("LOADING GENE EXPRESSION DATA...")

expr = pd.read_csv(
    '../data/pbmc3k_expression_log2TP10K_20210420.tsv',
    sep='\t',
    index_col=0,
)
expr.iloc[:5, :5]

LOADING GENE EXPRESSION DATA...


Unnamed: 0,AL627309.1,AP006222.2,RP11-206L10.2,RP11-206L10.9,LINC00115
AAACATACAACCAC-1,0.0,0.0,0.0,0.0,0.0
AAACATTGAGCTAC-1,0.0,0.0,0.0,0.0,0.0
AAACATTGATCAGC-1,0.0,0.0,0.0,0.0,0.0
AAACCGTGCTTCCG-1,0.0,0.0,0.0,0.0,0.0
AAACCGTGTATGCG-1,0.0,0.0,0.0,0.0,0.0


In [7]:
# Build the viewer from the expression DataFrame and the UMAP DataFrame
# (passed positionally, expression first), then start the interactive viewer.
svobj = SCIViewer(expr, umap)
svobj.explore_data()

Create renderer
Start thread
Finish thread
Setting up...
Setting up...
0.046588897705078125 seconds to select and project cells
Selected 750 cells
Calculating correlations...
0.6320891380310059 seconds to calculate correlations. Sparsity:  False


#### The class attributes below get populated in real time and thus can be accessed while the interactive viewer is running

In [12]:
## This attribute gets updated in real time whenever cells are selected
svobj.selected_cells.head()

Unnamed: 0,index,cell_name,projection
0,0,AAACATACAACCAC-1,0.423739
1,4,AAACCGTGTATGCG-1,0.966085
2,5,AAACGCACTGGTAC-1,0.404905
3,6,AAACGCTGACCAGT-1,0.281324
4,7,AAACGCTGGTTCTT-1,0.528745


In [13]:
## This gets updated in real time when cells are selected in directional mode.
## Rows are sorted by ascending 'P', so the preview shows the smallest P values first.
svobj.results_proj_correlation.sort_values(by='P').head()

Unnamed: 0,R,P
NKG7,0.840644,6.559576999999999e-278
CST7,0.777603,8.557222e-211
GZMA,0.720683,7.355183e-167
CCL5,0.656864,4.5828050000000004e-129
PRF1,0.653976,1.42823e-127


In [15]:
## This gets updated in real time when cells are selected in differential mode.
## Rows are sorted by ascending 'P', so the preview shows the smallest P values first.
svobj.results_diffexpr.sort_values(by='P').head()

Unnamed: 0,T,P
CD74,59.268987,1.2038420000000001e-206
HLA-DRA,53.161393,2.492434e-168
GIMAP7,-35.745215,5.299765e-135
PRKCQ-AS1,-22.658232,1.419184e-103
CMTM3,-21.694146,8.54505e-96


<a id="denseadata"></a>

## AnnData with dense input matrix
Alternatively, the data can be passed as a Scanpy AnnData object. By default, the expression data is read from the `.X` attribute and the UMAP data is read from `.obsm` with the key `'X_umap'`. However, alternative embeddings can be provided with the `embedding_name` argument, and the expression data can be read from the `.raw.X` attribute by setting the `use_raw` argument to `True`.

In [16]:
# Load the downloaded .h5ad file with Scanpy and display the object's summary.
data = sc.read('../data/pbmc3k_20210420.h5ad')
data

AnnData object with n_obs × n_vars = 2638 × 1838
    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt'
    var: 'gene_ids', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'hvg', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

EXPORTING DATA...
BYE


2022-09-05 18:49:31.243 python[55428:1341671] NewtNSView::dealloc: softLock still hold @ dealloc!


#### Here the data in data.X is dense and the data in data.raw.X is sparse. We first illustrate the method with the dense data and subsequently with the sparse data

In [17]:
# Build the viewer directly from the AnnData object: the embedding is taken
# from the 'X_umap' key and use_raw=False selects data.X rather than
# data.raw.X. Then start the interactive viewer.
svobj = SCIViewer(data, embedding_name='X_umap', use_raw=False)
svobj.explore_data()

Create renderer
Start thread
Finish thread
Setting up...
0.0469052791595459 seconds to select and project cells
Selected 640 cells
Calculating correlations...
0.05839276313781738 seconds to calculate correlations. Sparsity:  False


<a id="sparseadata"></a>

## AnnData with sparse input matrix


In [18]:
# Display the raw expression matrix to check its storage format.
data.raw.X

<2638x13714 sparse matrix of type '<class 'numpy.float32'>'
	with 2238732 stored elements in Compressed Sparse Row format>

#### This data is sparse but is in compressed sparse row format (csr_matrix) rather than csc_matrix, so we need to convert it to csc_matrix format

In [19]:
import scipy.sparse as sp

In [21]:
# Build a compressed-sparse-column copy of the raw expression matrix.
z = sp.csc_matrix(data.raw.X)

In [22]:
# Replace data.raw with a new AnnData wrapping the CSC matrix, reusing the
# existing raw gene annotations (raw.var) and cell annotations (obs).
# NOTE(review): presumably a new AnnData is needed because raw.X cannot be
# reassigned in place — confirm.
data.raw = sc.AnnData(z, var=data.raw.var, obs=data.obs)

In [23]:
# Display the raw matrix again to confirm the storage format after conversion.
data.raw.X

<2638x13714 sparse matrix of type '<class 'numpy.float32'>'
	with 2238732 stored elements in Compressed Sparse Column format>

EXPORTING DATA...
BYE


2022-09-05 18:50:01.486 python[55428:1341671] NewtNSView::dealloc: softLock still hold @ dealloc!


#### Now that data.raw.X is in csc_matrix format, it can be passed to sciviewer

In [24]:
# Same as the dense AnnData example, but use_raw=True selects the expression
# data in data.raw.X. Then start the interactive viewer.
svobj = SCIViewer(data, embedding_name='X_umap', use_raw=True)
svobj.explore_data()

Create renderer
Start thread
Finish thread
Setting up...


<a id="sparse_matrix"></a>

## Passing a sparse matrix directly

We can also pass a sparse matrix directly. Since no information is provided about the gene and cell names, we pass those as separate arguments

In [25]:
# Rebuild the embedding as a labelled DataFrame: rows are indexed by the
# index of data.obs and the two coordinate columns are named explicitly.
umap = pd.DataFrame(
    data.obsm['X_umap'],
    index=data.obs.index,
    columns=['UMAP_1', 'UMAP_2'],
)
umap.head()

Unnamed: 0,UMAP_1,UMAP_2
AAACATACAACCAC-1,9.728817,4.212151
AAACATTGAGCTAC-1,3.799765,10.181845
AAACATTGATCAGC-1,7.023628,4.829623
AAACCGTGCTTCCG-1,-0.298523,2.024061
AAACCGTGTATGCG-1,8.018503,-0.300426


In [26]:
# Cell names as a plain list, taken from the index of data.obs; show the first five.
cell_names = list(data.obs.index)
cell_names[:5]

['AAACATACAACCAC-1',
 'AAACATTGAGCTAC-1',
 'AAACATTGATCAGC-1',
 'AAACCGTGCTTCCG-1',
 'AAACCGTGTATGCG-1']

In [27]:
# Gene names as a plain list, taken from the index of data.raw.var so that
# they correspond to data.raw.X; show the first five.
gene_names = list(data.raw.var.index)
gene_names[:5]

['AL627309.1', 'AP006222.2', 'RP11-206L10.2', 'RP11-206L10.9', 'LINC00115']

In [28]:
# Rebind `expr` (previously the dense DataFrame) to the raw sparse matrix.
# It is in CSC format only if the conversion cells above have been run.
expr = data.raw.X

EXPORTING DATA...
BYE


2022-09-05 18:50:16.217 python[55428:1341671] NewtNSView::dealloc: softLock still hold @ dealloc!


In [29]:
# Build the viewer from the bare sparse matrix and the UMAP DataFrame. Gene
# and cell names are supplied explicitly because the matrix carries no
# labels. Then start the interactive viewer.
svobj = SCIViewer(expr, umap, gene_names=gene_names, cell_names=cell_names)
svobj.explore_data()

Create renderer
Start thread
Finish thread
Setting up...
0.020800113677978516 seconds to select and project cells
Selected 225 cells
Calculating correlations...
0.2850778102874756 seconds to calculate correlations. Sparsity:  True
EXPORTING DATA...
BYE


2022-09-05 18:50:27.405 python[55428:1341671] NewtNSView::dealloc: softLock still hold @ dealloc!
