In [1]:
%load_ext py5
%gui osx

In [2]:
import pandas as pd
import numpy as np
from sciviewer import SCIViewer

In [6]:
print('DOWNLOADING AND EXTRACTING EXAMPLE DATA')
! mkdir -p ../data
! wget https://storage.googleapis.com/sabeti-public/dkotliar/scnavigator/pbmc3k/data/pbmc3k_umap_20210324.tsv -O ../data/pbmc3k_umap_20210324.tsv
! wget https://storage.googleapis.com/sabeti-public/dkotliar/scnavigator/pbmc3k/data/pbmc3k_expression_log2TP10K_20210324.tsv.gz -O ../data/pbmc3k_expression_log2TP10K_20210324.tsv.gz
! gzip -df ../data/pbmc3k_expression_log2TP10K_20210324.tsv.gz
! ls ../data

DOWNLOADING AND EXTRACTING EXAMPLE DATA
--2021-04-19 10:50:37--  https://storage.googleapis.com/sabeti-public/dkotliar/scnavigator/pbmc3k/data/pbmc3k_umap_20210324.tsv
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.6.240, 172.217.10.240, 172.217.10.16, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.6.240|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 96413 (94K) [text/tab-separated-values]
Saving to: ‘../data/pbmc3k_umap_20210324.tsv’


2021-04-19 10:50:37 (1.84 MB/s) - ‘../data/pbmc3k_umap_20210324.tsv’ saved [96413/96413]

--2021-04-19 10:50:37--  https://storage.googleapis.com/sabeti-public/dkotliar/scnavigator/pbmc3k/data/pbmc3k_expression_log2TP10K_20210324.tsv.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.6.240, 172.217.10.240, 172.217.10.16, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.6.240|:443... connected.
HTTP request sent, awaiting response... 

In [3]:
print("LOADING UMAP DATA...")

umap = pd.read_csv('../data/pbmc3k_umap_20210324.tsv', sep='\t', index_col=0)
umap.head()

LOADING UMAP DATA...


Unnamed: 0,UMAP_1,UMAP_2
AAACATACAACCAC-1,11.257295,12.561447
AAACATTGAGCTAC-1,7.166475,-2.057729
AAACATTGATCAGC-1,15.217629,12.554446
AAACCGTGCTTCCG-1,-1.826561,6.781221
AAACCGTGTATGCG-1,7.656905,17.59844


In [4]:
print("LOADING GENE EXPRESSION DATA...")

expr = pd.read_csv('../data/pbmc3k_expression_log2TP10K_20210324.tsv', sep='\t', index_col=0)
expr.iloc[:5,:5]

LOADING GENE EXPRESSION DATA...


Unnamed: 0,LINC00115,NOC2L,KLHL17,PLEKHN1,HES4
AAACATACAACCAC-1,0.0,0.0,0.0,0.0,0.0
AAACATTGAGCTAC-1,0.0,0.0,0.0,0.0,0.0
AAACATTGATCAGC-1,0.0,0.0,0.0,0.0,0.0
AAACCGTGCTTCCG-1,0.0,0.0,0.0,0.0,0.0
AAACCGTGTATGCG-1,0.0,0.0,0.0,0.0,0.0


In [6]:
svobj = SCIViewer(umap, expr)
svobj.explore_data()

0.0765528678894043 seconds to select and project cells
Selected 526 cells
Calculating correlations...


  rs = np.dot(DP, DO) / np.sqrt(np.sum(DO ** 2, 0) * np.sum(DP ** 2))


1.1528651714324951 seconds to calculate correlations. Sparsity:  False


## The class attributes below get populated in real time and thus can be accessed when the interactive viewer is running

In [8]:
## This attribute get updated in real time whenever cells are selected

svobj.selected_cells.head()

Unnamed: 0,index,cell_name,projection
0,0,AAACATACAACCAC-1,0.079498
1,4,AAACCGTGTATGCG-1,0.771944
2,6,AAACGCTGACCAGT-1,0.275037
3,7,AAACGCTGGTTCTT-1,0.214253
4,8,AAACGCTGTAGCCA-1,0.196479


In [9]:
## This gets updated in real time when cells are selected in differential expression mode
svobj.results_proj_correlation.head()

Unnamed: 0,R,P
LINC00115,-0.004645,0.915352
NOC2L,0.053452,0.221003
KLHL17,0.053389,0.221548
PLEKHN1,0.027167,0.534134
HES4,0.057122,0.190866


0.06090807914733887 seconds to select and project cells
Selected 343 cells
Calculating differential expression...
0.022533178329467773 seconds to calculate genesums. Sparsity:  False
0.34513401985168457 seconds to calculate squared genesums. Sparsity:  False


  remainder_stds = np.sqrt((self.gene_sqsum - selected_stds - (remainder_N*remainder_means**2)) / (remainder_N -1))


1.312014102935791 seconds to calculate differential expression. Sparsity:  False


In [11]:
## This gets updated in real time when cells are selected in directional correlation mode
svobj.results_diffexpr.head()

Unnamed: 0,T,P
LINC00115,0.587919,0.556917
NOC2L,1.403902,0.161092
KLHL17,-0.25911,0.795651
PLEKHN1,-2.635037,0.00847
HES4,-4.88108,1e-06


0.07551074028015137 seconds to select and project cells
Selected 525 cells
Calculating correlations...


  rs = np.dot(DP, DO) / np.sqrt(np.sum(DO ** 2, 0) * np.sum(DP ** 2))


1.277630090713501 seconds to calculate correlations. Sparsity:  False


In [12]:
svobj.results_proj_correlation.head()

Unnamed: 0,R,P
LINC00115,-0.004961,0.909711
NOC2L,0.052584,0.229047
KLHL17,0.053361,0.22223
PLEKHN1,0.02701,0.536898
HES4,0.057113,0.19136


## The class attributes below only get populated when the export and close button is pushed

In [18]:
svobj.significant_genes.head()

Unnamed: 0,R,P
GZMB,0.874089,3.9960159999999994e-166
GNLY,0.779129,3.73149e-108
FGFBP2,0.755538,3.926686e-98
PRF1,0.733799,7.355188e-90
NKG7,0.7218,1.236486e-85


In [17]:
svobj.significant_genes.tail()

Unnamed: 0,R,P
IL7R,-0.458064,1.359945e-28
GZMK,-0.464188,2.0567840000000001e-29
LTB,-0.573685,2.822993e-47
CD3D,-0.592153,5.402195999999999e-51
EEF1A1,-0.61795,1.314918e-56


In [15]:
svobj.selected_gene_cell_data.head()

Unnamed: 0,index,proj,exp
0,AAACATACAACCAC-1,0.077073,0.0
1,AAACCGTGTATGCG-1,0.755663,0.0
2,AAACGCTGACCAGT-1,0.268625,0.0
3,AAACGCTGGTTCTT-1,0.209041,2.205804
4,AAACGCTGTAGCCA-1,0.19195,0.0
