In [1]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import scipy.sparse

In [2]:
print(sys.version)
print('numpy version:', np.__version__)
print('pandas version:', pd.__version__)
print('scipy version:', scipy.__version__)

3.7.0 (default, Jul  2 2018, 15:31:25) 
[Clang 9.0.0 (clang-900.0.39.2)]
numpy version: 1.16.2
pandas version: 0.24.1
scipy version: 1.2.1


In [3]:
import rpy2
import rpy2.robjects as robjects
print('rpy2:', rpy2.__version__)

rpy2: 3.0.1


# Drop-Seq data

## Raw matrix

In [4]:
matrix_csc_values = robjects.r['readRDS']('../../data/drop-seq/expr_readcount_raw_csc_values.rds')
matrix_csc_indices = robjects.r['readRDS']('../../data/drop-seq/expr_readcount_raw_csc_indices.rds')
matrix_csc_indptr = robjects.r['readRDS']('../../data/drop-seq/expr_readcount_raw_csc_indptr.rds')
matrix_csc_shape = robjects.r['readRDS']('../../data/drop-seq/expr_readcount_raw_csc_shape.rds')

In [5]:
expr_readcount_raw = scipy.sparse.csc_matrix((np.asarray(matrix_csc_values, dtype=np.int64), 
                                              np.asarray(matrix_csc_indices, dtype=np.int64), 
                                              np.asarray(matrix_csc_indptr, dtype=np.int64)), 
                                             shape=np.asarray(matrix_csc_shape, dtype=np.int64))

In [6]:
expr_readcount_raw.shape

(27999, 27416)

In [7]:
if not Path('../../data/drop-seq/expr_readcount_raw_csc.npz').is_file():
    scipy.sparse.save_npz('../../data/drop-seq/expr_readcount_raw_csc.npz', 
                          expr_readcount_raw, compressed=True)

In [8]:
expr_readcount_raw_dimnames = robjects.r['readRDS']('../../data/drop-seq/expr_readcount_raw_csc_dimnames.rds')

In [9]:
expr_readcount_raw_dimnames = np.array([np.asarray(expr_readcount_raw_dimnames[0], dtype='U'),
                                        np.asarray(expr_readcount_raw_dimnames[1], dtype='U')])

In [10]:
if not Path('../../data/drop-seq/expr_readcount_raw_csc_dimnames.npy').is_file():
    np.save('../../data/drop-seq/expr_readcount_raw_csc_dimnames.npy', 
            expr_readcount_raw_dimnames)

## Normalized and scaled matrix

In [11]:
expr_readcount_norm_log_corrected_scaled = \
    robjects.r['readRDS']('../../data/drop-seq/expr_readcount_norm_log_corrected_scaled.rds')

In [12]:
expr_readcount_norm_log_corrected_scaled_dimnames = \
    np.array([np.asarray(robjects.r['rownames'](expr_readcount_norm_log_corrected_scaled), dtype='U'),
              np.asarray(robjects.r['colnames'](expr_readcount_norm_log_corrected_scaled), dtype='U')])

In [13]:
if not Path('../../data/drop-seq/expr_readcount_norm_log_corrected_scaled_dimnames.npy').is_file():
    np.save('../../data/drop-seq/expr_readcount_norm_log_corrected_scaled_dimnames.npy', 
            expr_readcount_norm_log_corrected_scaled_dimnames)

In [14]:
expr_readcount_norm_log_corrected_scaled_dims = robjects.r['dim'](expr_readcount_norm_log_corrected_scaled)

In [15]:
expr_readcount_norm_log_corrected_scaled = \
    np.asarray(expr_readcount_norm_log_corrected_scaled).reshape(expr_readcount_norm_log_corrected_scaled_dims[1], 
                                                                 expr_readcount_norm_log_corrected_scaled_dims[0]).T

In [16]:
expr_readcount_norm_log_corrected_scaled

array([[-0.09100748, -0.05435914, -0.08411386, ..., -0.20700135,
        -0.31560168, -0.00389515],
       [-0.09100748, -0.05435914, -0.08411386, ..., -0.20700135,
        -0.31560168, -0.00389515],
       [-0.09100748, -0.05435914, -0.08411386, ..., -0.20700135,
        -0.31560168, -0.00389515],
       ...,
       [-0.05435914, -0.08411386, -0.01280428, ..., -0.31560168,
        -0.00389515, -0.00407362],
       [-0.05435914, -0.08411386, -0.01280428, ..., -0.31560168,
        -0.00389515, -0.00407362],
       [-0.05435914, -0.08411386, -0.01280428, ...,  3.45034835,
        -0.00389515, -0.00407362]])

In [17]:
if not Path('../../data/drop-seq/expr_readcount_norm_log_corrected_scaled.npy').is_file():
    np.save('../../data/drop-seq/expr_readcount_norm_log_corrected_scaled.npy', 
            expr_readcount_norm_log_corrected_scaled)

## PCA result

In [18]:
pca_out = robjects.r['readRDS']('../../data/drop-seq/pca_out.rds')

In [19]:
pca_out

0,1
sdev,[RTYPES.REALSXP]
rotation,[RTYPES.REALSXP]
center,[RTYPES.LGLSXP]
scale,[RTYPES.LGLSXP]
x,[RTYPES.REALSXP]


In [20]:
pca_out[4]

0,1,2,3,4,5,6
-23.117708,-13.744099,-20.771961,...,0.0,-0.0,-0.0


In [21]:
pca_out_x_dims = robjects.r['dim'](pca_out[4])

In [22]:
pca_out_x = np.asarray(pca_out[4]).reshape(pca_out_x_dims[1], 
                                           pca_out_x_dims[0]).T

In [23]:
if not Path('../../data/drop-seq/pca_out_x.npy').is_file():
    np.save('../../data/drop-seq/pca_out_x.npy', pca_out_x)

# 10x data

## Raw matrix

In [24]:
matrix_csc_values = robjects.r['readRDS']('../../data/10x/expr_readcount_raw_csc_values.rds')
matrix_csc_indices = robjects.r['c'](robjects.r['readRDS']('../../data/10x/expr_readcount_raw_csc_indices_part1.rds'),
                                     robjects.r['readRDS']('../../data/10x/expr_readcount_raw_csc_indices_part2.rds'))
matrix_csc_indptr = robjects.r['readRDS']('../../data/10x/expr_readcount_raw_csc_indptr.rds')
matrix_csc_shape = robjects.r['readRDS']('../../data/10x/expr_readcount_raw_csc_shape.rds')

In [25]:
expr_readcount_raw = scipy.sparse.csc_matrix((np.asarray(matrix_csc_values, dtype=np.int64), 
                                              np.asarray(matrix_csc_indices, dtype=np.int64), 
                                              np.asarray(matrix_csc_indptr, dtype=np.int64)), 
                                             shape=np.asarray(matrix_csc_shape, dtype=np.int64))

In [26]:
expr_readcount_raw.shape

(27999, 34564)

In [27]:
if not Path('../../data/10x/expr_readcount_raw_csc.npz').is_file():
    scipy.sparse.save_npz('../../data/10x/expr_readcount_raw_csc.npz', 
                          expr_readcount_raw, compressed=True)

In [28]:
expr_readcount_raw_dimnames = robjects.r['readRDS']('../../data/10x/expr_readcount_raw_csc_dimnames.rds')

In [29]:
expr_readcount_raw_dimnames = np.array([np.asarray(expr_readcount_raw_dimnames[0], dtype='U'),
                                        np.asarray(expr_readcount_raw_dimnames[1], dtype='U')])

In [30]:
if not Path('../../data/10x/expr_readcount_raw_csc_dimnames.npy').is_file():
    np.save('../../data/10x/expr_readcount_raw_csc_dimnames.npy', 
            expr_readcount_raw_dimnames)