In [27]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import scipy.io as sio


In [54]:
# Load the aggregated count matrix plus its per-row metadata and per-column
# feature annotations, verify that the feature annotations are identical for
# every sample, and export the annotations as CSV files.

# define paths
meta_path = '../raw_data/cellranger_counts_out/aggr_out/metadata.npy'
features_name_path = '../raw_data/cellranger_counts_out/aggr_out/features_name.npy'
matrix_path = '../raw_data/cellranger_counts_out/aggr_out/counts.npy'
features_id_path = '../raw_data/cellranger_counts_out/aggr_out/features_id.npy'
barcodes_path = '../raw_data/cellranger_counts_out/aggr_out/barcodes.npy'

# load data
# The arrays below are opened as read-only memory maps, so nothing is read
# into RAM until it is actually indexed.
counts = np.lib.format.open_memmap(matrix_path, mode='r')
features_name = np.lib.format.open_memmap(features_name_path, mode='r')
features_id = np.lib.format.open_memmap(features_id_path, mode='r')
barcodes = np.lib.format.open_memmap(barcodes_path, mode='r')
# NOTE(review): allow_pickle=True unpickles arbitrary objects and can execute
# code on load; only point this at trusted, locally generated files.
meta = np.load(meta_path, allow_pickle=True)

# get column names
# NOTE(review): meta_col_names is not used anywhere in this cell; it is kept
# in case a later cell relies on it -- confirm and remove if it is dead.
meta_col_names = np.array(pd.read_csv('../raw_data/metadata/single_cell_individual_metadata.csv', index_col=0).columns)


# run checks
def _check_identical_across_samples(flat_values, n_samples, n_features, what, success_message):
    """Verify that a flat per-sample annotation array repeats the same block.

    `flat_values` is reshaped to (n_samples, n_features) and every row is
    compared against the first one.

    Parameters
    ----------
    flat_values : array-like with n_samples * n_features elements
    n_samples : int, number of stacked per-sample blocks
    n_features : int, length of each block
    what : str, label used in error messages
    success_message : str, printed when all blocks are identical

    Raises
    ------
    ValueError
        If there are no samples, if the array cannot be reshaped to
        (n_samples, n_features), or if any block differs from the first.
    """
    if n_samples == 0:
        raise ValueError('Cannot check {}: no samples found in metadata.'.format(what))
    per_sample = flat_values.reshape((n_samples, n_features))
    reference = per_sample[0]
    # Compare row by row rather than broadcasting, to avoid materialising a
    # full-size boolean array for large inputs.
    for sample_idx, row in enumerate(per_sample):
        if not np.array_equal(reference, row):
            raise ValueError(
                '{} differ between sample block 0 and sample block {}; '
                'refusing to export inconsistent annotations.'.format(what, sample_idx)
            )
    print(success_message)


# Column 1 of the raw metadata is the field later exported as 'projID'; its
# number of distinct values is used as the number of aggregated samples.
n_samples = len(np.unique(meta[:, 1]))
# counts.shape[1] is treated as the number of features per sample.
n_features = counts.shape[1]

_check_identical_across_samples(
    features_name, n_samples, n_features, 'Feature names',
    'Feature names were aggregated correctly across samples.')
_check_identical_across_samples(
    features_id, n_samples, n_features, 'Feature IDs',
    'Feature IDs were aggregated correctly across samples.')

# concatenate features and metadata
# Only the first block of n_features entries is kept: the checks above
# guarantee every other block is identical to it.
features = np.concatenate((features_name[:n_features].reshape(-1, 1), features_id[:n_features].reshape(-1, 1)), axis=1)
# Prepending the barcodes shifts every original metadata column right by one.
meta = np.concatenate((barcodes.reshape(-1, 1), meta), axis=1)

# save coldata and rowdata
colData = pd.DataFrame(features)
colData.columns = ['gene_name', 'Ensembl_id']
colData.to_csv('../open_data/synapse/expression_matrices/raw_colData.csv', index=False)

# Columns 2 and 3 of the barcode-prefixed metadata (original columns 1 and 2).
# NOTE(review): the barcode column itself is not exported -- confirm intended.
rowData = pd.DataFrame(meta[:, [2, 3]])
rowData.columns = ['projID', 'libraryID']
rowData.to_csv('../open_data/synapse/expression_matrices/raw_rowData.csv', index=False)

Feature names were aggregated correctly across samples.
Feature IDs were aggregated correctly across samples.


In [28]:
# Convert the count matrix to compressed-sparse-row form and export it as a
# Matrix Market (.mtx) file.
raw_counts_mtx_path = "../open_data/synapse/expression_matrices/raw_counts.mtx"
counts_sparse = csr_matrix(counts)
sio.mmwrite(raw_counts_mtx_path, counts_sparse)
