In [2]:
import numpy as np
import pandas as pd
import scprep
import re
import os

# Project root. Defaults to the original author's local checkout; set the
# MOUSE_PROJECT_DIR environment variable to run the notebook from another
# location without editing the code.
prefix = os.environ.get('MOUSE_PROJECT_DIR', '/Users/6j9/projects/mouse')
# Folder holding one sub-directory per sequencing experiment.
data_dir = os.path.join(prefix, 'data')

In [3]:
# Experiment folders under data_dir; each must contain a
# 'filtered_feature_bc_matrix' directory readable by scprep.io.load_10X.
experiments = ['FGC2063_5_86846', 'FGC2063_5_86847', 'FGC2063_5_86848', 'FGC2063_5_86849', 'FGC2091_7_92848']

# Load one sparse matrix per experiment. gene_labels='both' is requested so the
# column labels carry the gene symbol followed by " (<...>)", which is parsed below.
batch_matrices = []
for experiment in experiments:
    print(experiment)
    matrix_dir = os.path.join(data_dir, experiment, 'filtered_feature_bc_matrix')
    mat = scprep.io.load_10X(matrix_dir, sparse=True, gene_labels='both')
    batch_matrices.append(mat)

# Short batch label = the part after the last underscore ('FGC2063_5_86846' -> '86846').
# Derived from `experiments` so the two lists cannot drift out of sync.
experiment_names = [name.rsplit('_', 1)[-1] for name in experiments]
matrix, labels = scprep.utils.combine_batches(batch_matrices, experiment_names, append_to_cell_names=True)
print("batches combined...")
# Drop the per-batch matrices; only the combined matrix is used from here on.
del batch_matrices

matrix = scprep.normalize.library_size_normalize(matrix)

# Extract the upper-cased gene symbol from every column label.
# - Raw string: "\(" is not a valid escape in a normal string literal.
# - Greedy "(.+)" keeps everything before the LAST " (" in the label.
# - Compiled once instead of on every iteration.
symbol_pattern = re.compile(r"(.+) \(")
symbols = []
for gene in matrix.columns.values:
    match = symbol_pattern.search(gene)
    if match is None:
        # Fail with the offending label instead of an AttributeError on None.
        raise ValueError(f"cannot parse gene symbol from column label {gene!r}")
    symbols.append(match.group(1).upper())
# dtype=str sizes the array to the longest symbol; the previous fixed 'U30'
# buffer silently truncated anything longer than 30 characters.
gene_symbols = np.array(symbols, dtype=str)

# Chemokine genes whose expression is exported further down.
genes = ['CXCL12', 'CCL11', 'CCL7']

FGC2063_5_86846
FGC2063_5_86847
FGC2063_5_86848
FGC2063_5_86849
FGC2091_7_92848
batches combined...


In [12]:
# Fraction of a gene's expressing (non-zero) cells that lie below the reported cutoff.
cutoff_fraction = 0.9

# One column of normalized expression per requested gene, indexed like `matrix`.
out_cols = pd.DataFrame(index=matrix.index, columns=genes)
for gene in genes:
    matches = np.where(gene_symbols == gene)[0]
    if matches.size == 0:
        raise KeyError(f"gene symbol {gene!r} not found among the matrix columns")
    # If a symbol occurs more than once, the first matching column is used.
    idx = matches[0]
    out_cols[gene] = matrix.iloc[:, idx]

    # Sorted non-zero expression values for this gene.
    expression = np.asarray(out_cols[gene])
    nnz_cols = np.sort(expression[expression != 0])
    if nnz_cols.size == 0:
        print(f"{gene} {cutoff_fraction:.0%} cutoff: n/a (no non-zero values)")
        continue

    # round(fraction * n) equals n for very small n (e.g. n <= 4 at 0.9), which
    # would index past the end; clamp to the last element in that case.
    cutoff_idx = min(round(cutoff_fraction * len(nnz_cols)), len(nnz_cols) - 1)
    print(f"{gene} {cutoff_fraction:.0%} cutoff: {nnz_cols[cutoff_idx]}")

CXCL12 90% cutoff: 22.42152466367713
CCL11 90% cutoff: 29.885057471264368
CCL7 90% cutoff: 104.16666666666666


In [14]:
# Write the per-cell chemokine expression table as a tab-separated file.
out_path = os.path.join(prefix, 'cytoscape/attributes/expression/chemokine_cols.tsv')
# to_csv does not create missing parent folders, so make sure the nested
# output directory exists before writing.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
out_cols.to_csv(out_path, sep="\t")