In [1]:
import scanpy as sc
import pandas as pd
import os

# Root for all data paths: the process's current working directory at the time
# this cell runs (so results depend on where the kernel was started).
BASE_DIR = os.getcwd()

In [2]:
# Read the 10x HDF5 feature/barcode matrix from BASE_DIR/data.
# gex_only=False is passed so that non-gene-expression features are not dropped
# on load (the antibody features are selected from `adata` afterwards).
adata = sc.read_10x_h5(
    os.path.join(
        BASE_DIR,
        "data",
        "5k_pbmc_protein_v3_nextgem_filtered_feature_bc_matrix.h5",
    ),
    gex_only=False,
)
# Make duplicated variable names unique (modifies `adata` in place).
adata.var_names_make_unique()

  utils.warn_names_duplicates("var")


In [3]:
# Restrict to the CITE-seq features, i.e. variables whose feature type is
# "Antibody Capture"; all observations are kept.
adata_citeseq = adata[:, adata.var["feature_types"].eq("Antibody Capture")]

In [4]:
# Dense table of the antibody matrix: one row per observation, one column per
# feature, with columns labelled by the AnnData variable names.
gen_data = pd.DataFrame(
    adata_citeseq.X.toarray(),
    columns=pd.Index(adata_citeseq.var.index),
)

In [5]:
# Append a running 0..n-1 row counter as the "barcode_rank" column.
gen_data["barcode_rank"] = range(len(gen_data))

In [6]:
# Display the subset's summary repr (dimensions and available `var` columns).
adata_citeseq

View of AnnData object with n_obs × n_vars = 5527 × 32
    var: 'gene_ids', 'feature_types', 'genome', 'pattern', 'read', 'sequence'

### Generate FCS file

In [7]:
import flowio
import numpy as np

# Path of the FCS file under BASE_DIR/data.
FCS_OUTPUT_FILENAME = os.path.join(BASE_DIR, "data", "citeseq.fcs")

In [8]:
# Write the antibody matrix to an FCS file, one event per observation.
# Each event is the row of antibody values followed by a running 0..n-1
# "barcode_rank" value; rows are flattened (row-major) into the 1-D sequence
# passed as event_data, and channel_names lists the columns in the same order.
#
# The file is opened in a `with` block so the handle is closed even if
# create_fcs raises (the previous inline open() leaked it on failure).
# `file_obj` stays bound to the (now closed) handle afterwards, as before.
with open(FCS_OUTPUT_FILENAME, mode="wb") as file_obj:
    flowio.create_fcs(
        file_handle=file_obj,
        event_data=np.column_stack(
            [adata_citeseq.X.toarray(), np.arange(adata_citeseq.n_obs)]
        ).flatten(),
        channel_names=adata_citeseq.var.index.tolist() + ["barcode_rank"],
    )

In [9]:
# Load the FCS file at FCS_OUTPUT_FILENAME with flowio.
fcs_data = flowio.FlowData(FCS_OUTPUT_FILENAME)

In [10]:
# Preview the first rows of the flat event sequence reshaped to
# (n_events, channel_count).
pd.DataFrame(
    np.asarray(fcs_data.events).reshape(-1, fcs_data.channel_count)
).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
0,12.0,280.0,9.0,3122.0,746.0,9.0,1.0,5.0,2.0,4.0,...,2.0,6.0,8.0,4.0,101.0,4.0,5.0,2.0,4.0,0.0
1,24.0,231.0,11.0,1241.0,355.0,10.0,2.0,4.0,11.0,5.0,...,1.0,4.0,8.0,8.0,1450.0,5.0,0.0,1.0,1.0,1.0
2,23.0,117.0,3.0,582.0,133.0,7.0,4.0,1.0,7.0,3.0,...,4.0,5.0,2.0,2.0,524.0,2.0,1.0,4.0,2.0,2.0
3,8.0,83.0,1.0,1966.0,675.0,7.0,2.0,3.0,8.0,3.0,...,3.0,2.0,2.0,5.0,216.0,0.0,5.0,1.0,3.0,3.0
4,726.0,1100.0,5.0,14.0,13.0,6.0,2.0,7.0,6.0,11.0,...,2.0,129.0,4.0,5.0,9.0,5.0,1.0,3.0,4.0,4.0


In [11]:
# Round-trip check: reshape the events to (n_events, channel_count), drop the
# last channel, and compare element-wise with the dense antibody matrix.
(
    np.asarray(fcs_data.events).reshape(-1, fcs_data.channel_count)[:, :-1]
    == adata_citeseq.X.toarray()
).all()

True