# Convert workspace files into a single AnnData object

In [1]:
import os
import numpy as np
import pandas as pd
import flowkit as fk
import anndata as ad

## Get flow data from workspace

In [2]:
# Root directory of the example data set; the workspace file and the folder
# of FCS files are both resolved relative to it.
base_dir = "FlowKit/data/8_color_data_set/"
wsp_path = os.path.join(base_dir, "8_color_ICS.wsp")
sample_path = os.path.join(base_dir, "fcs_files")

# Random seed for the notebook.
seed = 123

In [3]:
# Open the workspace file and point it at the directory holding the FCS files.
workspace = fk.Workspace(wsp_path, fcs_samples=sample_path)

In [4]:
# List the sample groups known to the workspace so one can be picked below.
sample_groups = workspace.get_sample_groups()
sample_groups

['All Samples', 'DEN', 'GEN', 'G69', 'Lyo Cells']

In [5]:
# Name of the sample group to analyze (one of the groups listed above).
sample_group = 'DEN'

In [6]:
# Only the chosen sample group is analyzed in this notebook, so restrict the
# sample IDs to that group instead of taking every loaded sample; gate queries
# for samples outside the analyzed group are not expected to work.
sample_ids = workspace.get_sample_ids(group_name=sample_group)
sample_ids

['101_DEN084Y5_15_E01_008_clean.fcs',
 '101_DEN084Y5_15_E03_009_clean.fcs',
 '101_DEN084Y5_15_E05_010_clean.fcs']

In [7]:
# Analyze the samples of the chosen group.
# NOTE(review): presumably this has to run before the gate membership and
# gate event queries in the cells below — confirm against the FlowKit docs.
workspace.analyze_samples(sample_group)

### Get gate index for each event for all gates and all samples

In [8]:
# Build one Boolean column per gate. Each column stacks the membership arrays
# of every sample in sample_ids order, and is named by joining the gate path
# and gate name with ':'.
# NOTE(review): the gate list is taken from the first sample only, which
# assumes every sample shares the same gate tree — confirm.
gate_columns = {}
for gate_name, gate_path in workspace.get_gate_ids(sample_ids[0]):
    per_sample = [
        workspace.get_gate_membership(
            sample_id,
            gate_name=gate_name,
            gate_path=gate_path,
        )
        for sample_id in sample_ids
    ]
    column_label = ':'.join([*gate_path, gate_name])
    gate_columns[column_label] = np.concatenate(per_sample)

gates = pd.DataFrame(gate_columns)

In [9]:
gates.head()

Unnamed: 0,root:Time,root:Time:Singlets,root:Time:Singlets:aAmine-,root:Time:Singlets:aAmine-:CD3+,root:Time:Singlets:aAmine-:CD3+:CD4+,root:Time:Singlets:aAmine-:CD3+:CD4+:CD107a+,root:Time:Singlets:aAmine-:CD3+:CD4+:IFNg+,root:Time:Singlets:aAmine-:CD3+:CD4+:IL2+,root:Time:Singlets:aAmine-:CD3+:CD4+:TNFa+,root:Time:Singlets:aAmine-:CD3+:CD8+,root:Time:Singlets:aAmine-:CD3+:CD8+:CD107a+,root:Time:Singlets:aAmine-:CD3+:CD8+:IFNg+,root:Time:Singlets:aAmine-:CD3+:CD8+:IL2+,root:Time:Singlets:aAmine-:CD3+:CD8+:TNFa+
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False


### Get events for all samples

In [10]:
# One events table per sample, stacked in sample_ids order. pd.concat keeps
# each table's own row labels, so index values repeat across samples.
dfs = [workspace.get_gate_events(sample_id) for sample_id in sample_ids]
df = pd.concat(dfs)

In [11]:
# Preview five random events; seeding the draw keeps the displayed rows
# identical across Restart & Run All.
df.sample(5, random_state=seed)

Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,TNFa FITC FLR-A,CD8 PerCP-Cy55 FLR-A,IL2 BV421 FLR-A,Aqua Amine FLR-A,IFNg APC FLR-A,CD3 APC-H7 FLR-A,CD107a PE FLR-A,CD4 PE-Cy7 FLR-A,Time
234387,101_DEN084Y5_15_E03_009_clean.fcs,0.403857,0.325562,0.310123,0.194456,0.178665,0.272096,0.254525,0.242479,0.282847,0.237928,0.247936,0.587036,0.272754,0.637655,0.832343
43398,101_DEN084Y5_15_E03_009_clean.fcs,0.495163,0.3867,0.320121,0.146589,0.13731,0.266895,0.233296,0.22155,0.299568,0.233666,0.238637,0.586473,0.267327,0.628017,0.168456
279014,101_DEN084Y5_15_E01_008_clean.fcs,0.529986,0.417942,0.317021,0.527556,0.459034,0.287319,0.310727,0.281187,0.417927,0.302841,0.253692,0.257075,0.450266,0.296353,0.961764
234062,101_DEN084Y5_15_E05_010_clean.fcs,0.517539,0.39843,0.324737,0.178106,0.168312,0.264548,0.253658,0.684596,0.324576,0.244547,0.244326,0.432067,0.269009,0.267718,0.816393
45945,101_DEN084Y5_15_E03_009_clean.fcs,0.618173,0.526649,0.293446,0.23502,0.214836,0.273487,0.243639,0.27676,0.258647,0.25537,0.240111,0.282921,0.301616,0.225198,0.177502


## Populate AnnData object

### Matrix of values

In [12]:
# Event matrix: every column of df except the leading sample_id column.
data = df.iloc[:, 1:].to_numpy()

In [13]:
# Wrap the event matrix in an AnnData object, passing the matrix's own dtype.
# NOTE(review): newer anndata releases appear to deprecate the `dtype`
# argument — confirm against the installed version before removing it.
adata = ad.AnnData(X=data, dtype=data.dtype)

In [14]:
adata

AnnData object with n_obs × n_vars = 859431 × 15

In [15]:
adata.X

array([[0.66919339, 0.55024338, 0.30404428, ..., 0.30073305, 0.56641316,
        0.03594016],
       [0.47061452, 0.40513611, 0.29040521, ..., 0.27877141, 0.24435586,
        0.03598285],
       [0.6183387 , 0.51881409, 0.29795775, ..., 0.28183939, 0.26840002,
        0.03602554],
       ...,
       [0.49872547, 0.42063522, 0.29641208, ..., 0.27990703, 0.61764152,
        0.98959426],
       [0.5290575 , 0.34724426, 0.38089722, ..., 0.61813569, 0.52649112,
        0.98960942],
       [0.39496988, 0.33108139, 0.2982423 , ..., 0.43869763, 0.62046722,
        0.98960942]])

### Cell labels

In [16]:
# Label the cells '0', '1', ... as strings, one label per observation.
adata.obs_names = pd.RangeIndex(adata.n_obs).astype(str)

### Marker labels

In [17]:
# Label the variables with df's column names, skipping column 0 (sample_id)
# so the labels line up with the matrix taken from df.iloc[:, 1:].
adata.var_names = df.columns[1:]

### Attach sample information to each cell

In [18]:
# pd.Categorical carries no index, so the sample labels are attached to the
# cells by position rather than aligned on df's (repeating) row labels.
adata.obs['sample_id'] = pd.Categorical(df['sample_id'])

In [19]:
adata.obs.head(3)

Unnamed: 0,sample_id
0,101_DEN084Y5_15_E01_008_clean.fcs
1,101_DEN084Y5_15_E01_008_clean.fcs
2,101_DEN084Y5_15_E01_008_clean.fcs


In [20]:
adata

AnnData object with n_obs × n_vars = 859431 × 15
    obs: 'sample_id'

### Add Boolean matrix of gate indices

In [21]:
# Give the gate table the same row labels as the AnnData observations, then
# store it as a per-cell annotation.
gates.index = adata.obs_names
adata.obsm['gate_index'] = gates

In [22]:
adata

AnnData object with n_obs × n_vars = 859431 × 15
    obs: 'sample_id'
    obsm: 'gate_index'

### Add unstructured data for gating hierarchy and transforms

In [23]:
sample_ids

['101_DEN084Y5_15_E01_008_clean.fcs',
 '101_DEN084Y5_15_E03_009_clean.fcs',
 '101_DEN084Y5_15_E05_010_clean.fcs']

The transforms need to be converted to `str`; otherwise the object cannot be saved to HDF5.

In [24]:
# String form of each sample's gate hierarchy, keyed by sample ID.
gate_hierarchy = {
    sid: str(workspace.get_gate_hierarchy(sid))
    for sid in sample_ids
}

In [25]:
# Store the per-sample gate hierarchy strings as unstructured annotation.
adata.uns['gate_hierarchy'] = gate_hierarchy

In [26]:
# String form of each sample's transforms, keyed by sample ID.
transforms = {
    sid: str(workspace.get_transforms(sid))
    for sid in sample_ids
}

In [27]:
# Store the per-sample transform strings as unstructured annotation.
adata.uns['transforms'] = transforms

### Add marker summary statistics

In [28]:
# Summary statistics per marker, computed over all events rather than only
# the first ten rows, so the stored values describe the whole data set.
stats = df.iloc[:, 1:].describe()
# describe() puts markers in columns; transpose so there is one row per
# marker, matching the variables of adata.
adata.varm['stats'] = stats.T

In [29]:
adata

AnnData object with n_obs × n_vars = 859431 × 15
    obs: 'sample_id'
    uns: 'gate_hierarchy', 'transforms'
    obsm: 'gate_index'
    varm: 'stats'

### Save to disk

In [30]:
# Persist the assembled AnnData object as a gzip-compressed .h5ad file in the
# current working directory.
adata.write_h5ad('8_color_data_set.h5ad', compression="gzip")