In [1]:
# Connect to the local database
# NOTE(review): "cytopy_db" is presumably the database name - confirm against global_init
from cytopy.data.setup import global_init
global_init("cytopy_db")
# CytoPy imports
# NOTE(review): get_fcs_file_paths is not referenced in the cells visible here
from cytopy.data.project import Project
from cytopy.data.read_write import get_fcs_file_paths
# Other additional imports
# tqdm and os are used by the add_fcs_files helper defined later in the notebook;
# NOTE(review): pd is not referenced in the cells visible here
from tqdm.auto import tqdm
import pandas as pd
import os

<h1>Setting up the FlowCAP project</h1>

Create a `Project` document for FlowCAP. I need to specify the project ID and where to store HDF5 files locally; these will house the single cell data.

In [2]:
# Build the FlowCAP project document; `data_directory` is the local folder
# where the HDF5 files holding the single cell data are stored.
flowcap = Project(
    project_id="FlowCAP",
    data_directory="/media/ross/SP PHD U3/FlowCAP_CytoPy",
)
# Persist the document; as the last expression, its return value is echoed by the cell.
flowcap.save()

<Project: Project object>

We can reload the project using the `objects` method of `Project` with the query `project_id="FlowCAP"`. This generates a `QuerySet`: a list of the documents that match the query. We expect there to be only one such document, so we can use the `get` method to fetch the `Project`. This syntax can be used for any mongoengine document, but we will rarely need to interact with documents this way beyond the `Project` document.

I'm going to create an experiment for each of the datasets in the FlowCAP project. These datasets will be used for validation of the `CellClassifier` in a later notebook. To add an experiment, I use the `add_experiment` method, specifying a unique identifier for the experiment and the panel template (an Excel file stored in the `panels` folder in the same directory as this notebook).

In [3]:
# Reload the project document through the `objects` method; `get` returns the
# single document matching the query.
flowcap = Project.objects(project_id="FlowCAP").get()

# One experiment per FlowCAP dataset; each panel template is looked up by
# experiment ID under panels/flowcap/.
experiment_ids = ["CFSE", "GvHD", "Lymph", "NDD", "StemCell"]
for x in experiment_ids:
    panel_path = f"panels/flowcap/{x}.xlsx"
    flowcap.add_experiment(experiment_id=x, panel_definition=panel_path)

Below I've created a little utility function to help add fcs files to my experiments. The fcs files are added in sequence using the `add_fcs_files` method. The files don't require any compensation and for a couple of files I'm expecting a channel to be missing, so I tell CytoPy to just warn me and not throw an error.

In [4]:
def add_fcs_files(experiment,
                  fcs_dir):
    """
    Add every ".fcs" file found directly inside a directory to an experiment.

    Files are added one at a time, in sorted filename order, through the
    experiment's ``add_fcs_files`` method. Each file becomes one sample whose
    ID is the filename with its trailing extension removed. Compensation is
    switched off, verbose output is suppressed, and a missing channel is
    reported as a warning rather than an error.

    Parameters
    ----------
    experiment
        Object exposing an ``add_fcs_files`` method (here, an experiment
        obtained from the project).
    fcs_dir: str
        Directory to scan; only entries whose name ends in ".fcs" are used.

    Returns
    -------
    None
    """
    # os.listdir returns entries in arbitrary order; sort so samples are
    # always added in the same sequence from run to run.
    files = sorted(f for f in os.listdir(fcs_dir) if f.endswith(".fcs"))
    for f in tqdm(files):
        filepath = os.path.join(fcs_dir, f)
        # Remove only the trailing extension. str.replace(".fcs", "") would
        # also strip any ".fcs" embedded earlier in the filename and so
        # produce the wrong sample ID.
        sample_id = os.path.splitext(f)[0]
        experiment.add_fcs_files(sample_id=sample_id,
                                 primary=filepath,
                                 compensate=False,
                                 verbose=False,
                                 missing_error="warn")

In [1]:
# Local path where FlowCAP fcs files are kept; the cells that follow read each
# dataset's files from a "<dataset>/FCS" subfolder beneath it.
# NOTE(review): machine-specific absolute path - edit before running elsewhere.
dirpath = "/media/ross/extdrive/FlowCAP/FlowCAP1/Data/FCM/fcs"

In [6]:
# Fetch the five experiments from the project in one pass; the variable order
# mirrors the order of the experiment IDs.
cfse, gvhd, lymph, ndd, stemcell = (
    flowcap.get_experiment(experiment_id)
    for experiment_id in ("CFSE", "GvHD", "Lymph", "NDD", "StemCell")
)

In [7]:
# Load every CFSE fcs file into its experiment
add_fcs_files(experiment=cfse, fcs_dir=f"{dirpath}/CFSE/FCS")

  0%|          | 0/13 [00:00<?, ?it/s]

In [8]:
# Load every GvHD fcs file into its experiment
add_fcs_files(experiment=gvhd, fcs_dir=f"{dirpath}/GvHD/FCS")

  0%|          | 0/12 [00:00<?, ?it/s]

In [9]:
# Load every Lymph fcs file into its experiment; the recorded output of this
# cell shows a "Missing channel" warning, which is raised as a warning only
# because the helper passes missing_error="warn".
add_fcs_files(experiment=lymph, fcs_dir=f"{dirpath}/Lymph/FCS")

  0%|          | 0/30 [00:00<?, ?it/s]

  warn(f"Missing channel {x.standard}")


In [10]:
# Load every NDD fcs file into its experiment
add_fcs_files(experiment=ndd, fcs_dir=f"{dirpath}/NDD/FCS")

  0%|          | 0/30 [00:00<?, ?it/s]

In [11]:
# Load every StemCell fcs file into its experiment
add_fcs_files(experiment=stemcell, fcs_dir=f"{dirpath}/StemCell/FCS")

  0%|          | 0/30 [00:00<?, ?it/s]