In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from upath import UPath

import pici_vitessce as pv

## Overview

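We will convert the data from its original format (Parquet cell tables and per-channel TIFF images) to Zarr-based formats that work well with Vitessce and cloud storage: AnnData for the cell tables and OME-Zarr for the images and segmentation masks. The Vitessce configs will be generated in the second notebook.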
<center>
<table>
<tr>
<th> Input Directory Structure </th>
<th> Output Directory Structure </th>
</tr>
<tr>
<td>

```sh
data/
└── processed/
    ├── immune/
    │   ├── cell_table/
    │   │   └── cell_table_immune_thresholded.parquet
    │   ├── image_data/
    │   │   ├── fov1/
    │   │   │   ├── chan1.tiff
    │   │   │   ├── chan2.tiff
    │   │   │   └── ...
    │   │   └── ...
    │   └── segmentation/
    │       └── deepcell_output/
    │           ├── fov1_whole_cell.tiff
    │           └── ...
    └── tumor/
        ├── cell_table/
        │   └── cell_table_tumor_thresholded.parquet
        ├── image_data/
        │   ├── fov1/
        │   │   ├── chan1.tiff
        │   │   ├── chan2.tiff
        │   │   └── ...
        │   └── ...
        └── segmentation/
            └── deepcell_output/
                ├── fov1_whole_cell.tiff
                └── ...
```

</td>
<td>

```sh
data/
└── vitessce/
    ├── fov1/
    │   ├── immune/
    │   │   ├── whole_cell_table.zarr
    │   │   ├── image.ome.zarr
    │   │   └── segmentation.ome.zarr
    │   ├── tumor/
    │   │   ├── whole_cell_table.zarr
    │   │   ├── image.ome.zarr
    │   │   └── segmentation.ome.zarr
    │   └── configs/
    │       ├── immune_config.json
    │       ├── tumor_config.json
    │       └── both_config.json
    └── ...
```

</td>
</tr>
</table>
</center>

## Setup Data

### Set up Directories

We set up the directories for ingestion and output here.

In [None]:
def _list_fov_paths(fov_dir: UPath) -> list[UPath]:
    """Return the FOV sub-directories directly inside ``fov_dir``, sorted by name.

    Hidden entries (names starting with ".") are excluded by the glob pattern,
    and non-directory entries are skipped so stray files are not treated as FOVs.
    The result is a list rather than the one-shot generator that ``glob``
    returns, so it can be iterated more than once, and it is sorted because
    ``glob`` makes no ordering guarantee.
    """
    return sorted(
        (entry for entry in fov_dir.glob("[!.]*") if entry.is_dir()),
        key=lambda entry: entry.name,
    )


# Root folders: raw inputs are read from `processed`, results go to `vitessce`.
processed_data_path = UPath("../data/processed")
vitessce_path = UPath("../data/vitessce")

# Immune panel inputs.
immune_data_path = processed_data_path / "immune"
immune_cell_table_path = immune_data_path / "cell_table/cell_table_immune_thresholded.parquet"
immune_fov_dir = immune_data_path / "image_data"
immune_fov_paths = _list_fov_paths(immune_fov_dir)
immune_fov_names = [p.name for p in immune_fov_paths]
immune_segmentation_dir = immune_data_path / "segmentation/deepcell_output"


# Tumor panel inputs (same layout as the immune panel).
tumor_data_path = processed_data_path / "tumor"
tumor_cell_table_path = tumor_data_path / "cell_table/cell_table_tumor_thresholded.parquet"
tumor_fov_dir = tumor_data_path / "image_data"
tumor_fov_paths = _list_fov_paths(tumor_fov_dir)
tumor_fov_names = [p.name for p in tumor_fov_paths]
tumor_segmentation_dir = tumor_data_path / "segmentation/deepcell_output"

### Set up AnnData column names

In [None]:
# Channel / marker names for the immune panel (order preserved from the original list).
# fmt: off
immune_markers = [
    "CD11b", "CD14", "CD141", "CD163", "CD206", "CD209",
    "CD3", "CD38", "CD4", "CD45", "CD68", "CD8", "CD86",
    "Calprotectin", "Chym_Tryp", "FoxP3", "GLUT1", "HLA1", "HLADR",
    "ICOS", "IDO1", "Ki67", "LAG3", "PD1", "PDL1", "TIM3",
    "TMEM119", "Tox", "Arginase1", "Olig2",
]

# Channel / marker names for the tumor panel.
# Note the spelling "FOXP3" here versus "FoxP3" in the immune panel.
tumor_markers = [
    "HLA1", "CD14", "CD3", "CD45", "CD8", "FOXP3", "HLADR",
    "ApoE", "B7H3", "CD133", "EGFR", "EGFRvIII", "GFAP",
    "GM2_GD2", "GPC2", "H3K27M", "H3K27me3", "HER2", "IDH1_R132H",
    "Ki67", "NG2", "Olig2", "VISTA",
]
# fmt: on

# Cell-table columns passed as `obs_cols` to the AnnData conversion.
obs_cols = ["fov", "label", "cell_meta_cluster_final_broad", "cell_meta_cluster_final"]

# Cell-table columns passed as `obsm_cols` (the two centroid coordinates).
obsm_cols = ["centroid-0", "centroid-1"]

## Convert to Vitessce Accessible Formats

### Convert the Cell Table to AnnData

In [None]:
# Arguments that are identical for the immune and tumor cell tables.
shared_table_kwargs = {
    "obs_cols": obs_cols,
    "obsm_cols": obsm_cols,
    "vitessce_path": vitessce_path,
}

# Immune panel.
pv.pp.process_cell_table_to_anndata(
    fov_category="immune",
    cell_table_path=immune_cell_table_path,
    segmentation_dir=immune_segmentation_dir,
    fovs=immune_fov_names,
    markers=immune_markers,
    **shared_table_kwargs,
)

# Tumor panel. The rename mapping goes from "FoxP3" to "FOXP3", the spelling
# used in `tumor_markers`.
# NOTE(review): presumably the tumor cell table names this column "FoxP3" — confirm.
pv.pp.process_cell_table_to_anndata(
    fov_category="tumor",
    cell_table_path=tumor_cell_table_path,
    segmentation_dir=tumor_segmentation_dir,
    fovs=tumor_fov_names,
    markers=tumor_markers,
    rename_markers={"FoxP3": "FOXP3"},
    **shared_table_kwargs,
)

### Convert the FOVs to OME-Zarr Images

In [None]:
# Per-category inputs for the image conversion; both write under `vitessce_path`.
immune_image_kwargs = {
    "fov_category": "immune",
    "fovs": immune_fov_names,
    "markers": immune_markers,
    "fovs_dir": immune_fov_dir,
}
tumor_image_kwargs = {
    "fov_category": "tumor",
    "fovs": tumor_fov_names,
    "markers": tumor_markers,
    "fovs_dir": tumor_fov_dir,
}

pv.pp.convert_fovs_to_zarr(vitessce_path=vitessce_path, **immune_image_kwargs)
pv.pp.convert_fovs_to_zarr(vitessce_path=vitessce_path, **tumor_image_kwargs)

### Convert the Segmentation Masks to OME-Zarr Images

In [None]:
# Arguments shared by both segmentation conversions.
# NOTE(review): `segmentation_mask_suffixes` is plural but receives a bare string —
# confirm the function accepts a single suffix as well as a collection.
shared_segmentation_kwargs = {
    "segmentation_mask_suffixes": "whole_cell",
    "vitessce_path": vitessce_path,
}

# Immune panel masks.
pv.pp.convert_segmentations_to_zarr(
    fov_category="immune",
    fovs=immune_fov_names,
    segmentation_dir=immune_segmentation_dir,
    **shared_segmentation_kwargs,
)

# Tumor panel masks.
pv.pp.convert_segmentations_to_zarr(
    fov_category="tumor",
    fovs=tumor_fov_names,
    segmentation_dir=tumor_segmentation_dir,
    **shared_segmentation_kwargs,
)