### This is a notebook to run the preprocessing pipeline prior to uploading your TIFs to deepcell

In [2]:
# import required packages
import os
import numpy as np
import skimage.io as io
import xarray as xr

# add package to system path
import sys
sys.path.append("../")

from segmentation.utils import data_utils

## This notebook is currently configured as a template to run with the provided example data. If running on your own data, make a copy of this notebook before modifying it: go to File -> Make a copy.

In [3]:
# load TIFs from GUI-based directory structure
data_dir = "../data/example_dataset/input_data/"

# either get all points in the folder
# os.listdir returns entries in arbitrary order, so sort them to keep the fov order
# reproducible between runs; os.path.join keeps the directory check correct even if
# data_dir is edited to drop its trailing slash
points = sorted(
    point for point in os.listdir(data_dir)
    if point.startswith("Point") and os.path.isdir(os.path.join(data_dir, point))
)

# optionally, select a specific set of points manually
# points = ["Point1", "Point2"]

In [4]:
# show which point folders were selected for processing
print(points)

['Point8']


In [5]:
# optionally, specify a set of channels to be summed together for better contrast

sum_channels_xr = data_utils.load_imgs_from_dir(data_dir, img_sub_folder="TIFs", fovs=points,
                                                imgs=["CD3.tif", "CD8.tif"])

# Sum across the channels axis. Accumulating directly in uint8 wraps around modulo 256
# for any pixel whose total exceeds 255, so accumulate in int64 and saturate to the
# uint8 range before converting to the dtype the TIF is saved in.
uint8_limits = np.iinfo(np.uint8)
channel_sum = np.clip(
    np.sum(sum_channels_xr.values, axis=3, dtype=np.int64),
    uint8_limits.min,
    uint8_limits.max,
).astype("uint8")

new_channel_name = "summed_channel"

# wrap the sum in an xarray with the same fovs/rows/cols coordinates as the inputs
# and a single-entry channel axis named after the new channel
summed_xr = xr.DataArray(np.expand_dims(channel_sum, axis=-1),
                         coords=[sum_channels_xr.fovs, sum_channels_xr.rows,
                                 sum_channels_xr.cols, [new_channel_name]],
                         dims=sum_channels_xr.dims)

# write each new channel into the TIFs subfolder of every point
tif_saves = [new_channel_name]
for point in summed_xr.fovs.values:
    for tif in tif_saves:
        save_path = os.path.join(data_dir, point, "TIFs", tif + ".tif")
        io.imsave(save_path, summed_xr.loc[point, :, :, tif].values.astype("uint8"))



In [11]:
# show the loaded channel stack, then its shape and its dimension names, one per line
print(sum_channels_xr, sum_channels_xr.shape, sum_channels_xr.dims, sep="\n")

<xarray.DataArray (fovs: 1, rows: 1024, cols: 1024, channels: 2)>
array([[[[0, 0],
         ...,
         [1, 1]],

        ...,

        [[2, 0],
         ...,
         [1, 0]]]], dtype=int16)
Coordinates:
  * fovs      (fovs) <U6 'Point8'
  * rows      (rows) int64 0 1 2 3 4 5 6 ... 1017 1018 1019 1020 1021 1022 1023
  * cols      (cols) int64 0 1 2 3 4 5 6 ... 1017 1018 1019 1020 1021 1022 1023
  * channels  (channels) <U3 'CD3' 'CD8'
(1, 1024, 1024, 2)
('fovs', 'rows', 'cols', 'channels')


In [9]:
# inspect the summed pixel values computed from the loaded channels
print(channel_sum)

[[[0 0 0 ... 1 1 2]
  [2 2 1 ... 1 2 0]
  [1 2 1 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 2 ... 1 1 0]
  [2 1 1 ... 1 1 1]]]


In [10]:
# inspect the single-channel xarray built from the channel sum
print(summed_xr)

<xarray.DataArray (fovs: 1, rows: 1024, cols: 1024, channels: 1)>
array([[[[0],
         ...,
         [2]],

        ...,

        [[2],
         ...,
         [1]]]], dtype=uint8)
Coordinates:
  * fovs      (fovs) <U6 'Point8'
  * rows      (rows) int64 0 1 2 3 4 5 6 ... 1017 1018 1019 1020 1021 1022 1023
  * cols      (cols) int64 0 1 2 3 4 5 6 ... 1017 1018 1019 1020 1021 1022 1023
  * channels  (channels) <U14 'summed_channel'


In [6]:
# channels that make up the deepcell input
tifs = ["HH3.tif", "Membrane.tif"]
data_xr = data_utils.load_imgs_from_dir(
    data_dir,
    img_sub_folder="TIFs",
    fovs=points,
    imgs=tifs,
)

# write the loaded xarray to disk for running through deepcell
deepcell_input_path = os.path.join(data_dir, "deepcell_input.xr")
data_xr.to_netcdf(deepcell_input_path, format="NETCDF3_64BIT")

In [13]:
# show the deepcell input xarray followed by its shape
print(data_xr, data_xr.shape, sep="\n")

<xarray.DataArray (fovs: 1, rows: 1024, cols: 1024, channels: 2)>
array([[[[0, 0],
         ...,
         [0, 0]],

        ...,

        [[0, 0],
         ...,
         [0, 0]]]], dtype=int16)
Coordinates:
  * fovs      (fovs) <U6 'Point8'
  * rows      (rows) int64 0 1 2 3 4 5 6 ... 1017 1018 1019 1020 1021 1022 1023
  * cols      (cols) int64 0 1 2 3 4 5 6 ... 1017 1018 1019 1020 1021 1022 1023
  * channels  (channels) <U8 'HH3' 'Membrane'
(1, 1024, 1024, 2)


In [None]:
# Notes:
# In data_utils, the code defaults to the test image because variable_sizes is false
# 

### Questions:
# Minor question: I understand that we're defaulting to the same image for both, but are the two deepcell notebooks
# basically importing the same things using different methodologies? Which one are we currently using?

# On your own: are the channels we're interested in HH3 and Membrane, or a different set?