## Processing training data for DeepCell

In [None]:
import skimage.io as io
import importlib
import os
import numpy as np
import xarray as xr

import sys
sys.path.append("../")

from segmentation.utils import data_utils

In [None]:
# create npz array of labeled images for training
# base_dir is the directory holding the training data; data_name is the prefix
# shared by every file this notebook reads or writes.
base_dir = 'path_to_training_data'
data_name = "training_freeze_1_H3_NaK"

# Option 1: load data from previously created xarray.
# Paths are built with os.path.join so that base_dir works with or without a
# trailing path separator (plain string concatenation would silently point
# outside the directory when the separator is missing).
training_data_x = xr.load_dataarray(os.path.join(base_dir, data_name + "_X.nc"))
training_data_y = xr.load_dataarray(os.path.join(base_dir, data_name + "_y.nc"))

# Option 2: or create one now from the per-point tif folders.
# NOTE: if both options are run, the arrays built here replace the ones loaded
# above; run only the option you need.
training_data_x = data_utils.load_tifs_from_points_dir(base_dir, tif_folder="raw", tifs=["HH3.tif", "NaK ATPase.tif"])
# quick visual sanity check: third entry along the first axis, second channel
io.imshow(training_data_x[2, :, :, 1])

training_data_y = data_utils.load_tifs_from_points_dir(base_dir, tif_folder="annotated", tifs=["Cell_Mask_Label.tif"])
# same entry from the annotation stack, first (only requested) channel
io.imshow(training_data_y[2, :, :, 0])

In [None]:
# subset data if only a portion will be used
# Apply the same window to X and y so they stay aligned: everything is kept
# on the first, second and last axes, and only the first 396 positions are
# kept on the third axis.
training_data_x, training_data_y = (
    stack[:, :, :396, :] for stack in (training_data_x, training_data_y)
)

In [None]:
# add blank channels if missing from imaging run
# non_blank_channels: channels that were actually acquired in this run.
# channel_order: the full channel list, in the order passed to the helper.
# NOTE(review): the raw tifs are loaded as "HH3.tif" elsewhere in this notebook
# while the channel is called "H3" here; confirm the names line up with what
# reorder_xarray_channels expects.
non_blank_channels = ["H3", "NaK ATPase"]
channel_order = non_blank_channels + ["Lamin AC"]
training_data_x = data_utils.reorder_xarray_channels(
    channel_xr=training_data_x,
    channel_order=channel_order,
    non_blank_channels=non_blank_channels,
)

In [None]:
# separate out points that will become test points
# Name of the point held out for testing; defined once so the X and y splits
# cannot drift apart.
test_point = "Point12"

# Each pair of lines first pulls out the held-out point, then drops it from the
# training array. Order matters: the test subset must be taken before the
# training array is overwritten.
training_data_x_test = training_data_x.loc[training_data_x.points == test_point]
training_data_x = training_data_x.loc[training_data_x.points != test_point]
training_data_y_test = training_data_y.loc[training_data_y.points == test_point]
training_data_y = training_data_y.loc[training_data_y.points != test_point]

# Save the held-out data next to the training data. os.path.join keeps the file
# inside base_dir whether or not base_dir ends with a path separator.
np.savez(os.path.join(base_dir, data_name + "_test.npz"), X=training_data_x_test, y=training_data_y_test)

In [None]:
# crop data to appropriate size
crop_size = 256
# passed to crop_image_stack as stride_fraction
stride = 0.3
training_data_x_cropped = data_utils.crop_image_stack(training_data_x, crop_size=crop_size, stride_fraction=stride)
training_data_y_cropped = data_utils.crop_image_stack(training_data_y, crop_size=crop_size, stride_fraction=stride)

# X and y may differ in their last axis, but every other axis must match,
# otherwise crops and labels would no longer line up.
if training_data_y_cropped.shape[:-1] != training_data_x_cropped.shape[:-1]:
    raise ValueError("cropped arrays have different sizes")
print("looks good")

# File name records the crop size and stride used. os.path.join keeps the file
# inside base_dir whether or not base_dir ends with a path separator.
cropped_name = data_name + "_{}x{}_stride_{}.npz".format(crop_size, crop_size, stride)
np.savez(os.path.join(base_dir, cropped_name),
         X=training_data_x_cropped, y=training_data_y_cropped)

In [None]:
# combine different npzs together
# Files to merge, in the order their samples will appear in the combined arrays.
npz_names = [
    "R1_Point26_256x256_stride_0.3.npz",
    "R1_Point32_256x256_stride_0.3.npz",
    "R6_Point5_256x256_stride_0.3.npz",
]

x_parts = []
y_parts = []
for npz_name in npz_names:
    # np.load on an .npz keeps the archive open until it is closed; the
    # with-block releases the file handle as soon as both arrays are read.
    with np.load(os.path.join(base_dir, npz_name)) as npz:
        x_parts.append(npz["X"])
        y_parts.append(npz["y"])

# stack along the first axis
combined_x = np.concatenate(x_parts, axis=0)
combined_y = np.concatenate(y_parts, axis=0)

np.savez(os.path.join(base_dir, "PAH_Caliban_V3_redo.npz"), X=combined_x, y=combined_y)