## This notebook is an example: create a copy before running it or you will get merge conflicts!

In [None]:
import sys
sys.path.append('../')

import os
import shutil

import skimage.io as io
import pandas as pd
from mibi_bin_tools import bin_files
from toffy import rosetta

from ark.utils.io_utils import list_folders, list_files

### First, make a folder to hold all of the files related to rosetta processing, and put the full path below

In [None]:
# full path to the folder that holds all of the files related to rosetta processing;
# replace this placeholder before running the cells below, which build their paths from it
base_dir = 'path/to/base/dir'

### Next, copy over the .bin files for the ~10 FOVs you will use for testing. For each .bin file, make sure to also copy over the .json file with the same name. Place all of them in a folder named *example_bins* inside *base_dir*.

#### For example, fov-1-scan-1.bin, fov-1-scan-1.json, fov-23-scan-1.bin, fov-23-scan-1.json, etc

In [None]:
# this folder should contain the bins and JSONs for the ~10 fovs; the folder name
# matches the 'example_bins' folder the instructions above ask you to create
test_bin_dir = os.path.join(base_dir, 'example_bins')

### Next, copy the *commercial_rosetta_matrix.csv* and the *example_panel_file.csv* files from the *files* directory of toffy into *base_dir*. Make sure to update the Target column of *example_panel_file.csv* with the details of your panel. For targets you aren't using, leave the rows as they are; don't delete them. Once you've updated the panel file, put its name below.

In [None]:
# file name (not full path) of the updated panel csv; it is read from base_dir below
panel_file_name = 'example_panel_file.csv'

### We'll then use this panel file to extract the images from the bin files


In [None]:
# specify folder to hold extracted files
img_out_dir = os.path.join(base_dir, 'extracted_images')

# Read in updated panel file
panel = pd.read_csv(os.path.join(base_dir, panel_file_name))

# extract the bin files, additionally requesting intensity images for Au and chan_39
bin_files.extract_bin_files(test_bin_dir, img_out_dir, panel=panel, intensities=['Au', 'chan_39'])

# replace count images with intensity images
rosetta.replace_with_intensity_image(run_dir=img_out_dir, channel='Au')
rosetta.replace_with_intensity_image(run_dir=img_out_dir, channel='chan_39')

# clean up dirs
rosetta.remove_sub_dirs(run_dir=img_out_dir, sub_dirs=['intensities', 'intensity_times_width'])

# normalize images to allow direct comparison with rosetta
fovs = list_folders(img_out_dir)
for fov in fovs:
    fov_dir = os.path.join(img_out_dir, fov)
    sub_dir = os.path.join(fov_dir, 'normalized')

    # exist_ok lets this cell be re-run without failing on the already-created folder
    os.makedirs(sub_dir, exist_ok=True)

    # each listed file in the fov folder is treated as one channel image
    chans = list_files(fov_dir)
    for chan in chans:
        img = io.imread(os.path.join(fov_dir, chan))

        # scale every pixel by 1/100; the compensation cell below reads these
        # 'normalized' images and passes norm_const=1
        img = img / 100
        io.imsave(os.path.join(sub_dir, chan), img, check_contrast=False)

### We'll now process the images with rosetta to remove signal contamination

In [None]:
# pick an informative name
output_folder = 'rosetta_output'

# create sub-folder to hold images and files from this set of multipliers;
# exist_ok lets this cell be re-run without failing on the already-created folder
output_folder_path = os.path.join(base_dir, output_folder)
os.makedirs(output_folder_path, exist_ok=True)

# compensate the data, reading the images saved in each fov's 'normalized' sub-folder
rosetta_mat_path = os.path.join(base_dir, 'commercial_rosetta_matrix.csv')
rosetta.compensate_image_data(raw_data_dir=img_out_dir, comp_data_dir=output_folder_path,
                              comp_mat_path=rosetta_mat_path, raw_data_sub_folder='normalized',
                              panel_info=panel, batch_size=1, norm_const=1)

### Now that we've generated the compensated data, we'll generate stitched images to visualize what signal was removed

In [None]:
# stitch images together to enable easy visualization of outputs;
# exist_ok lets this cell be re-run without failing on the already-created folder
stitched_dir = os.path.join(base_dir, 'stitched_images')
os.makedirs(stitched_dir, exist_ok=True)

# tile the extracted (raw) images and the compensated images into stitched_dir
rosetta.create_tiled_comparison(input_dir_list=[img_out_dir, output_folder_path], output_dir=stitched_dir)

# add the source channel for gold and Noodle; each channel gets its own output folder
for channel in ['Au', 'Noodle']:
    output_dir = os.path.join(base_dir, 'stitched_with_' + channel)
    os.makedirs(output_dir, exist_ok=True)
    rosetta.add_source_channel_to_tiled_image(raw_img_dir=img_out_dir, tiled_img_dir=stitched_dir,
                                              output_dir=output_dir, source_channel=channel)

### There will now be two folders, *stitched_with_Au* and *stitched_with_Noodle*, within the *base_dir*. You can look through these stitched images to visualize what signal is being removed from the two most common source channels.

### Once you're satisfied that Rosetta is working appropriately, you can use it to process your entire dataset

In [None]:
# Specify necessary folders (both values are placeholders: replace them with real paths)

# This should be a folder of run folders. Each folder within bin_file_dir should contain all of the .bin and .json files for that run
bin_file_dir = 'path/to/cohort/all_runs'

# This folder is where all of the extracted images will get saved; the extraction loop below creates one sub-folder per run inside it
extracted_image_dir = 'path/to/cohort/extracted_runs'

In [None]:
# If you only want to extract a subset of your runs, specify their names here; otherwise, leave as None
runs = None
if runs is None:
    # default to every run folder found in bin_file_dir
    runs = list_folders(bin_file_dir)

for run in runs:
    print("processing run {}".format(run))
    current_bin = os.path.join(bin_file_dir, run)
    current_out = os.path.join(extracted_image_dir, run)

    # create the per-run output folder; exist_ok avoids a separate existence check
    os.makedirs(current_out, exist_ok=True)

    # extract bins and replace gold image
    bin_files.extract_bin_files(current_bin, current_out, panel=panel, intensities=['Au', 'chan_39'])
    rosetta.replace_with_intensity_image(run_dir=current_out, channel='Au')
    rosetta.replace_with_intensity_image(run_dir=current_out, channel='chan_39')

    # clean up dirs
    rosetta.remove_sub_dirs(run_dir=current_out, sub_dirs=['intensities', 'intensity_times_width'])

In [None]:
# specify path to save rosetta images; os.path.join inserts the path separator that
# plain string concatenation with base_dir would leave out
rosetta_image_dir = os.path.join(base_dir, 'rosetta_run_output')

In [None]:
# Perform rosetta on extracted images, writing one output folder per run
for run in runs:
    print("processing run {}".format(run))
    raw_img_dir = os.path.join(extracted_image_dir, run)
    out_dir = os.path.join(rosetta_image_dir, run)

    # create the per-run output folder; exist_ok avoids a separate existence check
    os.makedirs(out_dir, exist_ok=True)

    # uses the same compensation matrix and panel as the test run above
    rosetta.compensate_image_data(raw_data_dir=raw_img_dir, comp_data_dir=out_dir,
                                  comp_mat_path=rosetta_mat_path, panel_info=panel, batch_size=1)