# Format low-SNR input images for TagRFP-only network

All AutoCellLabeler networks are trained on high-SNR images created by averaging together 60 individual frames. This notebook shows how to format single low-SNR frames for input to the TagRFP-only network to demonstrate its generalizability to low-SNR images.

In [None]:
import os

import pandas as pd 

import nrrd
import numpy as np

import h5py

from tqdm import tqdm

from matplotlib import pyplot as plt

import shutil

import openpyxl
import csv
import re

import itertools

from functools import reduce

from autolabel import *


## Define dataset paths

In [None]:
# Dataset identifiers, grouped by the project they belong to.
datasets_prj_neuropal = [
    "2022-07-15-06", "2022-07-15-12", "2022-07-20-01", "2022-07-26-01",
    "2022-08-02-01", "2023-01-23-08", "2023-01-23-15", "2023-01-23-21",
    "2023-01-19-08", "2023-01-19-22", "2023-01-09-28", "2023-01-17-01",
    "2023-01-19-15", "2023-01-23-01", "2023-03-07-01", "2022-12-21-06",
    "2023-01-05-18", "2023-01-06-01", "2023-01-06-08", "2023-01-09-08",
    "2023-01-09-15", "2023-01-09-22", "2023-01-10-07", "2023-01-10-14",
    "2023-01-13-07", "2023-01-16-01", "2023-01-16-08", "2023-01-16-15",
    "2023-01-16-22", "2023-01-17-07", "2023-01-17-14", "2023-01-18-01",
]
datasets_prj_rim = [
    "2023-06-09-01", "2023-07-28-04", "2023-06-24-02", "2023-07-07-11",
    "2023-08-07-01", "2023-06-24-11", "2023-07-07-18", "2023-08-18-11",
    "2023-06-24-28", "2023-07-11-02", "2023-08-22-08", "2023-07-12-01",
    "2023-07-01-09", "2023-07-13-01", "2023-06-09-10", "2023-07-07-01",
    "2023-08-07-16", "2023-08-22-01", "2023-08-23-23", "2023-08-25-02",
    "2023-09-15-01", "2023-09-15-08", "2023-08-18-18", "2023-08-19-01",
    "2023-08-23-09", "2023-08-25-09", "2023-09-01-01", "2023-08-31-03",
    "2023-07-01-01", "2023-07-01-23",
]
datasets_prj_aversion = [
    "2023-03-30-01", "2023-06-29-01", "2023-06-29-13", "2023-07-14-08",
    "2023-07-14-14", "2023-07-27-01", "2023-08-08-07", "2023-08-14-01",
    "2023-08-16-01", "2023-08-21-01", "2023-09-07-01", "2023-09-14-01",
    "2023-08-15-01", "2023-10-05-01", "2023-06-23-08", "2023-12-11-01",
    "2023-06-21-01",
]
datasets_prj_5ht = [
    "2022-07-26-31", "2022-07-26-38", "2022-07-27-31", "2022-07-27-38",
    "2022-07-27-45", "2022-08-02-31", "2022-08-02-38", "2022-08-03-31",
]
datasets_prj_starvation = [
    "2023-05-25-08", "2023-05-26-08", "2023-06-05-10", "2023-06-05-17",
    "2023-07-24-27", "2023-09-27-14", "2023-05-25-01", "2023-05-26-01",
    "2023-07-24-12", "2023-07-24-20", "2023-09-12-01", "2023-09-19-01",
    "2023-09-29-19", "2023-10-09-01", "2023-09-13-02",
]

# Every dataset across all projects, in project order.
datasets = [
    *datasets_prj_neuropal,
    *datasets_prj_rim,
    *datasets_prj_aversion,
    *datasets_prj_5ht,
    *datasets_prj_starvation,
]
# Sanity check: prints True when no dataset name appears more than once.
print(len(datasets) == len(set(datasets)))

# Held-out validation datasets.
datasets_val = [
    "2023-06-24-02", "2023-08-07-01", "2023-08-19-01",  # RIM datasets
    "2022-07-26-01", "2023-01-23-21", "2023-01-23-01",  # NeuroPAL datasets
    "2023-07-14-08",  # Aversion datasets
    "2022-08-02-31",  # 5-HT datasets
    "2023-07-24-27", "2023-07-24-20",  # Starvation datasets
]
# Held-out test datasets.
datasets_test = [
    "2023-08-22-01", "2023-07-07-18", "2023-07-01-23",  # RIM datasets
    "2023-01-06-01", "2023-01-10-07", "2023-01-17-07",  # NeuroPAL datasets
    "2023-08-21-01", "2023-06-23-08",  # Aversion datasets
    "2022-07-27-38",  # 5-HT datasets
    "2023-10-09-01", "2023-09-13-02",  # Starvation datasets
]
# Training set: everything that is in neither the validation nor the test split.
datasets_train = [
    name
    for name in datasets
    if not (name in datasets_val or name in datasets_test)
]

Define locations of the data from different projects.

Alternatively, download the data from [our Dropbox](https://www.dropbox.com/scl/fo/ealblchspq427pfmhtg7h/ALZ7AE5o3bT0VUQ8TTeR1As?rlkey=1e6tseyuwd04rbj7wmn2n6ij7&st=ybsvv0ry&dl=0) under `AutoCellLabeler/lowSNR_immobilized`. It contains ROI files and pre-expanded NRRD image files. If you download the data, set `output_path_nrrd` and `output_path_roi` to the location of the downloaded data.

In [None]:
# Root directory of the processed data for each project, keyed by project name.
input_paths = dict(
    prj_rim="/store1/prj_rim/data_processed",
    prj_neuropal="/store1/prj_neuropal/data_processed",
    prj_starvation="/data1/prj_starvation/data_processed",
    prj_5ht="/data3/prj_5ht/published_data/data_processed_neuropal",
    prj_aversion="/data1/prj_aversion/data_processed",
)

# Destination directories used by the export loop; replace the placeholders
# with real locations before running.
output_path_nrrd = "/path/to/your/data_dir/nrrd"
output_path_roi_crop = "/path/to/your/data_dir/roi_crop"
output_path_h5 = "/path/to/your/data_dir/h5"

Define dataset-specific parameters.

`all_red_paths` should be a dictionary mapping the name of each freely-moving dataset to the name of the immobilized TagRFP dataset it corresponds to.

`reg_timepts` should be a dictionary mapping the names of the freely-moving datasets to the specific timepoint in the immobilized TagRFP dataset that all of the immobilized NeuroPAL data was registered to.

In [None]:
# One row per test dataset:
#   (freely-moving dataset, immobilized TagRFP dataset, registration timepoint)
# The timepoint is kept as a string because it is later zero-padded and
# concatenated into file names.
_immobilized_table = (
    ("2023-07-07-18", "2023-07-07-19", "30"),
    ("2023-08-22-01", "2023-08-22-02", "30"),
    ("2023-07-01-23", "2023-07-01-24", "30"),
    ("2023-01-06-01", "2023-01-06-02", "30"),
    ("2023-01-10-07", "2023-01-10-08", "30"),
    ("2023-01-17-07", "2023-01-17-08", "30"),
    ("2023-10-09-01", "2023-10-09-02", "30"),
    ("2023-09-13-02", "2023-09-13-03", "25"),
    ("2023-08-21-01", "2023-08-21-02", "30"),
    ("2023-06-23-08", "2023-06-23-09", "30"),
    ("2022-07-27-38", "2022-07-27-39", "25"),
)

# Freely-moving dataset name -> immobilized TagRFP dataset name.
all_red_paths = {moving: immobilized for moving, immobilized, _ in _immobilized_table}

# Freely-moving dataset name -> registration timepoint (as a string).
reg_timepts = {moving: timepoint for moving, _, timepoint in _immobilized_table}

## Output datasets

This code copies the datasets from the relevant directories and outputs AutoCellLabeler-compatible `h5` files. If using the data from our Dropbox, set `base_path` to the location where the data was downloaded and comment out the `expand_nrrd_dimension` line of code.

In [None]:
# Each project's dataset list paired with its key in `input_paths`.
# The pairs are checked in this order and the first match wins.
project_lookup = (
    (datasets_prj_rim, "prj_rim"),
    (datasets_prj_neuropal, "prj_neuropal"),
    (datasets_prj_starvation, "prj_starvation"),
    (datasets_prj_5ht, "prj_5ht"),
    (datasets_prj_aversion, "prj_aversion"),
)

for dataset in datasets_test:
    # Resolve the project data directory that holds this dataset.
    for prj_members, prj_key in project_lookup:
        if dataset in prj_members:
            prj_dir = input_paths[prj_key]
            break
    else:
        # The dataset belongs to no known project: skip it.
        continue

    # Directory of the immobilized TagRFP dataset paired with this dataset.
    red_dataset = all_red_paths[dataset]
    base_path = os.path.join(prj_dir, f"{dataset}_output", "neuropal", red_dataset)

    # Registration timepoint (a string); zero-padded to 4 digits in the image
    # file name but used unpadded in the watershed ROI file name.
    t = reg_timepts[dataset]
    nrrd_path = os.path.join(base_path, "NRRD_cropped", f"{red_dataset}_t{t.zfill(4)}_ch2.nrrd")
    watershed_path = os.path.join(base_path, "img_roi_watershed", f"{t}.nrrd")

    # All output files for this dataset share the stem "<dataset>_<timepoint>".
    out_stem = f"{dataset}_{t}"
    expanded_nrrd_path = os.path.join(output_path_nrrd, out_stem + ".nrrd")

    # comment this out if using data from our Dropbox
    expand_nrrd_dimension(nrrd_path, expanded_nrrd_path)

    cropout = create_h5_from_nrrd(
        expanded_nrrd_path,
        os.path.join(output_path_h5, out_stem + ".h5"),
        watershed_path,
        os.path.join(output_path_roi_crop, out_stem + ".h5"),
        (64, 120, 284),  # crop size
        185,  # number of output channels (ie: valid labels)
    )
