# Notebook for extracting cell level results post-bigfish
## For HCRv3 analysis mode with no cluster decomposition elements
## Using Big-fish v0.6.2
## Revised: 2023.05.22

In [1]:
import os
import time

from skimage.measure import regionprops
import numpy as np
import tifffile
import matplotlib.pyplot as plt
import pathlib
import pandas as pd

import bigfish
import bigfish.stack as stack
import bigfish.multistack as multistack 

# Show which Big-FISH release this notebook was executed with
version_banner = "Big-FISH version: {0}".format(bigfish.__version__)
print(version_banner)

Big-FISH version: 0.6.2


In [17]:
# Input locations: the folder holding the .npz files and the folder holding the masks
npz_dir = "/Users/jefflee/Desktop/npz_dir/"
mask_dir = "/Users/jefflee/Desktop/mask_dir/"
# Prefix prepended to a sample's base name to form its mask file name
mask_prefix = "Hys_"
# Keep only the .npz entries of the input folder (order is whatever os.listdir returns)
npz_files = list(filter(lambda name: name.endswith('.npz'), os.listdir(npz_dir)))
# Display the first file name as a quick sanity check
npz_files[0]

'130323_Dhc8-1-6-6_Gdi565_Rab11647_L2_1_ch1_bfoutput.npz'

In [141]:
# Assign directories of npz files and masks
npz_dir = "/Users/jefflee/Desktop/npz_dir/"
npz_files = [x for x in os.listdir(npz_dir) if x.endswith('.npz')]
mask_dir = "/Users/jefflee/Desktop/mask_dir/"
mask_prefix = "Hys_"

# For every .npz file: load its spots, load the matching mask image, assign
# each spot to the labelled region it falls in, and save one result file per
# region into mask_dir.
for file in npz_files:

    # get bigfish data
    print(f"processing {file}")
    # np.load returns a lazily-read archive; the context manager closes it
    # once the "spots" array has been pulled into memory.
    with np.load(os.path.join(npz_dir, file)) as data:
        spots_post_subpixel = data["spots"]

    # get mask image
    # The last 17 characters of the npz name (e.g. "_ch1_bfoutput.npz") are
    # dropped so that every channel of a sample maps to the same mask file.
    mask_filename = f"{mask_prefix}{file[0:-17]}.tif"
    masks = tifffile.imread(os.path.join(mask_dir, mask_filename))

    # Integer pixel index of every spot (first three columns, floored), and
    # the mask value found at that index. Both are independent of the cell
    # being processed, so they are computed once per file.
    coord_int = np.floor(spots_post_subpixel).astype(np.int64)[:, 0:3]
    spot_labels = masks[tuple(coord_int.T)]

    # extract cell level results
    fov_results = []
    cell_label = masks.astype(np.int64)
    for cell in regionprops(cell_label):
        label = cell.label
        # A spot belongs to this cell only when the mask value under it equals
        # the cell's label; spots on background or inside any other cell are
        # excluded (previously such spots could inherit the flag of the
        # preceding spot).
        fov_results.append({
            "cell_id": label,
            "rna_coord": spots_post_subpixel[spot_labels == label],
        })

    # Write each cell's results exactly once, after all cells of this file
    # have been collected. The index in the file name is the position in
    # fov_results, not the mask label (the label is stored under "cell_id").
    for i, cell_results in enumerate(fov_results):
        output_path = pathlib.Path(mask_dir).joinpath(
            f"{os.path.basename(file)}_results_cell_{i}.npz"
        )
        stack.save_cell_extracted(cell_results, str(output_path))
    

processing 130323_Dhc8-1-6-6_Gdi565_Rab11647_L2_1_ch1_bfoutput.npz
processing 130323_Dhc8-1-6-6_Gdi565_Rab11647_L2_1_ch0_bfoutput.npz


In [143]:
# Build a per-file summary table from the .npz result files found in mask_dir
indir = mask_dir
infiles = [x for x in os.listdir(indir) if x.endswith('.npz')]

# prepare buckets for the data
file_name = []
channel = []
total_RNAs = []
# NOTE(review): the three buckets below are not filled anywhere in this cell;
# they are left defined in case other cells reference them - confirm and remove.
cytosolic_RNAs = []
foci_count = []
mol_per_foci = []

# Channel tags looked for in the file name, tested in this order
channel_tags = ('ch0', 'ch1', 'ch2', 'ch3', 'ch4')

# parse .npz files
for file in infiles:
    # Close the archive as soon as the spot count has been read
    with np.load(os.path.join(indir, file)) as data:
        n_rna = data['rna_coord'].shape[0]

    file_name.append(file)
    total_RNAs.append(n_rna)
    # Append exactly one channel entry per file (None when no tag is present,
    # first match when several are) so the three lists always stay aligned
    # row-for-row.
    channel.append(next((tag for tag in channel_tags if tag in file), None))

# Building from a dict makes pandas raise if the columns ever differ in
# length, instead of silently truncating the way zip() does.
data_tall = pd.DataFrame(
    {'file_name': file_name, 'channel': channel, 'total_RNAs': total_RNAs},
    columns=['file_name', 'channel', 'total_RNAs'],
)

data_tall

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,file_name,channel,total_RNAs,cytosolic_RNAs,foci_count,mol_per_foci
0,130323_Dhc8-1-6-6_Gdi565_Rab11647_L2_1_ch1_bfo...,ch1,148,78.0,2,35.0
1,130323_Dhc8-1-6-6_Gdi565_Rab11647_L2_1_ch0_bfo...,ch0,31,31.0,0,


In [26]:
# Export the summary table as CSV into the current working directory
summary_csv_name = 'smFISH_final_summary_output.csv'
data_tall.to_csv(summary_csv_name)