__Author:__ Bogdan Bintu

__Email:__ bbintu@g.harvard.edu

__Date:__ 3/4/2020



#### Note: This assumes Python 2

In [1]:
# Imports
import numpy as np
import glob,os,sys
import matplotlib.pylab as plt

import workers #worker package to parallelize

#Warning: Installing ipyparallel is recommended

### 1. Raw imaging data structure description

This code assumes the data is stored in a __master_folder__
with subfolders organized by rounds of hybridization i.e.:

__H1R1,R2__ - corresponds to the 1st round of imaging the fluorescent signal in which readout 1 is imaged in the first color channel, readout 2 is imaged in the second color channel and fiducial beads are imaged in the third color channel. For each z-position the three color channels are imaged, the z-piezo is moved one step (100-250nm) and then the imaging of the three color channels is repeated etc. This folder contains multiple .dax imaging files (and associated info files) organized per field of view (i.e. Conv_zscan_00.dax corresponds to the 1st field of view, Conv_zscan_01.dax corresponds to the 2nd field of view etc.).

__H2R3,R4__ - corresponds to the 2nd round of imaging the fluorescent signal in which readout 3 is imaged in the first color channel, readout 4 is imaged in the second color channel and fiducial beads are imaged in the third color channel. The z-step imaging and file format within each folder follow the same description above for each folder unless specified.

...

__H0B,B__ - corresponds to the first round of imaging, before flowing readout sequences (typically across all 5 available color channels in an alternating fashion)
(B - indicates that there is no fluorescent signal in that channel)

...

__H1Q1,Q2,Q3__ - corresponds to the 1st round of imaging the fluorescent signal of RNA (Q denotes RNA readouts) in which RNA readout 1 is imaged in the first color channel, RNA readout 2 is imaged in the second color channel, RNA readout 3 is imaged in the third color channel and fiducial beads are imaged in the fourth color channel.


### 2. Organize the data and flatten the illumination profile

In [6]:
######### Get the info for the current project
# Root folder holding one subfolder per round of hybridization (e.g. H1R1,R2).
master_folder=r'master_DNA_folder'

# Keep only the subfolders that contain at least one .dax file and whose name does not start with 'F'.
# NOTE: len(...)>0 is required here - comparing the list itself to 0 is always True in Python 2
# and would not filter out anything.
hfolders = [folder for folder in glob.glob(master_folder+os.sep+'*')
           if os.path.isdir(folder) and len(glob.glob(folder+os.sep+'*.dax'))>0 and os.path.basename(folder)[0]!='F']
# Order the folders by the index returned by workers.getH.
hinds = [workers.getH(hfolder) for hfolder in hfolders]
hfolders = np.array(hfolders)[np.argsort(hinds)]
# File names of the fields of view, taken from the .dax files of the first folder.
# list(...) keeps fovs an indexable list regardless of what map returns.
fovs = list(map(os.path.basename,glob.glob(hfolders[0]+os.sep+'*.dax')))
# All analysis results are saved next to the master folder.
analysis_folder = master_folder+'-Analysis'
if not os.path.exists(analysis_folder): os.makedirs(analysis_folder)

In [9]:
######### compute one average image per color channel across all fields of view - this helps flatten the illumination
# NOTE(review): the names say "meds" (median) but the images are combined with np.mean - confirm which one is intended.
num_cols,remove_first = 4,0
hfolder = hfolders[10]
print(hfolder)
meds_fl = analysis_folder+os.sep+'im_meds.npy'
# The result is cached on disk: it is only computed when the file does not exist yet.
if not os.path.exists(meds_fl):
    im_meds = []
    for ind_col in range(num_cols):
        # one frame per field of view for this color channel
        frames = [workers.get_frame(hfolder+os.sep+fov,ind_z=ind_col+remove_first) for fov in fovs]
        im_meds.append(np.mean(frames,axis=0))
    np.save(meds_fl,np.array(im_meds,dtype=np.float32))

\\dolly\Raw_data\Bogdan\8_26_2019__IMR90_6hAmanitin_STORM65\H112B,R3,R5


In [None]:
#### check illumination profile
# Load the saved per-channel images and display the one at index 3.
im_med = np.load(meds_fl)
im = im_med[3]
plt.figure(figsize=(5,5))
# The color scale is capped at 1.2 x the 95th percentile of the image.
plt.imshow(im,vmax=np.percentile(im,95)*1.2)
plt.colorbar()

### 3. Run the rough alignment and fiducial drift fitting across all fields of view and all imaging rounds

In [None]:

def ref_fl(dax_fl,ref_tags = ('H20B,B','H20B,B')):
    """
    Return the path of the reference .dax file that matches <dax_fl>.

    The reference file has the same file name (field of view) as <dax_fl> and is located in the
    first folder of the global <hfolders> whose base name contains the reference tag.
    ref_tags[-1] is used when the folder name of <dax_fl> contains 'Q', otherwise ref_tags[0] is used.

    Raises IndexError if no folder in <hfolders> contains the reference tag.
    """
    htag = os.path.basename(os.path.dirname(dax_fl))
    fov = os.path.basename(dax_fl)
    ref_tag = ref_tags[-1] if 'Q' in htag else ref_tags[0]
    # Take the first matching folder; None signals that nothing matched.
    ref_hfolder = next((hfolder for hfolder in hfolders if ref_tag in os.path.basename(hfolder)),None)
    if ref_hfolder is None:
        raise IndexError('No reference folder containing the tag %r was found for %s'%(ref_tag,dax_fl))
    return ref_hfolder+os.sep+fov
# Build one parameter tuple (dax file, reference dax file, overwrite flags) per field of view and imaging round.
paramaters = []
overwrite_fits,overwrite_drift=False,False
for fovind,fov_name in enumerate(fovs):
    for hind,hfolder_ in enumerate(hfolders):
        htag = os.path.basename(hfolder_)
        dax_fl = hfolder_+os.sep+fov_name
        ref_dax_fl = ref_fl(dax_fl)
        paramaters.append((dax_fl,ref_dax_fl,overwrite_fits,overwrite_drift))
print(len(paramaters))

#### Run across all data in parallel

In [24]:
# Run the workers in parallel to perform the rough registration and fit the beads in the data.
# While workers can also perform a rough (yet fast) fitting of the signal data,
# this is mostly used for testing, as the gaussian fitting is more precise.
# The path to the analysis code is added to sys.path (inside f below) to be able to import workers.

# Before running this cell, open a terminal and start the engines with: ipcluster start -n 20
import ipyparallel as ipp
from ipyparallel import Client
# Connect to the running cluster.
rc = Client()
import workers
# Reload so that edits made to workers since the first import are picked up.
reload(workers)
def f(parm):
    """
    Run workers.run_fitting_analysis on a single parameter tuple and return its result.

    parm is one element of <paramaters>: (dax_fl,ref_dax_fl,overwrite_fits,overwrite_drift).
    All imports are done inside the function so that it does not rely on names defined outside of it.
    """
    import sys
    code_folder = r'E:\Bogdan\Dropbox\code_Seurat\WholeGenome_MERFISH\Analysis_code'
    # f is called once per parameter tuple: only add the folder once so that sys.path does not grow with every call.
    if code_folder not in sys.path:
        sys.path.append(code_folder)
    import workers
    reload(workers)
    
    success = workers.run_fitting_analysis(parm,remove_first=0,im_pad=0,fit_colors=False)
    return success

In [None]:
# Map f over every parameter tuple using all the engines of the cluster.
all_engines = rc[:]
res = all_engines.map_sync(f,paramaters[:])

#### Example of single run output 

In [None]:
# Run f directly on a single parameter tuple to inspect its output.
example_parm = paramaters[3000]
f(example_parm)