In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
import scanpy as sc
import h5py

import sys

# Report the process ID of this kernel (presumably so the long-running job
# can be located in a process monitor -- TODO confirm intent).
print(f'PID number is {os.getpid()}')

PID number is 10880


# Define MeCP2 image file path

In [2]:
# Each experiment is identified by the date on which its MERFISH run was
# started (e.g. '0722'); these ids key every per-experiment dictionary below.
experiment_ids = [
    '0722', '0724', '0808', '0809',
    '1027', '1029', '1101', '1103', '1105', '1107',
]

In [3]:
# Per-experiment pieces of the MeCP2 image (.dax) file name. A file path is
# assembled as  prefix_path[exp] + <fov string> + postfix_path[exp].
#
# Experiment 0722 is seeded here by hand because its images are stored in a
# different location from the other experiments.
# NOTE: adjust the path below if the 0722 data lives elsewhere on your machine.
prefix_path = {
    '0722': r'R:\Cosmos\MeCP2\20220722_RNA_MERFISH_Mecp2_0722\Epi-750-647-561-488-405-s50_',
}

# True  -> the fov number is zero-padded with str(fov).zfill(3)
# False -> the fov number is used as plain str(fov)
add_fill = {'0722': False}

postfix_path = {'0722': '_13.dax'}

In [4]:
# Locate, for every experiment except 0722, the folder holding its MeCP2
# images and record the corresponding .dax file-name prefix.
dax_file_prefix = 'Conv_zscan_'
parent_data_folder = r'\\KOLMOGOROV\Chromatin_NAS_6'
all_folders = os.listdir(parent_data_folder)

for exp in experiment_ids:
    if exp == '0722':
        # 0722 is stored in a different location and is configured by hand.
        continue

    add_fill[exp] = True
    postfix_path[exp] = '.dax'

    # Reset per experiment: without this, a failed lookup would silently
    # reuse the folder found for the previous experiment (or raise a
    # NameError on the very first one).
    exp_fd = None
    for fd in all_folders:
        # Experiment folder: name contains the experiment id and is not a DNA
        # run. If several folders match, the last one listed wins.
        if (exp in fd) and ('DNA' not in fd):
            exp_fd = os.path.join(parent_data_folder, fd)
            print(f'{exp_fd} is used for MeCP2_{exp}.')
    if exp_fd is None:
        raise FileNotFoundError(
            f'No non-DNA folder containing {exp!r} found in {parent_data_folder}'
        )

    mecp2_fd = None
    for round_fd in os.listdir(exp_fd):
        # Imaging round folder: name contains 'P1' and is not marked 'old'.
        # If several folders match, the last one listed wins.
        if ('P1' in round_fd) and ('old' not in round_fd):
            mecp2_fd = os.path.join(exp_fd, round_fd)
            print(f'{mecp2_fd} is used for MeCP2_{exp}.\n')
    if mecp2_fd is None:
        raise FileNotFoundError(
            f"No 'P1' round folder (excluding 'old') found in {exp_fd} for MeCP2_{exp}"
        )

    prefix_path[exp] = os.path.join(mecp2_fd, dax_file_prefix)

\\KOLMOGOROV\Chromatin_NAS_6\20220724_MOp_Mecp2 is used for MeCP2_0724.
\\KOLMOGOROV\Chromatin_NAS_6\20220724_MOp_Mecp2\H13P1 is used for MeCP2_0724.

\\KOLMOGOROV\Chromatin_NAS_6\20220808_MOp_Mecp2_M2B is used for MeCP2_0808.
\\KOLMOGOROV\Chromatin_NAS_6\20220808_MOp_Mecp2_M2B\H13P1 is used for MeCP2_0808.

\\KOLMOGOROV\Chromatin_NAS_6\20220809_MOp_Mecp2_M2B is used for MeCP2_0809.
\\KOLMOGOROV\Chromatin_NAS_6\20220809_MOp_Mecp2_M2B\H13P1 is used for MeCP2_0809.

\\KOLMOGOROV\Chromatin_NAS_6\20221027_Ma_withDMG is used for MeCP2_1027.
\\KOLMOGOROV\Chromatin_NAS_6\20221027_Ma_withDMG\H20P1 is used for MeCP2_1027.

\\KOLMOGOROV\Chromatin_NAS_6\20221029_Ma_withDMG is used for MeCP2_1029.
\\KOLMOGOROV\Chromatin_NAS_6\20221029_Ma_withDMG\H20P1 is used for MeCP2_1029.

\\KOLMOGOROV\Chromatin_NAS_6\20221101_Mb_withDMG is used for MeCP2_1101.
\\KOLMOGOROV\Chromatin_NAS_6\20221101_Mb_withDMG\H20P1 is used for MeCP2_1101.

\\KOLMOGOROV\Chromatin_NAS_6\20221103_Mb_withDMG is used for MeCP2_1103.

# Define feature file

In [5]:
analysis_parent_folder = r'MERFISH_analysis'

# feature data contains the cell id for each segment.
# feature_prefix[exp] is the path prefix of the per-fov feature files; the
# full file name is obtained by appending  str(fov) + '.hdf5'.
feature_prefix = {}
for exp in experiment_ids:
    base_fd = os.path.join(analysis_parent_folder, 'Mecp2_' + exp)
    # Use the '_p1' variant of the analysis folder when the plain one is absent.
    analysis_fd = base_fd if os.path.exists(base_fd) else base_fd + '_p1'
    feature_prefix[exp] = os.path.join(
        analysis_fd, 'CellPoseSegment_full_z_DAPI', 'features', 'feature_data_'
    )

# Define offset file

In [6]:
# offset files are the Fiducial alignment output files generated by MERlin.
# offset_prefix[exp] is the path prefix of the per-fov offset files; the
# full file name is obtained by appending  str(fov) + '.npy'.
offset_prefix = {}
for exp in experiment_ids:
    base_fd = os.path.join(analysis_parent_folder, 'Mecp2_' + exp)
    # Use the '_p1' variant of the analysis folder when the plain one is absent.
    analysis_fd = base_fd if os.path.exists(base_fd) else base_fd + '_p1'
    offset_prefix[exp] = os.path.join(
        analysis_fd, 'FiducialCorrelationWarp', 'transformations', 'offsets_'
    )

# Load adata and parameters

In [7]:
# Load the labeled AnnData object. Later cells read adata.obs['subclass'] and
# group adata.obs by its 'experiment' and 'fov' columns; the obs index is
# used as the per-cell uid.
adata = sc.read_h5ad(r"MeCP2_clustered_adata_labeled.h5ad")

In [8]:
# Lookup table: cell uid (adata.obs index) -> value of the 'subclass' column.
uid_to_subcls = dict(zip(adata.obs.index.values, adata.obs['subclass'].values))

In [9]:
################# Define the per-FOV inputs required for the computation ###############
# One entry is produced per (experiment, fov) group of adata.obs:
#   experiments   - experiment id of the group
#   dax_files     - path of the MeCP2 image file
#   feature_files - path of the segmentation feature file
#   offset_files  - path of the fiducial offset file
#   kept_uids     - uids (obs index values) of the cells in that group
experiments = []
dax_files = []
feature_files = []
offset_files = []
kept_uids = []

for (exp, fov), fov_obs in adata.obs.groupby(['experiment', 'fov']):
    experiments.append(exp)

    fov_str = str(fov)

    # The dax file name uses a three-digit zero-padded fov only where flagged.
    dax_str_fov = fov_str.zfill(3) if add_fill[exp] else fov_str
    dax_files.append(prefix_path[exp] + dax_str_fov + postfix_path[exp])

    # Feature and offset file names always use the unpadded fov.
    feature_files.append(feature_prefix[exp] + fov_str + '.hdf5')
    offset_files.append(offset_prefix[exp] + fov_str + '.npy')

    kept_uids.append(fov_obs.index.values)

# Convert to arrays so that they can be indexed with integer index arrays.
experiments = np.array(experiments)
dax_files = np.array(dax_files)
feature_files = np.array(feature_files)
offset_files = np.array(offset_files)

# Object array with one slot per group, filled slot by slot so that each
# element stays a separate uid array (the groups can differ in length).
kept_uid_array = np.empty(len(experiments), dtype=object)
for idx, fov_uids in enumerate(kept_uids):
    kept_uid_array[idx] = fov_uids

# Spot check of one arbitrary, fixed entry (requires at least 889 groups).
_random_num = 888
experiments[_random_num], dax_files[_random_num], feature_files[_random_num], offset_files[_random_num]

('1027',
 '\\\\KOLMOGOROV\\Chromatin_NAS_6\\20221027_Ma_withDMG\\H20P1\\Conv_zscan_108.dax',
 'E:\\MERFISH\\MERFISH_analysis_full_z\\Mecp2_1027_p1\\CellPoseSegment_full_z_DAPI\\features\\feature_data_108.hdf5',
 'E:\\MERFISH\\MERFISH_analysis_full_z\\Mecp2_1027_p1\\FiducialCorrelationWarp\\transformations\\offsets_108.npy')

# Run on all experiments

In [10]:
import multiprocessing as mp
from utilities import antibody
import time

In [11]:
output_folder = r'output_folder'
# Create the output directory up front so that a multi-hour computation is
# not lost because the final to_csv has nowhere to write.
os.makedirs(output_folder, exist_ok=True)

for exp in experiment_ids:

    output_name = os.path.join(output_folder, 'MeCP2_' + exp + '_antibody.csv')

    # Results are cached per experiment: an existing CSV means the experiment
    # was already processed and is skipped.
    if os.path.exists(output_name):
        continue

    # Indices of the per-FOV entries belonging to this experiment.
    exp_indices = np.where(experiments==exp)[0]
    if len(exp_indices) == 0:
        # Nothing to compute; pd.concat would raise on an empty result list.
        print(f'No FOVs found for experiment MeCP2_{exp}; skipping.')
        continue

    start_time = time.time()

    # One task per FOV, each called as
    # calculate_mecp2_signal(dax_file, feature_file, offset_file, kept_uids).
    # chunksize=1 hands tasks to the 20 workers one at a time.
    with mp.Pool(20) as pool:
        results = pool.starmap(antibody.calculate_mecp2_signal,
                               zip(dax_files[exp_indices], feature_files[exp_indices],
                                   offset_files[exp_indices], kept_uid_array[exp_indices]),
                               chunksize=1)

    # assumes each task returns a pandas object that pd.concat accepts -- TODO confirm
    df_output = pd.concat(results, ignore_index=True)

    # Write to a temporary file and rename it afterwards, so that an
    # interrupted write never leaves a partial CSV that the existence check
    # above would mistake for a finished experiment.
    tmp_output_name = output_name + '.tmp'
    df_output.to_csv(tmp_output_name)
    os.replace(tmp_output_name, output_name)

    duration = time.time()-start_time
    print(f'Finishe calculating for experiment MeCP2_{exp} in {duration:.2f}s')

Finishe calculating for experiment MeCP2_1027 in 10567.42s
Finishe calculating for experiment MeCP2_1029 in 11462.55s
Finishe calculating for experiment MeCP2_1101 in 9282.50s
Finishe calculating for experiment MeCP2_1103 in 9353.41s
Finishe calculating for experiment MeCP2_1105 in 8788.79s
Finishe calculating for experiment MeCP2_1107 in 8910.17s
