# Analysis of DNA-MERFISH for CTP11-12-13 spots 


The link to get [ImageAnalysis3](https://github.com/zhengpuas47/ImageAnalysis3) 

or the Zhuang lab archived [source_tools](https://github.com/ZhuangLab/Chromatin_Analysis_2020_cell/tree/master/sequential_tracing/source)

In [1]:
%run "C:\Users\shiwei\Documents\ImageAnalysis3\required_files\Startup_py3.py"
sys.path.append(r"C:\Users\shiwei\Documents")

import ImageAnalysis3 as ia
%matplotlib notebook

from ImageAnalysis3 import *
print(os.getpid())

import h5py
from ImageAnalysis3.classes import _allowed_kwds
import ast

import pandas as pd

12728


See **functions** in the repository for [AnalysisTool_Chromatin](../../functions/README.md)

In [2]:
# Chromatin_analysis_tools (ATC)
# Get path for the py containing functions
# The folder below is added to sys.path so the AnalysisTool_Chromatin modules
# can be imported by their bare names.
import os
import sys
import importlib
module_path =r'C:\Users\shiwei\Documents\AnalysisTool_Chromatin'
# Guard so that re-running this cell does not append the same entry twice.
if module_path not in sys.path:
    sys.path.append(module_path)
    
# import relevant modules
# Every import is followed by importlib.reload so that re-running this cell
# re-executes the module and picks up edits made to the .py file on disk.
import gene_selection 
importlib.reload(gene_selection)
import gene_to_loci
importlib.reload(gene_to_loci)
import gene_activity
importlib.reload(gene_activity)
import loci_1d_features
importlib.reload(loci_1d_features)  

import atac_to_loci
importlib.reload(atac_to_loci)

<module 'atac_to_loci' from 'C:\\Users\\shiwei\\Documents\\AnalysisTool_Chromatin\\atac_to_loci.py'>

In [3]:
import tqdm

In [60]:
import matplotlib
import matplotlib.pyplot as plt

# Font setup: select the 'serif' family and make Arial the serif face,
# both set through a single rc call on the 'font' group.
plt.rc('font', family='serif', serif='Arial')

# Dark figure theme for every plot drawn after this cell.
plt.style.use('dark_background')

# Font type 42 makes the PDF backend embed TrueType fonts
# (instead of matplotlib's default Type 3).
matplotlib.rcParams['pdf.fonttype'] = 42

# 0. Define output folder

In [69]:
output_main_folder = r'L:\Shiwei\Figures\MOp_draft_2023_v1\Scheme_and_RNA_MERFISH'
output_analysis_folder = os.path.join(output_main_folder, 'analysis')
output_figure_folder = os.path.join(output_main_folder, 'figures')

# Set to False to only report on the folders without creating them.
make_output_folder = True


def ensure_output_folder(folder, label, create=True):
    """Report on an output folder and create it when allowed.

    Args:
        folder: path of the folder to check.
        label: short name used in the printed message (e.g. 'analysis').
        create: when True, a missing folder (and its parents) is created.

    Returns:
        True if the folder exists when the function returns, False otherwise.

    Raises:
        OSError: if the folder has to be created and creation fails, e.g.
            because the path already exists as a regular file.
    """
    if os.path.isdir(folder):
        print(f'Use existing {label} folder: {folder}.')
        return True
    if create:
        # exist_ok avoids a failure if the folder appears between the check
        # above and this call.
        os.makedirs(folder, exist_ok=True)
        print(f'Generating {label} folder: {folder}.')
        return True
    # Previously this case printed nothing, so a missing folder only surfaced
    # later as a failed write.
    print(f'Warning: {label} folder does not exist and was not created: {folder}.')
    return False


# A loop (rather than a bare call as the last expression) keeps the cell from
# echoing the helper's return value.
for _out_folder, _out_label in (
    (output_analysis_folder, 'analysis'),
    (output_figure_folder, 'figure'),
):
    ensure_output_folder(_out_folder, _out_label, make_output_folder)


Use existing analysis folder: L:\Shiwei\Figures\MOp_draft_2023_v1\Scheme_and_RNA_MERFISH\analysis.
Use existing figure folder: L:\Shiwei\Figures\MOp_draft_2023_v1\Scheme_and_RNA_MERFISH\figures.


# 1. Load all DAPI volume and library info

## Load sorted codebook with cell type info

In [4]:
# Folder holding the post-analysis results.
# L drive is Crick Pu_SSD_0
analysis_save_folder = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MouseBrain_PostAnalysis_20230201'

# Read the merged codebook (with cell type info) and sort it right away with
# sort_loci_df_by_chr_order, so that matrices can be sliced directly in
# dataframe order.
celltype_codebook_fname = os.path.join(analysis_save_folder, 'merged_codebook.csv')
celltype_codebook_df = loci_1d_features.sort_loci_df_by_chr_order(
    pd.read_csv(celltype_codebook_fname, index_col=0)
)

# Preview the locus name and chromosome columns of the sorted codebook.
celltype_codebook_df.loc[:, ['name', 'chr', 'chr_order']].head()

Unnamed: 0,name,chr,chr_order
0,1:3742742-3759944,1,0.0
1,1:6245958-6258969,1,1.0
2,1:8740008-8759916,1,2.0
1016,1:9627926-9637875,1,3.0
1017,1:9799472-9811359,1,4.0


## Load DAPI volume

In [5]:
# Root folder of the DAPI segmentation results; the experiment sub-folders
# used below each contain a 'csv' directory.
exp_main_folder = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations'

# Keep only the experiment sub-folders tagged 'v2'.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the list (and anything indexed from it later) reproducible.
exp_folders = [
    os.path.join(exp_main_folder, _exp_name, 'csv')
    for _exp_name in sorted(os.listdir(exp_main_folder))
    if 'exp' in _exp_name and 'v2' in _exp_name
]

exp_folders

['\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0316_from_0304_v2\\csv',
 '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0402_from_0329_v2\\csv',
 '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0419_from_0415_v2\\csv',
 '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\csv']

In [6]:
# Collect the path of every csv table found in the experiment csv folders.
_df_file_list = []
for _folder in exp_folders:
    if not os.path.isdir(_folder):
        # Report the gap instead of silently leaving an experiment out.
        print(f'Skip missing csv folder: {_folder}.')
        continue
    _df_file_list.extend(
        os.path.join(_folder, _fl)
        # sorted: os.listdir order is arbitrary; endswith: a substring test
        # would also pick up names such as 'table.csv.bak'.
        for _fl in sorted(os.listdir(_folder))
        if _fl.endswith('.csv')
    )


In [7]:
# Read each collected csv file into its own dataframe.
exp_df_list = [pd.read_csv(_csv_path) for _csv_path in _df_file_list]

# Report how many tables were loaded.
print(len(exp_df_list))

327


In [8]:
# Stack all per-file tables into one dataframe and index it by the 'uid'
# column (the column itself is dropped from the data).
merged_cell_volume_df = pd.concat(exp_df_list).set_index('uid', drop=True)
merged_cell_volume_df

Unnamed: 0_level_0,fov_id,orig_cellID,feature_volume,nucleus_volume_DAPI,confidence_score,fov_edge
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
276583549005001281310253810318041068047,0,1,5.286679e+05,279241.527778,0.5,1
103420171094780316930229829293672994552,0,2,3.382644e+05,195024.791667,0.5,1
314844972520915742869654555830989384599,0,3,2.419376e+05,57840.023148,0.5,1
109560860268592786158662035265255130891,0,4,2.025881e+06,64409.861111,0.5,0
270350000043071906183619565260086304156,0,5,6.215574e+05,290495.000000,0.5,0
...,...,...,...,...,...,...
64854774451287321962209816693938330264,99,151,1.549149e+05,,0.0,0
41884351258426342777940164067907564064,99,152,1.680081e+04,,0.0,0
10385920982210596646107185956261998480,99,153,2.865559e+04,,0.0,0
217987427530123484115758408189837054067,99,154,8.042465e+03,179057.141204,1.0,0


# 3. Select a FOV with cell type to inspect DAPI segmentation

In [18]:
sel_experiment = '20220304'
sel_fov_id = 1

# NOTE(review): `adata_full` is not created in any cell shown in this
# notebook; it has to be loaded before this cell for a fresh-kernel run.
# The FOV column of its obs table is 'fov' (the csv tables use 'fov_id').
_obs = adata_full.obs
sel_df = _obs[(_obs['experiment'] == sel_experiment) & (_obs['fov'] == sel_fov_id)]

# Experiment folders are matched on the last four characters of the
# experiment id (e.g. '0304' in 'exp0316_from_0304_v2').
_exp_tag = sel_experiment[4:]
_matched_folders = [folder for folder in exp_folders if _exp_tag in folder]
if not _matched_folders:
    raise ValueError(f'No segmentation folder found for experiment {sel_experiment}.')
sel_exp_folder = _matched_folders[0]

# The FOV folder sits next to the 'csv' folder. Build it from the parent
# directory rather than with str.replace, which would rewrite every 'csv'
# substring in the path.
nuc_seg_folder = os.path.join(os.path.dirname(sel_exp_folder), f'FOV-{sel_fov_id}')
print(nuc_seg_folder)

# Always define the list so the preview below cannot fail with a NameError
# or show a stale list left over from an earlier run.
nuc_seg_fname_list = []
if os.path.isdir(nuc_seg_folder):
    nuc_seg_fname_list = [
        os.path.join(nuc_seg_folder, _fl)
        for _fl in sorted(os.listdir(nuc_seg_folder))
        if _fl.endswith('.npy')
    ]
else:
    print(f'Segmentation folder not found: {nuc_seg_folder}.')

nuc_seg_fname_list[:3]

\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0316_from_0304_v2\FOV-1


['\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0316_from_0304_v2\\FOV-1\\Nucleus_fov-1_cell-1.npy',
 '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0316_from_0304_v2\\FOV-1\\Nucleus_fov-1_cell-10.npy',
 '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0316_from_0304_v2\\FOV-1\\Nucleus_fov-1_cell-100.npy']

# 4. Select celltype to plot

In [None]:
# Cell type label used as the key when registering a selected nucleus.
celltype = 'L5 ET'

In [30]:
# Mapping from cell type label to the file of the nucleus kept for it after
# visual inspection; starts empty and is filled by the cells that follow.
selected_cell_fname_dict = dict()

## add selected cells in order

In [34]:
# Nucleus kept for this cell type. The key is set in this cell so the entry
# does not depend on whatever value `celltype` held when the cell was run.
celltype = "L5 ET"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-118\Nucleus_fov-118_cell-29.npy'
print(celltype)

L5 ET


In [37]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "L5 IT"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-177\Nucleus_fov-177_cell-51.npy'
print(celltype)

L5 IT


In [40]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "L4/5 IT"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-15\Nucleus_fov-15_cell-48.npy'
print(celltype)

L4/5 IT


In [43]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "L2/3 IT"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0402_from_0329_v2\FOV-86\Nucleus_fov-86_cell-71.npy'
print(celltype)

L2/3 IT


In [46]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "Pvalb"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-112\Nucleus_fov-112_cell-69.npy'
print(celltype)

Pvalb


In [51]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "Astro"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-9\Nucleus_fov-9_cell-30.npy'
print(celltype)

Astro


In [55]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "Oligo"
selected_cell_fname_dict[celltype] = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-99\Nucleus_fov-99_cell-32.npy'
print(celltype)

Oligo


In [58]:
# Nucleus kept for this cell type. The key is set in this cell so a fresh
# top-to-bottom run does not overwrite the entry of another cell type.
celltype = "Endo"
selected_cell_fname_dict[celltype] =r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MOp_dapi_segmentations\exp0713_from_0418_v2\FOV-155\Nucleus_fov-155_cell-24.npy'
print(celltype)

Endo


## check the updated dict

In [66]:
# Display the cell type -> nucleus file mapping so it can be checked before it is saved.
selected_cell_fname_dict

{'L5 ET': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\FOV-118\\Nucleus_fov-118_cell-29.npy',
 'L5 IT': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\FOV-177\\Nucleus_fov-177_cell-51.npy',
 'L4/5 IT': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\FOV-15\\Nucleus_fov-15_cell-48.npy',
 'L2/3 IT': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0402_from_0329_v2\\FOV-86\\Nucleus_fov-86_cell-71.npy',
 'Pvalb': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\FOV-112\\Nucleus_fov-112_cell-69.npy',
 'Astro': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\FOV-9\\Nucleus_fov-9_cell-30.npy',
 'Oligo': '\\\\10.245.74.158\\Chromatin_NAS_8\\Analyzed_data\\MOp_dapi_segmentations\\exp0713_from_0418_v2\\FOV-99\\Nucleu

## save the cell image dict

In [67]:
import pickle

In [68]:
# Save the cell type -> nucleus file mapping in the analysis output folder.
cellsave_dictfname = os.path.join(output_analysis_folder,'selected_dapi_image_fnames.pkl')

# The context manager closes (and flushes) the file once the dump is done;
# passing a bare open(...) to pickle.dump left the handle open.
with open(cellsave_dictfname, 'wb') as _pkl_file:
    pickle.dump(selected_cell_fname_dict, _pkl_file)