# Import module

The link to get [ImageAnalysis3](https://github.com/zhengpuas47/ImageAnalysis3) 

Or from the Zhuang lab archived [source_tools](https://github.com/ZhuangLab/Chromatin_Analysis_2020_cell/tree/master/sequential_tracing/source)

## ImageAnalysis3 and basic modules

In [2]:
# Execute the ImageAnalysis3 startup script inside this notebook's namespace.
# NOTE(review): `sys` and `os` are used below without a visible import in this cell —
# presumably they are provided by the startup script; confirm on a fresh kernel.
%run "C:\Users\shiwei\Documents\ImageAnalysis3\required_files\Startup_py3.py"
# Add the folder that contains the ImageAnalysis3 checkout to the import path
sys.path.append(r"C:\Users\shiwei\Documents")

# ImageAnalysis3 is imported both under the alias `ia` and via a star-import,
# so later cells may use either qualified or bare names from the package.
import ImageAnalysis3 as ia
from ImageAnalysis3 import *
from ImageAnalysis3.classes import _allowed_kwds

import h5py
import ast
import pandas as pd

# Print the process id of the running kernel
print(os.getpid())

18008


## Chromatin_analysis_tools etc

See **functions** in the repository for [AnalysisTool_Chromatin](../../README.md)

In [3]:
# Chromatin_analysis_tools (ATC)
# Get path for the py containing functions
import os
import sys
import importlib
# Local checkout of AnalysisTool_Chromatin; it is appended to sys.path only once
# so that its modules can be imported by their bare names below.
module_path =r'C:\Users\shiwei\Documents\AnalysisTool_Chromatin'
if module_path not in sys.path:
    sys.path.append(module_path)
    
# import relevant modules
# Every import is immediately followed by importlib.reload so that the module
# is re-executed from its source file each time this cell is run.
import gene_selection 
importlib.reload(gene_selection)
import gene_to_loci
importlib.reload(gene_to_loci)
import gene_activity
importlib.reload(gene_activity)
import loci_1d_features
importlib.reload(loci_1d_features)  

import atac_to_loci
# Last expression of the cell: the reloaded module object is shown as the cell output
importlib.reload(atac_to_loci)

<module 'atac_to_loci' from 'C:\\Users\\shiwei\\Documents\\AnalysisTool_Chromatin\\atac_to_loci.py'>

# Define folders

In [4]:
# main folder for postanalysis
postanalysis_folder = r'L:\Shiwei\postanalysis_2024\v0'
# input files for postanalysis
input_folder = os.path.join(postanalysis_folder, 'resources_from_preprocess')

# output folders to be generated
output_main_folder = os.path.join(postanalysis_folder, 'compartment_transcription')

output_analysis_folder = os.path.join(output_main_folder, 'analysis')
output_figure_folder = os.path.join(output_main_folder, 'figures')

# make new folder if needed
make_output_folder = True


def _prepare_output_folder(folder, label, create=True):
    """Report on, and optionally create, one output folder.

    Parameters
    ----------
    folder : str
        Path of the folder to check.
    label : str
        Short name used in the printed message (e.g. 'analysis', 'figure').
    create : bool
        When True, a missing folder is created (including parent folders).

    Returns
    -------
    None
        Nothing is returned so the call does not produce a cell output.
    """
    if os.path.exists(folder):
        print(f'Use existing {label} folder: {folder}.')
    elif create:
        # exist_ok avoids a crash if the folder appears between the check and the call
        os.makedirs(folder, exist_ok=True)
        print(f'Generating {label} folder: {folder}.')
    else:
        # Previously this case printed nothing, so later saves failed without any hint
        print(f'Warning: {label} folder does not exist and was not created: {folder}.')


_prepare_output_folder(output_analysis_folder, 'analysis', create=make_output_folder)
_prepare_output_folder(output_figure_folder, 'figure', create=make_output_folder)

Use existing analysis folder: L:\Shiwei\postanalysis_2024\v0\compartment_transcription\analysis.
Use existing figure folder: L:\Shiwei\postanalysis_2024\v0\compartment_transcription\figures.


# Plotting parameters

In [5]:
# Render matplotlib figures inline in the notebook
%matplotlib inline
import matplotlib
# Font type 42 embeds TrueType fonts in exported PDFs
matplotlib.rcParams['pdf.fonttype'] = 42
import matplotlib.pyplot as plt
# Select the 'serif' family and then set Arial as its font, so Arial is what gets used
plt.rc('font', family='serif')
plt.rc('font', serif='Arial')

# Shared figure-size / font-size constants defined by ImageAnalysis3
from ImageAnalysis3.figure_tools import _double_col_width, _single_col_width, _font_size, _ticklabel_size,_ticklabel_width

import seaborn as sns
# NOTE(review): `_font_size` here is the value imported from figure_tools above; a later cell
# reassigns `_font_size`, which does not retroactively change this seaborn context.
sns.set_context("paper", rc={"font.size":_font_size,"axes.titlesize":_font_size+1,"axes.labelsize":_font_size})  

In [6]:
# Other required plotting parameters
# Resolution value for figures (used by later plotting code; units presumably dots per inch)
_dpi = 300
# NOTE(review): this reassigns the `_font_size` imported from ImageAnalysis3.figure_tools
# in the previous cell; the seaborn context set there keeps the imported value.
_font_size = 7
# Page width value for figure sizing (presumably inches — confirm against the plotting code)
_page_width = 5.5


## cell type color-codes

In [7]:
# Cell-type labels from RNA-MERFISH and cell-type prediction, in display order.
# The label 'other' is not included in this list.
selected_cell_labels = (
    # subclasses that this notebook later groups under 'Gluta'
    ['L2/3 IT', 'L4/5 IT', 'L5 IT', 'L6 IT', 'L5 ET', 'L5/6 NP', 'L6 CT', 'L6b']
    # subclasses that this notebook later groups under 'GABA'
    + ['Sst', 'Pvalb', 'Lamp5', 'Sncg', 'Vip']
    # remaining labels
    + ['Astro', 'Oligo', 'OPC', 'Micro', 'Endo', 'VLMC', 'SMC', 'Peri']
)

# Colour assigned to each cell type (palette from the RNA-MERFISH UMAP and stats).
# Built from ordered (label, colour) pairs; includes an entry for 'other'.
celltype_palette = dict([
    ('Astro', 'lightcoral'),
    ('Endo', 'skyblue'),
    ('L2/3 IT', 'gold'),
    ('L4/5 IT', 'darkorange'),
    ('L5 ET', 'mediumseagreen'),
    ('L5 IT', 'aqua'),
    ('L5/6 NP', 'darkgreen'),
    ('L6 CT', 'brown'),
    ('L6 IT', 'magenta'),
    ('L6b', 'blue'),
    ('Lamp5', 'orange'),
    ('Micro', 'peachpuff'),
    ('OPC', 'thistle'),
    ('Oligo', 'darkviolet'),
    ('Peri', 'sandybrown'),
    ('Pvalb', 'springgreen'),
    ('SMC', 'rosybrown'),
    ('Sncg', 'darkkhaki'),
    ('Sst', 'steelblue'),
    ('VLMC', 'saddlebrown'),
    ('Vip', 'red'),
    ('other', 'slategray'),
])


In [8]:
# Plotting order of cell types, based on snRNA transcriptional activity (use if needed)
sorted_cellplot_order_byRNA = [
    'Micro',
    'Oligo',
    'Endo',
    'OPC',
    'Astro',
    'Vip',
    'Lamp5',
    'L5/6 NP',
    'Sst',
    'Sncg',
    'Pvalb',
    'L4/5 IT',
    'L6 CT',
    'L6 IT',
    'L6b',
    'L2/3 IT',
    'L5 IT',
    'L5 ET',
]

# Load data relevant information

## load and format codebook

[merged codebook](../resources/merged_codebook.csv) as in the repository (merged for all DNA-MERFISH libraries)

In [9]:
# Read the merged codebook (first CSV column becomes the index) and order its rows
# with the project helper that sorts loci by chr and chr_order.
codebook_fname = os.path.join(input_folder, 'merged_codebook.csv')
codebook_df = loci_1d_features.sort_loci_df_by_chr_order(
    pd.read_csv(codebook_fname, index_col=0)
)
# Preview the first rows
codebook_df.head()

Unnamed: 0,name,id,NDB_784,NDB_755,NDB_826,NDB_713,NDB_865,NDB_725,NDB_817,NDB_710,...,NDB_479,NDB_562,NDB_608,NDB_460,NDB_563,NDB_592,NDB_368,NDB_436,NDB_629,NDB_604
0,1:3742742-3759944,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1:6245958-6258969,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1:8740008-8759916,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1016,1:9627926-9637875,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1017,1:9799472-9811359,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [10]:
# Format the chr loci name by
# 1. changing loci name format
# 2. keeping the relevant information such as id, chr, chr_order, and library
from gene_to_loci import loci_pos_format

# loci_pos_format is applied to every raw 'name'; element 0 of each result is used
# below as the reformatted locus name.
loci_name_list = list(map(loci_pos_format, codebook_df['name'].tolist()))
loci_name_arr = np.array(loci_name_list)

# Build the new frame in one chain: select the annotation columns, attach the new
# locus names, and use them as the index. Using .assign on the selection avoids
# writing into a column subset of the original frame (SettingWithCopyWarning).
codebook_df = (
    codebook_df[['name', 'id', 'chr', 'chr_order', 'library']]
    .assign(loci_name=list(loci_name_arr[:, 0]))
    .set_index('loci_name')
)

codebook_df.head()

Unnamed: 0_level_0,name,id,chr,chr_order,library
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
chr1_3742742_3759944,1:3742742-3759944,1,1,0.0,CTP11
chr1_6245958_6258969,1:6245958-6258969,2,1,1.0,CTP11
chr1_8740008_8759916,1:8740008-8759916,3,1,2.0,CTP11
chr1_9627926_9637875,1:9627926-9637875,1,1,3.0,CTP13
chr1_9799472_9811359,1:9799472-9811359,2,1,4.0,CTP13


# Load AB assignment



Data can be generated from the following notebook:

[4_higher_order_chromosome/4_ab_compartment_by_pm_majorType_assignment](../4_higher_order_chromosome/4_ab_compartment_by_pm_majorType_assignment.ipynb)


In [12]:
# Folder holding the A/B compartment assignment table. It is derived from
# postanalysis_folder (defined in the "Define folders" cell) instead of repeating the
# absolute path, so changing the root folder updates this location as well.
AB_summary_folder = os.path.join(postanalysis_folder, 'higher_order_chromosome', 'analysis')

# CSV of A/B assignments; the first column is used as the row index
AB_summary_fname = os.path.join(AB_summary_folder,'AB_assignment_CG_by_pm_majorType.csv')
AB_summary_df = pd.read_csv(AB_summary_fname, index_col=0)

In [13]:
# Preview the first rows of the A/B assignment table loaded from AB_summary_fname
AB_summary_df.head()

Unnamed: 0_level_0,Astro,Endo,GABA,Gluta,Micro,Oligo
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
chr1_3742742_3759944,B,A,B,B,B,B
chr1_6245958_6258969,A,A,B,B,B,B
chr1_8740008_8759916,B,B,B,B,B,B
chr1_9627926_9637875,A,A,B,B,A,B
chr1_9799472_9811359,A,A,B,B,A,A


## assign subclass level AB using majorType AB

In [14]:
# Mapping used to give every subclass the A/B assignment of its major type.
# Keys are subclass labels, values are the major-type column of AB_summary_df to copy
# (despite the variable name, the direction is subclass -> major type).
# The duplicated 'Vip' entry of the original literal has been removed; key order is unchanged.
# NOTE(review): 'VLMC' and 'SMC' appear in selected_cell_labels but have no entry here,
# so they get no A/B assignment — confirm this is intended.
majorType_2_subclass_dict = {'Oligo':'Oligo', 'OPC':'Oligo',
                             'Micro':'Micro','Astro':'Astro','Endo':'Endo','Peri':'Endo',
                             'L2/3 IT':'Gluta','L4/5 IT':'Gluta','L5 IT':'Gluta',
                             'L6 IT':'Gluta','L5 ET':'Gluta','L6 CT':'Gluta',
                             'L5/6 NP':'Gluta','L6b':'Gluta',
                             'Vip':'GABA','Pvalb':'GABA',
                             'Lamp5':'GABA','Sst':'GABA','Sncg':'GABA',
                            }

# This cell overwrites AB_summary_df, so running it a second time would look up
# major-type columns that no longer exist. Fail early with an explicit message.
_missing_majorTypes = sorted(set(majorType_2_subclass_dict.values()) - set(AB_summary_df.columns))
if _missing_majorTypes:
    raise KeyError(
        f"AB_summary_df has no column(s) {_missing_majorTypes}; it must hold the major-type "
        "table. Reload it from AB_summary_fname before (re-)running this cell."
    )

# Copy the major-type column for every subclass
AB_summary_dict = {}
for _celltype, _majorType in majorType_2_subclass_dict.items():
    AB_summary_dict[_celltype] = AB_summary_df[_majorType]

# Replace the major-type table with the subclass-level table (one column per subclass)
AB_summary_df = pd.DataFrame(AB_summary_dict)
AB_summary_df.head()

Unnamed: 0_level_0,Oligo,OPC,Micro,Astro,Endo,Peri,L2/3 IT,L4/5 IT,L5 IT,L6 IT,L5 ET,L6 CT,L5/6 NP,L6b,Vip,Pvalb,Lamp5,Sst,Sncg
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
chr1_3742742_3759944,B,B,B,B,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B
chr1_6245958_6258969,B,B,B,A,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B
chr1_8740008_8759916,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B,B
chr1_9627926_9637875,B,B,A,A,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B
chr1_9799472_9811359,A,A,A,A,A,A,B,B,B,B,B,B,B,B,B,B,B,B,B


## generate AB vec

In [15]:
sort_by_region = False
from ImageAnalysis3.structure_tools import distance
# Per-chromosome locus indices / orders derived from the codebook by the project helper
chr_2_indices, chr_2_orders = distance.Generate_PlotOrder(codebook_df, codebook_df, sort_by_region=sort_by_region)

# For every column of AB_summary_df, build {chr: {'A': positions, 'B': positions}},
# where positions are offsets within chr_2_indices[chr].
# NOTE(review): row positions of AB_summary_df are compared directly with chr_2_indices,
# which presumably index rows of codebook_df — this assumes both frames share the same
# row order; confirm.
celltype_chr_2_AB_dict = {}
for _group in AB_summary_df.columns:

    # Build the label array once per group instead of once per compartment
    _group_labels = np.array(AB_summary_df[_group].tolist())
    celltype_AB_dict = {}
    celltype_AB_dict['A'] = np.where(_group_labels == 'A')[0]
    celltype_AB_dict['B'] = np.where(_group_labels == 'B')[0]
    # by chr
    celltype_chr_2_AB = {}
    for _chr in chr_2_indices:
        _chr_AB = {}
        for _comp in ('A', 'B'):
            # return_indices gives, for each shared value (in sorted order), the position of
            # its first occurrence in chr_2_indices[_chr] — the same result as calling
            # list(...).index(value) per element, without the quadratic scan.
            _, _pos_in_chr, _ = np.intersect1d(
                chr_2_indices[_chr], celltype_AB_dict[_comp], return_indices=True)
            _chr_AB[_comp] = _pos_in_chr.astype(np.int32)
        celltype_chr_2_AB[_chr] = _chr_AB

    # append dict
    celltype_chr_2_AB_dict[_group] = celltype_chr_2_AB

In [16]:
# Inspect one entry of the result.
# NOTE(review): `_group` is the loop variable left over from the cell that builds
# celltype_chr_2_AB_dict, so this shows the last column processed there.
celltype_chr_2_AB_dict[_group]

{'1': {'A': array([ 21,  22,  23,  24,  25,  26,  27,  39,  40,  41,  42,  45,  46,
          47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
          60,  61,  62,  63,  64,  67,  73,  74,  75,  76,  77,  78,  79,
          95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 115, 116,
         117, 118, 119, 130, 136, 137, 138, 139, 140, 141, 142, 143, 144,
         145, 146, 147, 148, 149, 150, 151, 152]),
  'B': array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
          13,  14,  15,  16,  17,  18,  19,  20,  28,  29,  30,  31,  32,
          33,  34,  35,  36,  37,  38,  43,  44,  65,  66,  68,  69,  70,
          71,  72,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
          91,  92,  93,  94, 106, 107, 108, 109, 110, 111, 112, 113, 114,
         120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133,
         134, 135])},
 '2': {'A': array([  2,   3,  19,  20,  21,  22,  23,  24,  25,  26,  28,  29,  51,
          52, 

# Calculate AB (or transcription activity) score and density ratio

## load chr2zxy at subclass level



Data can be generated using the following notebook:

[preprocess/2_dna_merfish/scripts/2_spot_pick/4_summarize_jie_to_dict](../../preprocess/2_dna_merfish/scripts/2_spot_pick/4_summarize_jie_to_dict.ipynb)

In [19]:
# Pickled dictionary keyed by cell class, read from the preprocessing resources folder
class_2_chr2Zxys_filename = os.path.join(input_folder, 'subclass_2_chr2Zxys.pkl')
# Use a context manager so the file handle is closed after loading.
# NOTE: unpickling can execute arbitrary code; only load files produced by your own pipeline.
with open(class_2_chr2Zxys_filename, 'rb') as _pkl_handle:
    class_2_chr2ZxysList = pickle.load(_pkl_handle)

In [20]:
from tqdm import tqdm
# Import the submodule, reload it, and only then bind BatchCompartmentDensities.
# Reloading AFTER a `from ... import` leaves the imported name pointing at the
# pre-reload function object, so edits to density.py would not take effect.
from ImageAnalysis3.compartment_tools import density as _density_module
_density_module = reload(_density_module)
BatchCompartmentDensities = _density_module.BatchCompartmentDensities

<module 'ImageAnalysis3.compartment_tools.density' from 'C:\\Users\\shiwei\\Documents\\ImageAnalysis3\\compartment_tools\\density.py'>

In [21]:
# Re-execute the density module from its source file (repeats the reload of the previous cell).
# NOTE(review): names bound earlier with `from ...density import ...` are not updated by
# this call; re-import them afterwards if density.py was edited.
reload(ia.compartment_tools.density)

<module 'ImageAnalysis3.compartment_tools.density' from 'C:\\Users\\shiwei\\Documents\\ImageAnalysis3\\compartment_tools\\density.py'>

In [22]:
# List the cell classes available in the loaded dictionary
class_2_chr2ZxysList.keys()

dict_keys(['Oligo', 'L5 IT', 'Micro', 'Peri', 'Endo', 'Astro', 'OPC', 'L6 CT', 'L5 ET', 'L5/6 NP', 'Pvalb', 'L6 IT', 'Lamp5', 'L6b', 'Sst', 'SMC', 'L4/5 IT', 'L2/3 IT', 'Vip', 'Sncg', 'VLMC'])

## calculate AB density score

In [23]:
# Settings passed to BatchCompartmentDensities
gaussian_radius = 0.5 # um
normalize_by_reg_num = False
num_threads= 30

# Keyword arguments shared by every call; only trans contributions are used
_density_kwargs = dict(
    num_threads=num_threads,
    normalize_by_reg_num=normalize_by_reg_num,
    use_cis=False,
    use_trans=True,
)

# One BatchCompartmentDensities result per cell class, keyed by class name
class_2_ScoreDicts = {}

for _sel_class, celltype_chr_2_AB in celltype_chr_2_AB_dict.items():
    # Print the class name now and the elapsed time on the same line once it finishes
    print(_sel_class, end =' ')
    _tic = time.time()

    class_2_ScoreDicts[_sel_class] = BatchCompartmentDensities(
        class_2_chr2ZxysList[_sel_class],
        celltype_chr_2_AB,
        gaussian_radius,
        **_density_kwargs,
    )

    _elapsed = time.time() - _tic
    print(f"in {_elapsed:.2f}s. ")

Oligo in 349.78s. 
OPC in 68.80s. 
Micro in 87.52s. 
Astro in 136.62s. 
Endo in 109.79s. 
Peri in 59.74s. 
L2/3 IT in 155.76s. 
L4/5 IT in 165.11s. 
L5 IT in 138.70s. 
L6 IT in 129.82s. 
L5 ET in 58.84s. 
L6 CT in 294.02s. 
L5/6 NP in 49.96s. 
L6b in 59.47s. 
Vip in 29.54s. 
Pvalb in 70.81s. 
Lamp5 in 35.85s. 
Sst in 54.87s. 
Sncg in 18.64s. 


In [24]:
# save result
# NOTE(review): the file name hard-codes 'notNorm' although normalize_by_reg_num is a
# variable set in the scoring cell — keep the two consistent when changing that setting.
ABscores_ensemble_filename = os.path.join(output_analysis_folder, f'AB_trans_scores_notNorm_r{gaussian_radius}_bymajorType_by_pm.pkl')
if not os.path.exists(ABscores_ensemble_filename):
    print(ABscores_ensemble_filename)
    # Context manager closes (and flushes) the file even if pickling fails part-way
    with open(ABscores_ensemble_filename, 'wb') as _pkl_handle:
        pickle.dump(class_2_ScoreDicts, _pkl_handle)
else:
    # An existing file is never overwritten; say so instead of skipping silently
    print(f'File already exists, not overwritten: {ABscores_ensemble_filename}')

L:\Shiwei\postanalysis_2024\v0\compartment_transcription\analysis\AB_trans_scores_notNorm_r0.5_bymajorType_by_pm.pkl


## calculate AB density ratio

In [25]:
# For every class, turn each score dictionary into {chr: log(A) - log(B)}.
# The order of the per-class list follows the order of the score dictionaries.
# np.log may emit RuntimeWarnings here (the captured cell output shows warnings on this
# expression); the values are kept as computed.
class_2_ABRatioDicts = {}
for _class, _scoreDicts in class_2_ScoreDicts.items():
    # Report the class name and how many score dictionaries it has
    print(_class, len(_scoreDicts))

    class_2_ABRatioDicts[_class] = [
        {
            _chr: np.log(_chr_AB_dict['A']) - np.log(_chr_AB_dict['B'])
            for _chr, _chr_AB_dict in _scoreDict.items()
        }
        for _scoreDict in _scoreDicts
    ]

Oligo 6605


  _AB_ratios = np.log(_chr_AB_dict['A']) - np.log(_chr_AB_dict['B'])
  _AB_ratios = np.log(_chr_AB_dict['A']) - np.log(_chr_AB_dict['B'])


OPC 1452
Micro 1836
Astro 4970
Endo 3738
Peri 957
L2/3 IT 3966
L4/5 IT 3388
L5 IT 3337
L6 IT 2428
L5 ET 1624
L6 CT 5602
L5/6 NP 784
L6b 1044
Vip 422
Pvalb 1317
Lamp5 540
Sst 1031
Sncg 173


In [26]:
# Save the A/B log-ratio dictionaries next to the score file.
# NOTE(review): the file name hard-codes 'notNorm' although normalize_by_reg_num is a
# variable set in the scoring cell — keep the two consistent when changing that setting.
ABratio_ensemble_filename = os.path.join(output_analysis_folder, f'AB_trans_ratio_notNorm_r{gaussian_radius}_bymajorType_by_pm.pkl')
if not os.path.exists(ABratio_ensemble_filename):
    print(ABratio_ensemble_filename)
    # Context manager closes (and flushes) the file even if pickling fails part-way
    with open(ABratio_ensemble_filename, 'wb') as _pkl_handle:
        pickle.dump(class_2_ABRatioDicts, _pkl_handle)
else:
    # An existing file is never overwritten; say so instead of skipping silently
    print(f'File already exists, not overwritten: {ABratio_ensemble_filename}')

L:\Shiwei\postanalysis_2024\v0\compartment_transcription\analysis\AB_trans_ratio_notNorm_r0.5_bymajorType_by_pm.pkl
