# Import module

The link to get [ImageAnalysis3](https://github.com/zhengpuas47/ImageAnalysis3) 

Or from the Zhuang lab archived [source_tools](https://github.com/ZhuangLab/Chromatin_Analysis_2020_cell/tree/master/sequential_tracing/source)

## ImageAnalysis3 and basic modules

In [1]:
%run "C:\Users\shiwei\Documents\ImageAnalysis3\required_files\Startup_py3.py"
sys.path.append(r"C:\Users\shiwei\Documents")

import ImageAnalysis3 as ia
from ImageAnalysis3 import *
from ImageAnalysis3.classes import _allowed_kwds

import h5py
import ast
import pandas as pd

print(os.getpid())

20732


## Chromatin_analysis_tools etc

See **functions** in the repository for [AnalysisTool_Chromatin](../../README.md)

In [2]:
# Chromatin_analysis_tools (ATC)
# Get path for the py containing functions
import os
import sys
import importlib
module_path =r'C:\Users\shiwei\Documents\AnalysisTool_Chromatin'
if module_path not in sys.path:
    sys.path.append(module_path)
    
# import relevant modules
import gene_selection 
importlib.reload(gene_selection)
import gene_to_loci
importlib.reload(gene_to_loci)
import gene_activity
importlib.reload(gene_activity)
import loci_1d_features
importlib.reload(loci_1d_features)  

import atac_to_loci
importlib.reload(atac_to_loci)

<module 'atac_to_loci' from 'C:\\Users\\shiwei\\Documents\\AnalysisTool_Chromatin\\atac_to_loci.py'>

# Define folders

In [3]:
# main folder for postanalysis
postanalysis_folder = r'L:\Shiwei\postanalysis_2024\v0'
# input files for postanalysis
input_folder = os.path.join(postanalysis_folder, 'resources_from_preprocess')

# output file to be generated
output_main_folder = os.path.join(postanalysis_folder, 'compartment_transcription')

output_analysis_folder = os.path.join(output_main_folder, 'analysis')
output_figure_folder = os.path.join(output_main_folder, 'figures')

# make new folder if needed
make_output_folder = True

if make_output_folder and not os.path.exists(output_analysis_folder):
    os.makedirs(output_analysis_folder)
    print(f'Generating analysis folder: {output_analysis_folder}.')
elif os.path.exists(output_analysis_folder):
    print(f'Use existing analysis folder: {output_analysis_folder}.')
    
if make_output_folder and not os.path.exists(output_figure_folder):
    os.makedirs(output_figure_folder)
    print(f'Generating figure folder: {output_figure_folder}.')
elif os.path.exists(output_figure_folder):
    print(f'Use existing figure folder: {output_figure_folder}.')

Use existing analysis folder: L:\Shiwei\postanalysis_2024\v0\compartment_transcription\analysis.
Use existing figure folder: L:\Shiwei\postanalysis_2024\v0\compartment_transcription\figures.


# Plotting parameters

In [4]:
%matplotlib inline
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
import matplotlib.pyplot as plt
plt.rc('font', family='serif')
plt.rc('font', serif='Arial')

from ImageAnalysis3.figure_tools import _double_col_width, _single_col_width, _font_size, _ticklabel_size,_ticklabel_width

import seaborn as sns
sns.set_context("paper", rc={"font.size":_font_size,"axes.titlesize":_font_size+1,"axes.labelsize":_font_size})  

In [5]:
# Other required plotting parameters
_dpi = 300
_font_size = 7
_page_width = 5.5


## cell type color-codes

In [6]:
# cell labels from RNA-MERFISH and celltype prediction
selected_cell_labels = ['L2/3 IT','L4/5 IT','L5 IT','L6 IT','L5 ET','L5/6 NP','L6 CT','L6b',
                           'Sst','Pvalb','Lamp5','Sncg','Vip',
                           'Astro','Oligo','OPC','Micro','Endo','VLMC','SMC','Peri', 
                           #'other',
                          ]
# cell palette from RNA-MERFISH UMAP and stats
celltype_palette = {'Astro':'lightcoral', 
                    'Endo':'skyblue', 
                    'L2/3 IT':'gold', 
                    'L4/5 IT':'darkorange', 
                    'L5 ET':'mediumseagreen', 
                    'L5 IT':'aqua',
                    'L5/6 NP':'darkgreen',
                    'L6 CT':'brown',
                    'L6 IT':'magenta',
                    'L6b':'blue', 
                    'Lamp5':'orange', 
                    'Micro':'peachpuff',
                    'OPC':'thistle', 
                    'Oligo':'darkviolet',
                    'Peri':'sandybrown',
                    'Pvalb':'springgreen',
                    'SMC':'rosybrown',
                    'Sncg':'darkkhaki',
                    'Sst':'steelblue', 
                    'VLMC':'saddlebrown', 
                    'Vip':'red',
                    'other':'slategray'}


In [7]:
# this is the plotting order noted based on the snRNA transcriptional acitivty if needed
sorted_cellplot_order_byRNA = ['Micro', 'Oligo', 'Endo', 'OPC', 'Astro', 'Vip', 'Lamp5',
                  'L5/6 NP', 'Sst', 'Sncg', 'Pvalb', 'L4/5 IT', 'L6 CT',
                  'L6 IT', 'L6b', 'L2/3 IT', 'L5 IT', 'L5 ET']

# Load data relevant information

## load and format codebook

[merged codebook](../resources/merged_codebook.csv) as in the repository (merged for all DNA-MERFISH libraries)

In [8]:
# Load codebook 
codebook_fname = os.path.join(input_folder,'merged_codebook.csv')
codebook_df = pd.read_csv (codebook_fname, index_col=0)

# sort df by chr and chr_order
codebook_df = loci_1d_features.sort_loci_df_by_chr_order (codebook_df)
codebook_df.head()

Unnamed: 0,name,id,NDB_784,NDB_755,NDB_826,NDB_713,NDB_865,NDB_725,NDB_817,NDB_710,...,NDB_479,NDB_562,NDB_608,NDB_460,NDB_563,NDB_592,NDB_368,NDB_436,NDB_629,NDB_604
0,1:3742742-3759944,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1:6245958-6258969,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1:8740008-8759916,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1016,1:9627926-9637875,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1017,1:9799472-9811359,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [9]:
# Format the chr loci name by 
# 1. changing loci name format
# 2. extract relevant information such as id, chr, chr_order, and library etc
from gene_to_loci import loci_pos_format
loci_name_list = list(map(loci_pos_format, codebook_df['name'].tolist()))
loci_name_arr = np.array(loci_name_list)

# convert to a new dataframe and set loci name as index
codebook_df = codebook_df[['name','id','chr','chr_order','library']]
codebook_df['loci_name'] = list(loci_name_arr[:,0])
codebook_df = codebook_df.set_index ('loci_name')

codebook_df.head()

Unnamed: 0_level_0,name,id,chr,chr_order,library
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
chr1_3742742_3759944,1:3742742-3759944,1,1,0.0,CTP11
chr1_6245958_6258969,1:6245958-6258969,2,1,1.0,CTP11
chr1_8740008_8759916,1:8740008-8759916,3,1,2.0,CTP11
chr1_9627926_9637875,1:9627926-9637875,1,1,3.0,CTP13
chr1_9799472_9811359,1:9799472-9811359,2,1,4.0,CTP13


# Load pre-summarized AB density medians

Data can be generated from notebook

[2_2_summarize_ab_density_all_loci_subclassby_ensemble](2_2_summarize_ab_density_all_loci_subclassby_ensemble.ipynb)

In [10]:
# the calculated AB desnity ratio for single-cell
#compartment_folder = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\MouseBrain_PostAnalysis'
AB_summary_fname = os.path.join(output_analysis_folder , r'Median_Summary_AB_trans_ratio_notNorm_r0.5_bymajorType_by_pm.csv')

AB_ratio_summary = pd.read_csv(AB_summary_fname, index_col=0)

source_from_CW = False
if source_from_CW:
    AB_ratio_summary['chr'] = AB_ratio_summary.index
    AB_ratio_summary = AB_ratio_summary.set_index('loci_name')
    loci_chr_id_map = {(_chr, _order):v for _chr, _order, v in zip(df_refgen['chr'],
                                                                   df_refgen['chr_order'],
                                                                   df_refgen['loci_name'],)}
    AB_ratio_summary['loci_name'] = AB_ratio_summary.apply(lambda row: loci_chr_id_map[row['chr'], row['hyb']], axis=1)
    AB_ratio_summary = pd.pivot(AB_ratio_summary, index='loci_name', columns='subclass', values='AB_density')
    
AB_ratio_summary

Unnamed: 0_level_0,Oligo,OPC,Micro,Astro,Endo,Peri,L2/3 IT,L4/5 IT,L5 IT,L6 IT,L5 ET,L6 CT,L5/6 NP,L6b,Vip,Pvalb,Lamp5,Sst,Sncg
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
chr1_3742742_3759944,-0.430767,-0.420269,-0.255699,-0.453668,0.132135,-0.140469,-0.560620,-0.460501,-0.446779,-0.488172,-0.421529,-0.455810,-0.304009,-0.472670,-0.114805,-0.289297,-0.309272,-0.325902,-0.186869
chr1_6245958_6258969,-0.153712,-0.206244,-0.047506,-0.226636,0.387080,0.133823,-0.447946,-0.445162,-0.487868,-0.408086,-0.356980,-0.453940,-0.201882,-0.332559,-0.207538,-0.219555,-0.207729,-0.116463,-0.435847
chr1_8740008_8759916,-0.425727,-0.148089,-0.275335,-0.409980,0.080992,0.116621,-0.400666,-0.314289,-0.418091,-0.412230,-0.292372,-0.487285,-0.505100,-0.498226,-0.032649,-0.221845,-0.257861,-0.206280,-0.333840
chr1_9627926_9637875,-0.254296,0.003753,0.080439,-0.209209,0.287116,0.149472,-0.350141,-0.286409,-0.402433,-0.200084,-0.307043,-0.290176,-0.298747,-0.386660,-0.279638,-0.146983,-0.086232,-0.194100,-0.302644
chr1_9799472_9811359,-0.116000,-0.083814,0.234512,-0.073540,0.492773,0.359415,-0.246629,-0.303253,-0.354788,-0.248334,-0.232831,-0.253817,-0.417956,-0.329905,-0.355471,-0.403508,-0.072171,-0.268272,-0.355233
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
chrX_166247682_166259932,0.190443,0.086648,0.045090,0.202411,0.315556,0.383854,-0.157234,-0.122200,-0.238115,-0.214688,-0.064618,-0.077107,-0.332221,-0.080294,-0.227362,-0.265453,-0.371283,-0.219211,-0.668820
chrX_167157164_167167452,0.069587,-0.185342,0.435091,0.024904,0.626812,0.500466,-0.203417,-0.119909,-0.157937,-0.201859,-0.364895,-0.162953,0.024588,-0.201043,-0.109136,-0.048074,-0.359983,-0.203461,-0.322641
chrX_168746045_168757590,-0.138497,-0.285900,-0.019425,-0.079555,0.332414,0.382330,-0.297829,-0.237427,-0.256210,-0.317645,-0.250351,-0.389821,0.137434,-0.329275,-0.261027,-0.155011,-0.287720,-0.211167,-0.682236
chrX_169963295_170005197,-0.095872,0.023907,0.074545,0.013206,0.551413,0.311509,-0.235026,-0.220536,-0.293816,-0.380180,-0.200907,-0.360787,-0.075992,-0.169335,-0.296937,-0.053206,-0.307064,-0.229188,-0.193384


## calculate regression factor between celltypes

In [11]:
### get Abratio normalization dict
from scipy import stats
from sklearn.linear_model import LinearRegression

# use common ref to regress which has more number of data
regress_each = False
fit_intercept=True
ab_regression_coef_dict = {}


if not regress_each:
    ref_cls = 'L2/3 IT'
    for _cls in AB_ratio_summary.columns:
        xs = np.array(AB_ratio_summary[ref_cls].tolist())
        ys = np.array(AB_ratio_summary[_cls].tolist())
        # correlation
        xs_good = xs[(~np.isnan(xs)&(~np.isnan(ys)))]
        ys_good = ys[(~np.isnan(xs)&(~np.isnan(ys)))]
        X=xs_good.reshape(-1, 1)
        y=ys_good.reshape(-1, 1)

        if len(X) > 0:
            reg = LinearRegression(fit_intercept=fit_intercept).fit(X, y)
            ab_regression_coef_dict[_cls] = (reg.coef_[0], reg.intercept_)
        else:
            ab_regression_coef_dict[_cls] = (np.nan, np.nan)

    
else:
    for ref_cls in AB_ratio_summary.columns:
        ab_regression_coef_dict[ref_cls] = {}

        for _cls in AB_ratio_summary.columns:
            xs = np.array(AB_ratio_summary[ref_cls].tolist())
            ys = np.array(AB_ratio_summary[_cls].tolist())
            # correlation
            xs_good = xs[(~np.isnan(xs)&(~np.isnan(ys)))]
            ys_good = ys[(~np.isnan(xs)&(~np.isnan(ys)))]
            X=xs_good.reshape(-1, 1)
            y=ys_good.reshape(-1, 1)

            if len(X) > 0:
                reg = LinearRegression(fit_intercept=fit_intercept).fit(X, y)
                ab_regression_coef_dict[ref_cls][_cls] = (reg.coef_[0], reg.intercept_)
            else:
                ab_regression_coef_dict[ref_cls][_cls] = (np.nan, np.nan)

ab_regression_coef_dict

{'Oligo': (array([0.77329371]), array([0.12521121])),
 'OPC': (array([0.71597982]), array([0.10633413])),
 'Micro': (array([0.68456728]), array([0.21108297])),
 'Astro': (array([0.69342696]), array([0.15908295])),
 'Endo': (array([0.44780838]), array([0.49364204])),
 'Peri': (array([0.49270282]), array([0.48223804])),
 'L2/3 IT': (array([1.]), array([0.])),
 'L4/5 IT': (array([0.86112884]), array([-0.00277903])),
 'L5 IT': (array([0.77141948]), array([-0.03423645])),
 'L6 IT': (array([0.85293924]), array([-0.01805797])),
 'L5 ET': (array([0.63679152]), array([-0.04722389])),
 'L6 CT': (array([0.89872086]), array([-0.01063253])),
 'L5/6 NP': (array([0.58499424]), array([-0.0513388])),
 'L6b': (array([0.57913252]), array([-0.04630086])),
 'Vip': (array([0.46624995]), array([-0.04151498])),
 'Pvalb': (array([0.58214168]), array([-0.03671979])),
 'Lamp5': (array([0.49064784]), array([-0.01925425])),
 'Sst': (array([0.48761871]), array([-0.02805633])),
 'Sncg': (array([0.49812969]), array([

# Process all cellgroups for the markers

## define shared parameters

In [12]:
# the cell groups to be analyzed
sorted_group_order = ['L2/3 IT', 'L4/5 IT', 'L5 IT',
                     'L6 IT', 'L5 ET', 'L6 CT', 'L5/6 NP', 'L6b', 'Vip', 'Pvalb', 'Lamp5',
                     'Sst', ]


In [13]:
# re-import functions
import loci_1d_features as lf
import gene_selection as gs
import gene_to_loci as gl

re_calculate_AB = False

## load marker gene selections and loop processing

Data can be generated from the notebook

[3_1_marker_gene_selection_neurons](3_1_marker_gene_selection_neurons.ipynb)

In [14]:
# compile result as (marker loci of group) by (median of sc for group)
compiled_df = pd.DataFrame(columns = sorted_group_order)

for _marker_group in sorted_group_order[:]:
    
    print(f'Load existing marker gene dataframe for {_marker_group}')

    _marker_savename = _marker_group.replace("/","_")
    _groupby_savename = 'class' #typo in filename from the upstream analysis
    marker_genes_fname = os.path.join(output_analysis_folder,
                                      'marker_neuron',
                                      f'{_groupby_savename}_{_marker_savename}_vs_rest.csv')
    im_loci_df = pd.read_csv(marker_genes_fname, index_col=0)
    im_loci_df = im_loci_df[~im_loci_df.index.str.contains('chrX')]

    ##############################################################################
    # 2. load AB ratio for relevant loci
    for _dir in ['upregulated','downregulated']:

        sel_im_loci_df = im_loci_df[im_loci_df['Expression_change']==_dir]
        # initate result df here so marker of each direction can be independently saved
        _marker_group_df = pd.DataFrame(columns = sorted_group_order)
        # get chr and chr_order
        sel_loci_key_list = lf.sorted_loci_keys_for_loci_dataframe(sel_im_loci_df)

        # use chr and chr_order to retrieve AB ratio from single cell
        for loci_key in sel_loci_key_list:
            # append result for each loci from single cell
            # function to get the AB ratio for all cell group and append together
            if re_calculate_AB:
                _loci_group_df = loci_1d_features.sc_compartment_ratio_by_loci_key (AB_ratio_by_group, 
                                                                               sorted_group_order, 
                                                                                loci_key, 
                                                                                report_type = 'median',
                                                                                average_ratios_in_cell=True, 
                                                                                spot_num_th=500)
            else:
                _loci_name = codebook_df[(codebook_df['chr']==str(loci_key[0])) & (codebook_df['chr_order']==loci_key[1])].index[0]
                _loci_group_df = pd.DataFrame(AB_ratio_summary.loc[_loci_name]).transpose()
                _loci_group_df = _loci_group_df.reset_index(drop=True)

                
            if not regress_each:
                for _group in sorted_group_order:
                    m, b = ab_regression_coef_dict[_group][0],ab_regression_coef_dict[_group][1]
                    _loci_group_df.loc[0, _group] = (_loci_group_df.loc[0, _group]-b)/m 


            # concat loci
            _marker_group_df=pd.concat([_marker_group_df,_loci_group_df])


        # add loci, expression, group, gene info, etc
        _marker_group_df['loci_name'] = sel_im_loci_df.index.tolist()
        _marker_group_df['Marker_gene'] = sel_im_loci_df['Marker_gene'].tolist()
        _marker_group_df['Marker_group']=_marker_group
        _marker_group_df['Expression_change']=_dir

        # regression to normalize ABratio using the marker group as ref_cls
        if regress_each:
            for _group in sorted_group_order:
                m, b = ab_regression_coef_dict[_marker_group][_group][0],ab_regression_coef_dict[_marker_group][_group][1]
                _marker_group_df.loc[:, _group] = (_marker_group_df.loc[:, _group]-b)/m 
            # concat loci
            _marker_group_df=pd.concat([_marker_group_df,_loci_group_df])

        # concat for the marker cell group                  
        compiled_df=pd.concat([compiled_df,_marker_group_df])

    print('================================================================================')

# set loci_name as index 
compiled_df=compiled_df.set_index('loci_name')    
compiled_df.head(10)

Load existing marker gene dataframe for L2/3 IT
Load existing marker gene dataframe for L4/5 IT
Load existing marker gene dataframe for L5 IT
Load existing marker gene dataframe for L6 IT
Load existing marker gene dataframe for L5 ET
Load existing marker gene dataframe for L6 CT
Load existing marker gene dataframe for L5/6 NP
Load existing marker gene dataframe for L6b
Load existing marker gene dataframe for Vip
Load existing marker gene dataframe for Pvalb
Load existing marker gene dataframe for Lamp5
Load existing marker gene dataframe for Sst


Unnamed: 0_level_0,L2/3 IT,L4/5 IT,L5 IT,L6 IT,L5 ET,L6 CT,L5/6 NP,L6b,Vip,Pvalb,...,Oligo,OPC,Micro,Astro,Endo,Peri,Sncg,Marker_gene,Marker_group,Expression_change
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
chr1_55447113_55457262,-0.012087,0.010568,-0.071215,0.042909,-0.225805,-0.335659,-0.154292,0.24691,-0.138061,-0.183021,...,0.501537,-0.012884,0.363483,0.046344,0.398766,0.697102,-0.136335,Plcl1,L2/3 IT,upregulated
chr1_57812315_57822455,0.03001,0.026014,-0.020101,0.00505,-0.491426,-0.328268,-0.561861,-0.010024,0.130936,-0.116095,...,0.154107,-0.080462,0.148876,-0.056487,0.444773,0.677791,0.090681,Spats2l,L2/3 IT,upregulated
chr1_83353943_83364452,-0.142079,-0.138779,-0.18031,-0.181485,-0.177545,-0.273884,-0.21321,-0.219561,-0.148108,0.051375,...,-0.045838,0.009213,-0.054654,-0.097978,0.409423,0.307075,-0.097087,Sphkap,L2/3 IT,upregulated
chr2_83746826_83759988,-0.285016,-0.319229,-0.297294,-0.264417,-0.303577,-0.448286,0.029637,-0.414037,0.159462,-0.405302,...,-0.043924,0.157172,0.281338,0.025703,0.54927,0.464771,-0.268801,Itgav,L2/3 IT,upregulated
chr2_118752499_118759998,0.123334,0.061411,-0.114584,0.141752,0.130809,0.060279,0.059229,0.243248,-0.133742,0.330045,...,0.345743,0.301942,0.662187,0.472879,0.925035,0.779016,0.211947,Pak6,L2/3 IT,upregulated
chr2_135670388_135680581,0.014333,0.050795,-0.122257,-0.000108,-0.186657,-0.017936,0.251213,-0.147799,-0.482289,-0.124177,...,0.034957,0.001804,0.086982,0.18808,0.871917,0.577283,-0.016565,Plcb4,L2/3 IT,upregulated
chr2_136042239_136062239,0.006358,0.031968,-0.092659,0.001756,-0.106366,-0.051063,0.071011,-0.192342,-0.263782,-0.110834,...,0.008933,-0.091364,0.003331,0.010647,0.509978,0.363073,0.135586,Lamp5,L2/3 IT,upregulated
chr3_26249842_26259981,-0.031313,-0.18088,-0.053138,-0.177793,0.112706,-0.268102,-0.390064,-0.50325,-0.230269,-0.422207,...,-0.200104,-0.010918,-0.191664,-0.129296,0.09405,0.197047,-0.048931,Nlgn1,L2/3 IT,upregulated
chr3_36240220_36259969,0.045887,-0.134441,-0.130924,-0.171635,-0.111162,-0.15995,-0.085234,-0.086585,-0.225304,-0.199232,...,-0.025473,0.01339,-0.002306,0.056112,0.271412,0.370254,-0.055472,Qrfpr,L2/3 IT,upregulated
chr3_82044028_82054615,0.00451,-0.216577,-0.035403,-0.067746,-0.116038,-0.268461,-0.11535,-0.150176,-0.50677,-0.232643,...,-0.087592,0.05363,0.115473,0.106235,0.484992,0.628255,-0.241272,Gucy1a1,L2/3 IT,upregulated


In [15]:
_groupby_savename = 'subclass'
compiled_df_savename = os.path.join(output_analysis_folder,f'{_groupby_savename}_marker_AB_trans_ratio_regressed_heatmap_pm_majorType.csv')
compiled_df.to_csv(compiled_df_savename)
compiled_df

Unnamed: 0_level_0,L2/3 IT,L4/5 IT,L5 IT,L6 IT,L5 ET,L6 CT,L5/6 NP,L6b,Vip,Pvalb,...,Oligo,OPC,Micro,Astro,Endo,Peri,Sncg,Marker_gene,Marker_group,Expression_change
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
chr1_55447113_55457262,-0.012087,0.010568,-0.071215,0.042909,-0.225805,-0.335659,-0.154292,0.246910,-0.138061,-0.183021,...,0.501537,-0.012884,0.363483,0.046344,0.398766,0.697102,-0.136335,Plcl1,L2/3 IT,upregulated
chr1_57812315_57822455,0.030010,0.026014,-0.020101,0.005050,-0.491426,-0.328268,-0.561861,-0.010024,0.130936,-0.116095,...,0.154107,-0.080462,0.148876,-0.056487,0.444773,0.677791,0.090681,Spats2l,L2/3 IT,upregulated
chr1_83353943_83364452,-0.142079,-0.138779,-0.180310,-0.181485,-0.177545,-0.273884,-0.213210,-0.219561,-0.148108,0.051375,...,-0.045838,0.009213,-0.054654,-0.097978,0.409423,0.307075,-0.097087,Sphkap,L2/3 IT,upregulated
chr2_83746826_83759988,-0.285016,-0.319229,-0.297294,-0.264417,-0.303577,-0.448286,0.029637,-0.414037,0.159462,-0.405302,...,-0.043924,0.157172,0.281338,0.025703,0.549270,0.464771,-0.268801,Itgav,L2/3 IT,upregulated
chr2_118752499_118759998,0.123334,0.061411,-0.114584,0.141752,0.130809,0.060279,0.059229,0.243248,-0.133742,0.330045,...,0.345743,0.301942,0.662187,0.472879,0.925035,0.779016,0.211947,Pak6,L2/3 IT,upregulated
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
chr15_93506606_93516280,0.279646,0.299144,0.314880,0.192290,0.176774,0.435922,0.341191,0.343930,0.063735,0.188549,...,0.379346,0.262460,0.090410,0.474806,0.698943,0.573447,0.063784,Prickle1,Sst,downregulated
chr16_31248910_31259937,0.298618,0.376398,0.415467,0.314846,0.574299,0.303028,0.114743,0.518095,0.271644,0.482448,...,0.411066,0.485803,0.633806,0.470194,0.933134,0.860813,0.243299,Acap2,Sst,downregulated
chr18_24800347_24810452,-0.235554,-0.158708,-0.046380,-0.166524,-0.103405,-0.128281,-0.116337,-0.097233,-0.293483,-0.521082,...,-0.202280,-0.155881,-0.095815,-0.193410,0.214534,0.218239,-0.089543,Fhod3,Sst,downregulated
chr18_32971777_32983929,-0.168716,-0.019184,-0.103085,-0.224687,-0.191341,-0.130108,-0.117014,-0.432502,-0.436715,-0.385795,...,-0.041980,-0.104640,-0.065321,-0.095007,0.340077,0.379896,-0.258051,Camk4,Sst,downregulated


## normalize the heatmap to relative foldchange

In [16]:
norm_compiled_df = pd.DataFrame(columns=sorted_group_order)

# simply loop to get the ref value (aka the value from the marker group)
ref_value_list = []
for _row_df in compiled_df.iloc():
    ref_group = _row_df['Marker_group']
    ref_value_list.append(_row_df[ref_group])

# norm the relevant columns-dervied matrix
mat = compiled_df[sorted_group_order].to_numpy()
ref_arr = np.array(ref_value_list)[:, np.newaxis]
norm_mat = mat-ref_arr

# add back relevant info
norm_compiled_df[sorted_group_order]=pd.DataFrame(norm_mat)
norm_compiled_df.index = compiled_df.index
for _col in ['Marker_gene','Marker_group','Expression_change']:
    norm_compiled_df[_col]=compiled_df[_col]

norm_compiled_df

Unnamed: 0_level_0,L2/3 IT,L4/5 IT,L5 IT,L6 IT,L5 ET,L6 CT,L5/6 NP,L6b,Vip,Pvalb,Lamp5,Sst,Marker_gene,Marker_group,Expression_change
loci_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
chr1_55447113_55457262,0.000000,0.022654,-0.059128,0.054995,-0.213718,-0.323572,-0.142205,0.258996,-0.125974,-0.170934,0.115349,-0.156570,Plcl1,L2/3 IT,upregulated
chr1_57812315_57822455,0.000000,-0.003996,-0.050110,-0.024960,-0.521436,-0.358278,-0.591870,-0.040033,0.100926,-0.146105,-0.137934,0.078809,Spats2l,L2/3 IT,upregulated
chr1_83353943_83364452,0.000000,0.003300,-0.038231,-0.039406,-0.035466,-0.131805,-0.071131,-0.077482,-0.006029,0.193454,0.140309,0.058188,Sphkap,L2/3 IT,upregulated
chr2_83746826_83759988,0.000000,-0.034213,-0.012279,0.020599,-0.018561,-0.163270,0.314653,-0.129022,0.444477,-0.120287,0.000895,0.144604,Itgav,L2/3 IT,upregulated
chr2_118752499_118759998,0.000000,-0.061924,-0.237919,0.018418,0.007474,-0.063056,-0.064106,0.119914,-0.257077,0.206711,0.326328,0.221448,Pak6,L2/3 IT,upregulated
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
chr15_93506606_93516280,0.353566,0.373063,0.388800,0.266209,0.250693,0.509842,0.415111,0.417849,0.137654,0.262468,0.429054,0.000000,Prickle1,Sst,downregulated
chr16_31248910_31259937,-0.044704,0.033076,0.072145,-0.028476,0.230977,-0.040295,-0.228579,0.174772,-0.071678,0.139126,-0.124549,0.000000,Acap2,Sst,downregulated
chr18_24800347_24810452,0.065464,0.142309,0.254638,0.134494,0.197612,0.172736,0.184680,0.203785,0.007535,-0.220065,0.269659,0.000000,Fhod3,Sst,downregulated
chr18_32971777_32983929,0.276189,0.425720,0.341819,0.220217,0.253563,0.314797,0.327891,0.012402,0.008189,0.059110,0.118010,0.000000,Camk4,Sst,downregulated


In [17]:
norm_compiled_df_savename = os.path.join(output_analysis_folder,f'{_groupby_savename}_marker_AB_trans_ratio_regressed_heatmap_norm_pm_majorType.csv')
norm_compiled_df.to_csv(norm_compiled_df_savename)