# Merging Results from Pseudobulk DEG analysis

In [None]:
import scanpy as sc
import decoupler as dc

# Only needed for processing
import numpy as np
import pandas as pd

# Needed for some plotting
import matplotlib.pyplot as plt

import os
import anndata as ad
import scanorama

from wrapper_functions import *

In [None]:
# Automatically re-load wrapper functions after an update
# Find details here: https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [None]:
# Analysis configuration. Organism, Analyze and Protocol are not defined in this
# notebook — presumably they come from the star import of wrapper_functions
# (TODO confirm).
organism = Organism.mouse
analyze_params = Analyze(protocol=Protocol.FF, organism=organism)

In [None]:
# Base directory: the current working directory of the notebook.
root_path = os.getcwd()
inpath='your_inpath_folder' # Replace with the location of your samples
# Location of the DE results to merge; absolute, because it starts from root_path.
results_folder = os.path.join(root_path, 'analyzed', 'DE', 'DE_Results_14c')

## Differentially Expressed Genes

### List of relevant genes based on prior knowledge (literature)

In [None]:
literature_list = pd.read_csv("Factors_AAV_processing_complete.csv", header= None)

In [None]:
# The CSV is read without a header, so its first column is labelled 0; use it as the index.
# NOTE(review): drop_duplicates() compares the remaining column values, not the
# index, so repeated entries in column 0 are presumably not removed here — confirm intent.
literature_list = literature_list.set_index(0).drop_duplicates()

In [None]:
literature_list['GeneName'] = literature_list.index

In [None]:
literature_list

In [None]:
# Read every CSV in the DEG results folder and tag each row with the condition
# (file name without extension) it came from.
# NOTE(review): results_folder is built from os.getcwd() and is therefore
# absolute, so os.path.join discards `inpath` here — confirm which root is intended.
folder_path = os.path.join(inpath, results_folder, 'DEG')
all_dfs = []  # one DataFrame per CSV file

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the combined table (and anything derived from first-appearance order, such as
# the colour assignment in the heatmaps) reproducible across machines.
for filename in sorted(os.listdir(folder_path)):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)
        # Strip only the trailing '.csv'; a blanket str.replace would also
        # remove a '.csv' occurring inside the file name.
        df['Condition'] = filename[:-len('.csv')]
        all_dfs.append(df)

In [None]:
combined_df = pd.concat(all_dfs, ignore_index=True)

In [None]:
combined_df

In [None]:
# Neutralise missing statistics before any thresholding or plotting.
# A missing fold change is filled with 0 (no change) rather than 1: a blanket
# fillna(1) would turn it into an apparent two-fold up-regulation in the
# heatmaps and could fake a sign flip in the region comparison further down.
combined_df.fillna({'log2FoldChange': 0}, inplace=True)
# Every other missing value (notably padj) becomes 1, i.e. "not significant".
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['GeneName'].isin(literature_list['GeneName'])]

Select the literature genes that are significantly deregulated (adjusted p-value below the threshold) in at least one condition.

In [None]:
# Cut-off applied to the adjusted p-value (padj column).
pvalue_tresh = 0.05
# Unique literature genes whose padj is below the cut-off in at least one row,
# i.e. in at least one condition.
Genes_literature_DEG = filtered_df[filtered_df['padj'] < pvalue_tresh]['GeneName'].unique().tolist()

In [None]:
Genes_literature_DEG

In [None]:
Genes_literature_DEG_df = filtered_df[filtered_df['GeneName'].isin(Genes_literature_DEG)]

In [None]:
Genes_literature_DEG_df

In [None]:
# Column order used for the heatmaps: female before male, AVV2 before AVV9, and
# periportal / other / pericentral within each group. The entries are used to
# select columns of the pivoted tables, whose labels are the 'Condition' values
# (CSV file names without '.csv').
# NOTE(review): the spelling 'AVV' (not 'AAV') presumably mirrors the file names — confirm.
predefined_order = ['female_AVV2_vs_Control_periportal_Results', 
                    'female_AVV2_vs_Control_other_Results', 
                    'female_AVV2_vs_Control_pericentral_Results', 
                    'female_AVV9_vs_Control_periportal_Results', 
                    'female_AVV9_vs_Control_other_Results', 
                    'female_AVV9_vs_Control_pericentral_Results',
                    'male_AVV2_vs_Control_periportal_Results', 
                    'male_AVV2_vs_Control_other_Results', 
                    'male_AVV2_vs_Control_pericentral_Results', 
                    'male_AVV9_vs_Control_periportal_Results', 
                    'male_AVV9_vs_Control_other_Results', 
                    'male_AVV9_vs_Control_pericentral_Results',] 

In [None]:
import seaborn as sns
from matplotlib.patches import Patch

# Heatmap of the literature genes that are significant in at least one
# condition: colour = log2 fold change, '*' = padj < 0.05, one column per
# condition with colour bars for sex, treatment and region.

# Work on an independent copy: the frame was produced by boolean filtering, and
# adding columns to such a slice triggers pandas' SettingWithCopyWarning.
Genes_literature_DEG_df = Genes_literature_DEG_df.copy()

# Condition names look like '<sex>_<treatment>_vs_Control_<region>_Results';
# split once and pick the relevant fields.
condition_parts = Genes_literature_DEG_df['Condition'].str.split('_', expand=True)
Genes_literature_DEG_df['Sex'] = condition_parts[0]
Genes_literature_DEG_df['Treatment'] = condition_parts[1]
Genes_literature_DEG_df['Region'] = condition_parts[4]

# Gene x condition matrix of log2 fold changes.
pivot_df_logFC = Genes_literature_DEG_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")

# Gene x condition matrix of significance markers used as cell annotations.
# np.where replaces the element-wise DataFrame.applymap, which is deprecated
# since pandas 2.1; missing padj values compare False and stay blank, as before.
pivot_df_pvalues = Genes_literature_DEG_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pd.DataFrame(
    np.where(pivot_df_pvalues < 0.05, '*', ' '),
    index=pivot_df_pvalues.index,
    columns=pivot_df_pvalues.columns,
)

# Gene/condition combinations without a value are drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
Genes_literature_DEG_df_unique = Genes_literature_DEG_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
Genes_literature_DEG_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned in order of first appearance of each level.
sex_lut = dict(zip(Genes_literature_DEG_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(Genes_literature_DEG_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(Genes_literature_DEG_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (aligned on the condition name)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = Genes_literature_DEG_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = Genes_literature_DEG_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = Genes_literature_DEG_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(Genes_literature_DEG_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap: rows are clustered, columns follow predefined_order.
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 12), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, vmax=1, vmin=-1, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.05, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)
# The significance legend is re-added as an artist because the second
# ax.legend() call below would otherwise replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged and shift the colour bars up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])


### List of relevant genes in the context of lipid metabolism, the circadian clock, and immune-related processes

In [None]:
genes_positive = ["Elovl3", "Chka", "Irs2", "Srebf1", "Ppard", "Acot1", "Cpt2", "Dbp", "Nfil3", "Nr1d1", "Nr1d2", "Tef", "Arntl", "Gadd45a", "Gadd45g", "Irf2bp2", "Ifitm2", "Caprin1", "Rnf125", "Ripk2", "Id2"]

In [None]:
combined_df

In [None]:
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['GeneName'].isin(genes_positive)]

In [None]:
filtered_df

In [None]:
# Heatmap of the selected genes in the order given by genes_positive:
# colour = log2 fold change, '*' = padj < 0.05, one column per condition with
# colour bars for sex, treatment and region.

# Work on an independent copy: the frame was produced by boolean filtering, and
# adding columns to such a slice triggers pandas' SettingWithCopyWarning.
filtered_df = filtered_df.copy()

# Condition names look like '<sex>_<treatment>_vs_Control_<region>_Results';
# split once and pick the relevant fields.
condition_parts = filtered_df['Condition'].str.split('_', expand=True)
filtered_df['Sex'] = condition_parts[0]
filtered_df['Treatment'] = condition_parts[1]
filtered_df['Region'] = condition_parts[4]

# Gene x condition matrices: fold changes, and significance markers used as
# cell annotations. np.where replaces the element-wise DataFrame.applymap,
# which is deprecated since pandas 2.1; missing padj values compare False and
# stay blank, as before.
pivot_df_logFC = filtered_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")
pivot_df_pvalues = filtered_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pd.DataFrame(
    np.where(pivot_df_pvalues < 0.05, '*', ' '),
    index=pivot_df_pvalues.index,
    columns=pivot_df_pvalues.columns,
)

# Gene/condition combinations without a value are drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
filtered_df_unique = filtered_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
filtered_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned in order of first appearance of each level.
sex_lut = dict(zip(filtered_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(filtered_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(filtered_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (aligned on the condition name)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = filtered_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = filtered_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = filtered_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(filtered_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap without any clustering: rows follow genes_positive,
# columns follow predefined_order.
cg = sns.clustermap(pivot_df_logFC[predefined_order].reindex(genes_positive), cmap="vlag", figsize=(10, 12), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, vmax=1, vmin=-1, annot=pivot_df_pvalues[predefined_order].reindex(genes_positive), fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.05, 0.55, 0.05, 0.18), col_cluster=False, row_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)
# The significance legend is re-added as an artist because the second
# ax.legend() call below would otherwise replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged and shift the colour bars up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])


In [None]:
pivot_df_logFC[predefined_order].index

### List of relevant genes from the publication: Predicted deleterious variants in the human genome relevant to gene therapy with adeno-associated virus vectors
https://www.cell.com/molecular-therapy-family/methods/fulltext/S2329-0501%2823%2900175-4#tbl1

In [None]:
genes_positive = [
    "AU040320", "Ndst1", "Tm9sf2", "B3galt6", "Arf1", "Itgav", "Itga5", 
    "Itgb1", "Itgb5", "Rpsa", "Pdgfra", "Cd9", "Fgfr1", "B4galt1", 
    "Arhgap26", "Cdc42", "Dnm1", "Met", "Egfr", "B3gat3", "Slc35b2", 
    "Ext1", "Ext2", "Cog7", "Rab7a", "Stx5", "Rab11a", "Vps35l", 
    "Vps53", "Rab6a", "Rgp1", "Atp2c1", "Gpr108", "Atp6v0a2", 
    "Rnf121", "Kpnb1"
]

In [None]:
combined_df

In [None]:
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['GeneName'].isin(genes_positive)]

In [None]:
filtered_df

In [None]:
# Heatmap of the selected genes with clustered rows: colour = log2 fold change,
# '*' = padj < 0.05, one column per condition with colour bars for sex,
# treatment and region.

# Work on an independent copy: the frame was produced by boolean filtering, and
# adding columns to such a slice triggers pandas' SettingWithCopyWarning.
filtered_df = filtered_df.copy()

# Condition names look like '<sex>_<treatment>_vs_Control_<region>_Results';
# split once and pick the relevant fields.
condition_parts = filtered_df['Condition'].str.split('_', expand=True)
filtered_df['Sex'] = condition_parts[0]
filtered_df['Treatment'] = condition_parts[1]
filtered_df['Region'] = condition_parts[4]

# Gene x condition matrices: fold changes, and significance markers used as
# cell annotations. np.where replaces the element-wise DataFrame.applymap,
# which is deprecated since pandas 2.1; missing padj values compare False and
# stay blank, as before.
pivot_df_logFC = filtered_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")
pivot_df_pvalues = filtered_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pd.DataFrame(
    np.where(pivot_df_pvalues < 0.05, '*', ' '),
    index=pivot_df_pvalues.index,
    columns=pivot_df_pvalues.columns,
)

# Gene/condition combinations without a value are drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
filtered_df_unique = filtered_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
filtered_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned in order of first appearance of each level.
sex_lut = dict(zip(filtered_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(filtered_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(filtered_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (aligned on the condition name)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = filtered_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = filtered_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = filtered_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(filtered_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap: rows are clustered, columns follow predefined_order.
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 12), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, vmax=1, vmin=-1, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.05, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)
# The significance legend is re-added as an artist because the second
# ax.legend() call below would otherwise replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged and shift the colour bars up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])


In [None]:
genes_negative = [
    "Ube2i", "Uba2", "Sae1",
    "Suv4-20h1", "Chaf1a", "Npat", "Casp8ap2", "Phc3", "Phf5a", 
    "Sf3b2", "U2af1", "Zfp638", "Fkbp4", "Mre11a", "Rad50", 
    "Nbn", "Ifnar2", "Wwc2", "Adgra2", "Rtbdn", "Troap", "Sox15", 
    "Slc5a2", "Clic2", "Grpr"
]

In [None]:
filtered_df = combined_df[combined_df['GeneName'].isin(genes_negative)]

In [None]:
filtered_df

In [None]:
# Heatmap of the genes_negative set: colour = log2 fold change,
# '*' = padj < 0.05, one column per condition with colour bars for sex,
# treatment and region.

# Work on an independent copy: the frame was produced by boolean filtering, and
# adding columns to such a slice triggers pandas' SettingWithCopyWarning.
filtered_df = filtered_df.copy()

# Condition names look like '<sex>_<treatment>_vs_Control_<region>_Results';
# split once and pick the relevant fields.
condition_parts = filtered_df['Condition'].str.split('_', expand=True)
filtered_df['Sex'] = condition_parts[0]
filtered_df['Treatment'] = condition_parts[1]
filtered_df['Region'] = condition_parts[4]

# Gene x condition matrices: fold changes, and significance markers used as
# cell annotations. np.where replaces the element-wise DataFrame.applymap,
# which is deprecated since pandas 2.1; missing padj values compare False and
# stay blank, as before.
pivot_df_logFC = filtered_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")
pivot_df_pvalues = filtered_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pd.DataFrame(
    np.where(pivot_df_pvalues < 0.05, '*', ' '),
    index=pivot_df_pvalues.index,
    columns=pivot_df_pvalues.columns,
)

# Gene/condition combinations without a value are drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
filtered_df_unique = filtered_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
filtered_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned in order of first appearance of each level.
sex_lut = dict(zip(filtered_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(filtered_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(filtered_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (aligned on the condition name)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = filtered_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = filtered_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = filtered_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(filtered_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
# NOTE(review): unlike the other heatmaps in this notebook, the columns are not
# put into predefined_order and col_cluster is left at its default, so seaborn
# clusters (reorders) the columns while the dendrogram is hidden — confirm this
# is intended.
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 12), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, vmax=1, vmin=-1, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.05, 0.55, 0.05, 0.18))
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)
# The significance legend is re-added as an artist because the second
# ax.legend() call below would otherwise replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged and shift the colour bars up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])


## Genes responding differently in the pericentral and periportal regions in particular conditions

### Male AAV9 

In [None]:
conditions_to_check=['male_AVV9_vs_Control_pericentral_Results','male_AVV9_vs_Control_periportal_Results']
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_genes = filtered_df['GeneName'].unique().tolist()

In [None]:
# Genes whose response points in opposite directions in the two selected
# conditions (pericentral vs. periportal).
different_genes = []

# A single groupby pass hands over each gene's rows directly; re-filtering the
# whole table once per gene made this loop quadratic in the table size.
# sort=False keeps the genes in order of first appearance.
for current_gene, filtered_df_gene in filtered_df.groupby('GeneName', sort=False):

    conditions = filtered_df_gene['Condition'].unique()

    # A gene reported in only one of the two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the gene when it is significant (padj < 0.05) in at least one
    # condition and its two log2 fold changes have opposite signs, i.e. their
    # product is negative.
    fold_changes = filtered_df_gene['log2FoldChange'].tolist()
    criteria_met = bool((filtered_df_gene['padj'] < 0.05).any()) and \
                fold_changes[0] * fold_changes[1] < 0

    if criteria_met:
        different_genes.append(current_gene)

In [None]:
filtered_df_genes = filtered_df[filtered_df['GeneName'].isin(different_genes)]

In [None]:
# Save the selected genes. The output folder is created first because
# DataFrame.to_csv does not create missing directories.
os.makedirs('analyzed/some_results', exist_ok=True)
filtered_df_genes.to_csv('analyzed/some_results/male_AVV9_different_CentralPortal.csv')
# filtered_df_genes[filtered_df_genes['GeneName'] == 'Klf11']

### Male AAV2 

In [None]:
conditions_to_check=['male_AVV2_vs_Control_pericentral_Results','male_AVV2_vs_Control_periportal_Results']
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_genes = filtered_df['GeneName'].unique().tolist()

In [None]:
# Genes whose response points in opposite directions in the two selected
# conditions (pericentral vs. periportal).
different_genes = []

# A single groupby pass hands over each gene's rows directly; re-filtering the
# whole table once per gene made this loop quadratic in the table size.
# sort=False keeps the genes in order of first appearance.
for current_gene, filtered_df_gene in filtered_df.groupby('GeneName', sort=False):

    conditions = filtered_df_gene['Condition'].unique()

    # A gene reported in only one of the two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the gene when it is significant (padj < 0.05) in at least one
    # condition and its two log2 fold changes have opposite signs, i.e. their
    # product is negative.
    fold_changes = filtered_df_gene['log2FoldChange'].tolist()
    criteria_met = bool((filtered_df_gene['padj'] < 0.05).any()) and \
                fold_changes[0] * fold_changes[1] < 0

    if criteria_met:
        different_genes.append(current_gene)

In [None]:
filtered_df_genes = filtered_df[filtered_df['GeneName'].isin(different_genes)]

In [None]:
# Save the selected genes. The output folder is created first because
# DataFrame.to_csv does not create missing directories.
os.makedirs('analyzed/some_results', exist_ok=True)
filtered_df_genes.to_csv('analyzed/some_results/male_AVV2_different_CentralPortal.csv')
# filtered_df_genes[filtered_df_genes['GeneName'] == 'Klf11']

### Female AAV9 

In [None]:
conditions_to_check=['female_AVV9_vs_Control_pericentral_Results','female_AVV9_vs_Control_periportal_Results']
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_genes = filtered_df['GeneName'].unique().tolist()

In [None]:
# Genes whose response points in opposite directions in the two selected
# conditions (pericentral vs. periportal).
different_genes = []

# A single groupby pass hands over each gene's rows directly; re-filtering the
# whole table once per gene made this loop quadratic in the table size.
# sort=False keeps the genes in order of first appearance.
for current_gene, filtered_df_gene in filtered_df.groupby('GeneName', sort=False):

    conditions = filtered_df_gene['Condition'].unique()

    # A gene reported in only one of the two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the gene when it is significant (padj < 0.05) in at least one
    # condition and its two log2 fold changes have opposite signs, i.e. their
    # product is negative.
    fold_changes = filtered_df_gene['log2FoldChange'].tolist()
    criteria_met = bool((filtered_df_gene['padj'] < 0.05).any()) and \
                fold_changes[0] * fold_changes[1] < 0

    if criteria_met:
        different_genes.append(current_gene)

In [None]:
filtered_df_genes = filtered_df[filtered_df['GeneName'].isin(different_genes)]

In [None]:
# Save the selected genes. The output folder is created first because
# DataFrame.to_csv does not create missing directories.
os.makedirs('analyzed/some_results', exist_ok=True)
filtered_df_genes.to_csv('analyzed/some_results/female_AVV9_different_CentralPortal.csv')
# filtered_df_genes[filtered_df_genes['GeneName'] == 'Klf11']

### Female AAV2 

In [None]:
conditions_to_check=['female_AVV2_vs_Control_pericentral_Results','female_AVV2_vs_Control_periportal_Results']
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_df[combined_df['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_genes = filtered_df['GeneName'].unique().tolist()

In [None]:
# Genes whose response points in opposite directions in the two selected
# conditions (pericentral vs. periportal).
different_genes = []

# A single groupby pass hands over each gene's rows directly; re-filtering the
# whole table once per gene made this loop quadratic in the table size.
# sort=False keeps the genes in order of first appearance.
for current_gene, filtered_df_gene in filtered_df.groupby('GeneName', sort=False):

    conditions = filtered_df_gene['Condition'].unique()

    # A gene reported in only one of the two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the gene when it is significant (padj < 0.05) in at least one
    # condition and its two log2 fold changes have opposite signs, i.e. their
    # product is negative.
    fold_changes = filtered_df_gene['log2FoldChange'].tolist()
    criteria_met = bool((filtered_df_gene['padj'] < 0.05).any()) and \
                fold_changes[0] * fold_changes[1] < 0

    if criteria_met:
        different_genes.append(current_gene)

In [None]:
filtered_df_genes = filtered_df[filtered_df['GeneName'].isin(different_genes)]

In [None]:
# Save the selected genes. The output folder is created first because
# DataFrame.to_csv does not create missing directories.
os.makedirs('analyzed/some_results', exist_ok=True)
filtered_df_genes.to_csv('analyzed/some_results/female_AVV2_different_CentralPortal.csv')
# filtered_df_genes[filtered_df_genes['GeneName'] == 'Klf11']

In [None]:
# Genes_list_df = combined_df[combined_df['GeneName'].isin(different_genes)]

# Genes_list_df['Sex'] = Genes_list_df['Condition'].str.split('_', expand=True)[[0]]
# Genes_list_df['Treatment'] = Genes_list_df['Condition'].str.split('_', expand=True)[1]
# Genes_list_df['Region'] = Genes_list_df['Condition'].str.split('_', expand=True)[4]

# pivot_df_logFC = Genes_list_df.pivot(columns = 'Condition', index = "GeneName", values="log2FoldChange")

# pivot_df_pvalues = Genes_list_df.pivot(columns = 'Condition', index = "GeneName", values="padj")
# pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
# Genes_list_df_unique = Genes_list_df[['Condition', 'Sex', 'Treatment','Region']].drop_duplicates()
# Genes_list_df_unique.set_index("Condition", inplace=True)

# sex_colors = ['#c66874','#67c1ca']
# treatment_colors = ['#687ac8','#c8b866']
# region_colors = ['#c98367',  '#6ac989','#9c69c8']

# sex_lut = dict(zip(Genes_list_df_unique['Sex'].unique(), sex_colors))
# treatment_lut = dict(zip(Genes_list_df_unique['Treatment'].unique(), treatment_colors))
# region_lut = dict(zip(Genes_list_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
# col_colors  = pd.DataFrame(index=pivot_df_logFC.columns)
# col_colors['Sex'] = Genes_list_df_unique['Sex'].map(sex_lut)
# col_colors['Treatment'] = Genes_list_df_unique['Treatment'].map(treatment_lut)
# col_colors['Region'] = Genes_list_df_unique['Region'].map(region_lut)

# legend_elements = []
# for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
#    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
#     for label, color in zip(Genes_list_df_unique[cond].unique(), cmap):
#        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
#cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 22), linewidths=0.75, linecolor= 'black',
#                    dendrogram_ratio=(.175, .025), center=0, vmax=1, vmin=-1, annot=pivot_df_pvalues[predefined_order], fmt='',
#                    col_colors =col_colors, xticklabels=False, cbar_pos=(0.05, 0.65, 0.05, 0.12), col_cluster=False)
#cg.ax_row_dendrogram.set_visible(False) #suppress row dendrogram
#cg.ax_col_dendrogram.set_visible(False) #suppress row dendrogram
#cg.cax.set_title('Log2 FC', pad=10)
#cond_legend = cg.ax_heatmap.legend(labels=[f'p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.7))
#cg.ax_heatmap.add_artist(cond_legend)
#cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
#cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
#cg.ax_heatmap.set_xlabel('')
# cg.ax_heatmap.set_ylabel('')

# cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
#                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
# cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
#                               cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

# plt.setp(cg.ax_heatmap.get_yticklabels(), fontsize=8)
# plt.savefig('/home/valdeola/Figs_Bettina/DEGs.jpg', dpi=300)

### DEG significant across conditions

#### Genes significant in both males and females for AAV9 treatment versus control in pericentral

In [None]:
# Conditions a gene must be significant in to be kept for this comparison.
conditions_to_check = [
    'female_AVV9_vs_Control_pericentral_Results',
    'male_AVV9_vs_Control_pericentral_Results',
]

# Replace every missing value in the table with 1, so a missing adjusted
# p-value can never pass a "< threshold" test. This touches all columns,
# not only 'padj'.
combined_df.fillna(value=1, inplace=True)

In [None]:
# One array of significant gene names per condition, in the order of
# `conditions_to_check`. The cut-off is the shared `pvalue_tresh` (as in the
# other comparisons of this notebook) rather than a separately typed 0.05.
Gene_list = []
for current_condition in conditions_to_check:
    in_condition = combined_df['Condition'] == current_condition
    below_threshold = combined_df['padj'] < pvalue_tresh
    current_genes = combined_df.loc[in_condition & below_threshold, 'GeneName'].unique()
    Gene_list.append(current_genes)

In [None]:
# Genes significant in every condition of `conditions_to_check`:
# intersect the per-condition gene arrays collected in `Gene_list`.
common_elements = set(Gene_list[0])
for arr in Gene_list[1:]:
    common_elements.intersection_update(arr)

# Sort so the list is identical from run to run (the iteration order of a set
# of strings changes between Python sessions). `key=str` keeps the sort
# working if a non-string placeholder (e.g. the 1 written by fillna) is present.
common_elements_list = sorted(common_elements, key=str)
common_elements_list

In [None]:
# Heatmap of log2 fold changes, in every condition, for the genes listed in
# `common_elements_list`.

# Work on an explicit copy: the filter result is derived from `combined_df`,
# and adding columns to it directly triggers pandas' SettingWithCopyWarning.
Genes_list_df = combined_df[combined_df['GeneName'].isin(common_elements_list)].copy()

# Condition names are underscore-separated
# ('<sex>_<treatment>_vs_Control_<region>_Results'); split once, pick by position.
condition_parts = Genes_list_df['Condition'].str.split('_', expand=True)
Genes_list_df['Sex'] = condition_parts[0]
Genes_list_df['Treatment'] = condition_parts[1]
Genes_list_df['Region'] = condition_parts[4]

# Gene x condition tables: fold changes drive the colours, adjusted p-values
# become the '*' annotations.
pivot_df_logFC = Genes_list_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")

pivot_df_pvalues = Genes_list_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A gene absent from a condition is drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# One row of annotation values (sex, treatment, region) per condition
Genes_list_df_unique = Genes_list_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
Genes_list_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(Genes_list_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(Genes_list_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(Genes_list_df_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = Genes_list_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = Genes_list_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = Genes_list_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(Genes_list_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 22),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    vmax=1,
    vmin=-1,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    cbar_pos=(0.05, 0.65, 0.05, 0.12),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.7),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])

# plt.setp(cg.ax_heatmap.get_yticklabels(), fontsize=8)
# NOTE: absolute, machine-specific output path; savefig does not create the directory.
plt.savefig('/home/valdeola/Figs_Bettina/DEGs.jpg', dpi=300)

#### Males AAV9 versus control and AAV2 versus control significantly deregulated in opposite directions in pericentral

In [None]:
# Conditions a gene must be significant in to be kept for this comparison.
conditions_to_check = [
    'male_AVV9_vs_Control_pericentral_Results',
    'male_AVV2_vs_Control_pericentral_Results',
]

# Replace every missing value in the table with 1, so a missing adjusted
# p-value can never pass a "< threshold" test. This touches all columns,
# not only 'padj'.
combined_df.fillna(value=1, inplace=True)

In [None]:
# One array of significant gene names per condition, in the order of
# `conditions_to_check`.
Gene_list = []
for current_condition in conditions_to_check:
    in_condition = combined_df['Condition'] == current_condition
    below_threshold = combined_df['padj'] < pvalue_tresh
    current_genes = combined_df.loc[in_condition & below_threshold, 'GeneName'].unique()
    Gene_list.append(current_genes)

In [None]:
# Genes significant in every condition of `conditions_to_check`:
# intersect the per-condition gene arrays collected in `Gene_list`.
common_elements = set(Gene_list[0])
for arr in Gene_list[1:]:
    common_elements.intersection_update(arr)

# Sort so the list is identical from run to run (the iteration order of a set
# of strings changes between Python sessions). `key=str` keeps the sort
# working if a non-string placeholder (e.g. the 1 written by fillna) is present.
common_elements_list = sorted(common_elements, key=str)
common_elements_list

In [None]:
# Keep the genes whose fold change has opposite signs in the first two
# conditions of `conditions_to_check`.
genes_different_behaviour = []
for current_gene in common_elements_list:
    log2FoldChange_list = []
    for current_condition in conditions_to_check:
        current_Log2FC = combined_df.loc[
            (combined_df['Condition'] == current_condition) & (combined_df['GeneName'] == current_gene),
            'log2FoldChange',
        ].unique()
        # The sign test below needs exactly one value per gene and condition;
        # fail with an explicit message instead of numpy's
        # "truth value of an array is ambiguous" error.
        if len(current_Log2FC) != 1:
            raise ValueError(
                f"Expected exactly one log2FoldChange for gene {current_gene!r} "
                f"in condition {current_condition!r}, found {len(current_Log2FC)}"
            )
        log2FoldChange_list.append(current_Log2FC[0])

    # A negative product means one fold change is positive and the other negative.
    sign = log2FoldChange_list[0] * log2FoldChange_list[1]
    if sign < 0:
        genes_different_behaviour.append(current_gene)

In [None]:
# Display the genes with opposite-sign fold changes
genes_different_behaviour

In [None]:
# Heatmap of log2 fold changes, in every condition, for the genes listed in
# `genes_different_behaviour`.

# Work on an explicit copy: the filter result is derived from `combined_df`,
# and adding columns to it directly triggers pandas' SettingWithCopyWarning.
Genes_list_df = combined_df[combined_df['GeneName'].isin(genes_different_behaviour)].copy()

# Condition names are underscore-separated
# ('<sex>_<treatment>_vs_Control_<region>_Results'); split once, pick by position.
condition_parts = Genes_list_df['Condition'].str.split('_', expand=True)
Genes_list_df['Sex'] = condition_parts[0]
Genes_list_df['Treatment'] = condition_parts[1]
Genes_list_df['Region'] = condition_parts[4]

# Gene x condition tables: fold changes drive the colours, adjusted p-values
# become the '*' annotations.
pivot_df_logFC = Genes_list_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")

pivot_df_pvalues = Genes_list_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A gene absent from a condition is drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# One row of annotation values (sex, treatment, region) per condition
Genes_list_df_unique = Genes_list_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
Genes_list_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(Genes_list_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(Genes_list_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(Genes_list_df_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = Genes_list_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = Genes_list_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = Genes_list_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(Genes_list_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    vmax=1,
    vmin=-1,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.05, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])


#### Males AAV2 versus control and Females AAV2 versus control significantly deregulated in periportal

In [None]:
# Conditions a gene must be significant in to be kept for this comparison.
conditions_to_check = [
    'male_AVV2_vs_Control_periportal_Results',
    'female_AVV2_vs_Control_periportal_Results',
]

# Replace every missing value in the table with 1, so a missing adjusted
# p-value can never pass a "< threshold" test. This touches all columns,
# not only 'padj'.
combined_df.fillna(value=1, inplace=True)

In [None]:
# One array of significant gene names per condition, in the order of
# `conditions_to_check`.
Gene_list = []
for current_condition in conditions_to_check:
    in_condition = combined_df['Condition'] == current_condition
    below_threshold = combined_df['padj'] < pvalue_tresh
    current_genes = combined_df.loc[in_condition & below_threshold, 'GeneName'].unique()
    Gene_list.append(current_genes)

In [None]:
# Genes significant in every condition of `conditions_to_check`:
# intersect the per-condition gene arrays collected in `Gene_list`.
common_elements = set(Gene_list[0])
for arr in Gene_list[1:]:
    common_elements.intersection_update(arr)

# Sort so the list is identical from run to run (the iteration order of a set
# of strings changes between Python sessions). `key=str` keeps the sort
# working if a non-string placeholder (e.g. the 1 written by fillna) is present.
common_elements_list = sorted(common_elements, key=str)
common_elements_list

In [None]:
# Heatmap of log2 fold changes, in every condition, for the genes listed in
# `common_elements_list`.

# Work on an explicit copy: the filter result is derived from `combined_df`,
# and adding columns to it directly triggers pandas' SettingWithCopyWarning.
Genes_list_df = combined_df[combined_df['GeneName'].isin(common_elements_list)].copy()

# Condition names are underscore-separated
# ('<sex>_<treatment>_vs_Control_<region>_Results'); split once, pick by position.
condition_parts = Genes_list_df['Condition'].str.split('_', expand=True)
Genes_list_df['Sex'] = condition_parts[0]
Genes_list_df['Treatment'] = condition_parts[1]
Genes_list_df['Region'] = condition_parts[4]

# Gene x condition tables: fold changes drive the colours, adjusted p-values
# become the '*' annotations.
pivot_df_logFC = Genes_list_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")

pivot_df_pvalues = Genes_list_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A gene absent from a condition is drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# One row of annotation values (sex, treatment, region) per condition
Genes_list_df_unique = Genes_list_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
Genes_list_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(Genes_list_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(Genes_list_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(Genes_list_df_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = Genes_list_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = Genes_list_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = Genes_list_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(Genes_list_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    vmax=1,
    vmin=-1,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.05, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])


#### Males AAV2 versus control and Females AAV2 versus control significantly deregulated in pericentral

In [None]:
# Conditions a gene must be significant in to be kept for this comparison.
conditions_to_check = [
    'male_AVV2_vs_Control_pericentral_Results',
    'female_AVV2_vs_Control_pericentral_Results',
]

# Replace every missing value in the table with 1, so a missing adjusted
# p-value can never pass a "< threshold" test. This touches all columns,
# not only 'padj'.
combined_df.fillna(value=1, inplace=True)

In [None]:
# One array of significant gene names per condition, in the order of
# `conditions_to_check`.
Gene_list = []
for current_condition in conditions_to_check:
    in_condition = combined_df['Condition'] == current_condition
    below_threshold = combined_df['padj'] < pvalue_tresh
    current_genes = combined_df.loc[in_condition & below_threshold, 'GeneName'].unique()
    Gene_list.append(current_genes)

In [None]:
# Genes significant in every condition of `conditions_to_check`:
# intersect the per-condition gene arrays collected in `Gene_list`.
common_elements = set(Gene_list[0])
for arr in Gene_list[1:]:
    common_elements.intersection_update(arr)

# Sort so the list is identical from run to run (the iteration order of a set
# of strings changes between Python sessions). `key=str` keeps the sort
# working if a non-string placeholder (e.g. the 1 written by fillna) is present.
common_elements_list = sorted(common_elements, key=str)
common_elements_list

In [None]:
# Heatmap of log2 fold changes, in every condition, for the genes listed in
# `common_elements_list`.

# Work on an explicit copy: the filter result is derived from `combined_df`,
# and adding columns to it directly triggers pandas' SettingWithCopyWarning.
Genes_list_df = combined_df[combined_df['GeneName'].isin(common_elements_list)].copy()

# Condition names are underscore-separated
# ('<sex>_<treatment>_vs_Control_<region>_Results'); split once, pick by position.
condition_parts = Genes_list_df['Condition'].str.split('_', expand=True)
Genes_list_df['Sex'] = condition_parts[0]
Genes_list_df['Treatment'] = condition_parts[1]
Genes_list_df['Region'] = condition_parts[4]

# Gene x condition tables: fold changes drive the colours, adjusted p-values
# become the '*' annotations.
pivot_df_logFC = Genes_list_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")

pivot_df_pvalues = Genes_list_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A gene absent from a condition is drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# One row of annotation values (sex, treatment, region) per condition
Genes_list_df_unique = Genes_list_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
Genes_list_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(Genes_list_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(Genes_list_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(Genes_list_df_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = Genes_list_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = Genes_list_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = Genes_list_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(Genes_list_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    vmax=1,
    vmin=-1,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.05, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])


#### Males AAV9 versus control and AAV2 versus control significantly deregulated in periportal in different directions

In [None]:
# Conditions a gene must be significant in to be kept for this comparison.
conditions_to_check = [
    'male_AVV9_vs_Control_periportal_Results',
    'male_AVV2_vs_Control_periportal_Results',
]

# Replace every missing value in the table with 1, so a missing adjusted
# p-value can never pass a "< threshold" test. This touches all columns,
# not only 'padj'.
combined_df.fillna(value=1, inplace=True)

In [None]:
# One array of significant gene names per condition, in the order of
# `conditions_to_check`.
Gene_list = []
for current_condition in conditions_to_check:
    in_condition = combined_df['Condition'] == current_condition
    below_threshold = combined_df['padj'] < pvalue_tresh
    current_genes = combined_df.loc[in_condition & below_threshold, 'GeneName'].unique()
    Gene_list.append(current_genes)

In [None]:
# Genes significant in every condition of `conditions_to_check`:
# intersect the per-condition gene arrays collected in `Gene_list`.
common_elements = set(Gene_list[0])
for arr in Gene_list[1:]:
    common_elements.intersection_update(arr)

# Sort so the list is identical from run to run (the iteration order of a set
# of strings changes between Python sessions). `key=str` keeps the sort
# working if a non-string placeholder (e.g. the 1 written by fillna) is present.
common_elements_list = sorted(common_elements, key=str)
common_elements_list

In [None]:
# Keep the genes whose fold change has opposite signs in the first two
# conditions of `conditions_to_check`.
genes_different_behaviour = []
for current_gene in common_elements_list:
    log2FoldChange_list = []
    for current_condition in conditions_to_check:
        current_Log2FC = combined_df.loc[
            (combined_df['Condition'] == current_condition) & (combined_df['GeneName'] == current_gene),
            'log2FoldChange',
        ].unique()
        # The sign test below needs exactly one value per gene and condition;
        # fail with an explicit message instead of numpy's
        # "truth value of an array is ambiguous" error.
        if len(current_Log2FC) != 1:
            raise ValueError(
                f"Expected exactly one log2FoldChange for gene {current_gene!r} "
                f"in condition {current_condition!r}, found {len(current_Log2FC)}"
            )
        log2FoldChange_list.append(current_Log2FC[0])

    # A negative product means one fold change is positive and the other negative.
    sign = log2FoldChange_list[0] * log2FoldChange_list[1]
    if sign < 0:
        genes_different_behaviour.append(current_gene)

In [None]:
# Display the genes with opposite-sign fold changes
genes_different_behaviour

In [None]:
# Heatmap of log2 fold changes, in every condition, for the genes listed in
# `genes_different_behaviour`.

# Work on an explicit copy: the filter result is derived from `combined_df`,
# and adding columns to it directly triggers pandas' SettingWithCopyWarning.
Genes_list_df = combined_df[combined_df['GeneName'].isin(genes_different_behaviour)].copy()

# Condition names are underscore-separated
# ('<sex>_<treatment>_vs_Control_<region>_Results'); split once, pick by position.
condition_parts = Genes_list_df['Condition'].str.split('_', expand=True)
Genes_list_df['Sex'] = condition_parts[0]
Genes_list_df['Treatment'] = condition_parts[1]
Genes_list_df['Region'] = condition_parts[4]

# Gene x condition tables: fold changes drive the colours, adjusted p-values
# become the '*' annotations.
pivot_df_logFC = Genes_list_df.pivot(columns='Condition', index="GeneName", values="log2FoldChange")

pivot_df_pvalues = Genes_list_df.pivot(columns='Condition', index="GeneName", values="padj")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A gene absent from a condition is drawn as "no change".
pivot_df_logFC.fillna(0, inplace=True)

# One row of annotation values (sex, treatment, region) per condition
Genes_list_df_unique = Genes_list_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
Genes_list_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(Genes_list_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(Genes_list_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(Genes_list_df_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = Genes_list_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = Genes_list_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = Genes_list_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(Genes_list_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    vmax=1,
    vmin=-1,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.05, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Log2 FC', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])


## Pathways

In [None]:
# Load every pathway CSV in the results folder and tag its rows with the
# condition name taken from the file name.
# NOTE: with `results_folder` built from os.getcwd() (as at the top of the
# notebook) it is absolute, so os.path.join() ignores the leading `inpath`.
folder_path_pathways = os.path.join(inpath, results_folder, 'Pathway_results')
all_pathways_dfs = []  # one DataFrame per CSV file

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined table, and everything derived from it (e.g. which colour
# each annotation level receives in the heatmap), reproducible.
for filename in sorted(os.listdir(folder_path_pathways)):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path_pathways, filename)
        df = pd.read_csv(file_path)
        # Columns are renamed by position; this requires exactly three columns.
        df.columns = ['Pathway', 'ActivityScore', 'Pvalue']
        # Strip only the trailing extension (a '.csv' elsewhere in the name is kept).
        df['Condition'] = os.path.splitext(filename)[0]
        df.set_index('Pathway', inplace=True)
        all_pathways_dfs.append(df)

In [None]:
# Stack the per-file tables; the 'Pathway' index of each table is kept,
# so pathway names repeat once per condition.
all_pathways_df = pd.concat(all_pathways_dfs, axis=0, ignore_index=False)

In [None]:
# Display the combined pathway table
all_pathways_df

In [None]:
# Heatmap of pathway activity scores across conditions.

# Condition names are underscore-separated; split once and pick the sex,
# treatment and region fields by position.
condition_parts = all_pathways_df['Condition'].str.split('_', expand=True)
all_pathways_df['Sex'] = condition_parts[0]
all_pathways_df['Treatment'] = condition_parts[1]
all_pathways_df['Region'] = condition_parts[4]

# Pathway x condition tables (rows come from the existing 'Pathway' index):
# scores drive the colours, p-values become the '*' annotations.
pivot_df_logFC = all_pathways_df.pivot(columns='Condition', values="ActivityScore")
pivot_df_pvalues = all_pathways_df.pivot(columns='Condition', values="Pvalue")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

pivot_df_logFC.fillna(0, inplace=True)

# One row of annotation values (sex, treatment, region) per condition
all_pathways_df_unique = all_pathways_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
all_pathways_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(all_pathways_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(all_pathways_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(all_pathways_df_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = all_pathways_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = all_pathways_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = all_pathways_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(all_pathways_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.05, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Activity Score', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])
# NOTE: absolute, machine-specific output path.
plt.savefig('/home/valdeola/Figs_Bettina/Pathways.jpg', dpi=300)

## Transcription Factors

In [None]:
# Load every transcription-factor CSV in the results folder and tag its rows
# with the condition name taken from the file name.
# NOTE: with `results_folder` built from os.getcwd() (as at the top of the
# notebook) it is absolute, so os.path.join() ignores the leading `inpath`.
folder_path_tfs = os.path.join(inpath, results_folder, 'TF_results')
all_tfs_dfs = []  # one DataFrame per CSV file

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined table, and everything derived from it (e.g. which colour
# each annotation level receives in the heatmap), reproducible.
for filename in sorted(os.listdir(folder_path_tfs)):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path_tfs, filename)
        df = pd.read_csv(file_path)
        # Columns are renamed by position; this requires exactly three columns.
        df.columns = ['TF', 'ActivityScore', 'Pvalue']
        # Strip only the trailing extension (a '.csv' elsewhere in the name is kept).
        df['Condition'] = os.path.splitext(filename)[0]
        df.set_index('TF', inplace=True)
        all_tfs_dfs.append(df)

In [None]:
# Stack the per-file tables; the 'TF' index of each table is kept,
# so TF names repeat once per condition.
all_tfs_df = pd.concat(all_tfs_dfs, axis=0, ignore_index=False)

In [None]:
# Display the combined transcription-factor table
all_tfs_df

In [None]:
# Magnitude of the activity score, regardless of direction (used for ranking)
all_tfs_df['abs_ActivityScore'] = all_tfs_df['ActivityScore'].abs()

In [None]:
# Expose the TF names (currently the index) as a regular column as well
all_tfs_df['TF'] = all_tfs_df.index.to_numpy()

In [None]:
# Rank rows by absolute activity, keep each TF's strongest row,
# and take the names of the first 35 TFs.
ranked_by_strength = all_tfs_df.sort_values(by='abs_ActivityScore', ascending=False)
strongest_row_per_tf = ranked_by_strength.drop_duplicates('TF')
top_tfs = strongest_row_per_tf['TF'].head(35).tolist()

In [None]:
# Keep only the rows of the selected top TFs. Columns are added to this table
# afterwards, so take an explicit copy; otherwise pandas treats it as a slice
# of `all_tfs_df` and emits SettingWithCopyWarning on assignment.
all_tfs_df_filtered = all_tfs_df[all_tfs_df['TF'].isin(top_tfs)].copy()

In [None]:
# Display the table restricted to the top TFs
all_tfs_df_filtered

In [None]:
# Heatmap of transcription-factor activity scores across conditions.

# Condition names are underscore-separated; split once and pick the sex,
# treatment and region fields by position.
condition_parts = all_tfs_df_filtered['Condition'].str.split('_', expand=True)
all_tfs_df_filtered['Sex'] = condition_parts[0]
all_tfs_df_filtered['Treatment'] = condition_parts[1]
all_tfs_df_filtered['Region'] = condition_parts[4]

# TF x condition tables (rows come from the existing index):
# scores drive the colours, p-values become the '*' annotations.
pivot_df_logFC = all_tfs_df_filtered.pivot(columns='Condition', values="ActivityScore")
pivot_df_pvalues = all_tfs_df_filtered.pivot(columns='Condition', values="Pvalue")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

pivot_df_logFC.fillna(0, inplace=True)


# One row of annotation values (sex, treatment, region) per condition
all_tfs_df_filtered_unique = all_tfs_df_filtered[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
all_tfs_df_filtered_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance in the data.
sex_lut = dict(zip(all_tfs_df_filtered_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(all_tfs_df_filtered_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(all_tfs_df_filtered_unique['Region'].unique(), region_colors))

# Colour strips shown above the heatmap, one row per condition
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = all_tfs_df_filtered_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = all_tfs_df_filtered_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = all_tfs_df_filtered_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as heading for each annotation,
# followed by one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(all_tfs_df_filtered_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap; columns follow `predefined_order` (no column clustering)
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.05, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('Activity Score', pad=10)

# The first legend explains the '*' marks; add_artist keeps it on the axes
# when the second legend() call replaces the axes' current legend.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position unchanged, then nudge the colour strips up slightly.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
strip_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([strip_box.x0, strip_box.y0 + 0.010, strip_box.width, strip_box.height])


# pivot_df = all_tfs_df_filtered.pivot(columns = 'Condition', values="ActivityScore")

# pivot_df.fillna(0, inplace=True)


## GSEA results

In [None]:
# Load every GSEA CSV in the results folder and tag its rows with the
# condition name taken from the file name. Columns are kept as stored.
# NOTE: with `results_folder` built from os.getcwd() (as at the top of the
# notebook) it is absolute, so os.path.join() ignores the leading `inpath`.
folder_path_gsea = os.path.join(inpath, results_folder, 'GSEA_results')
all_gsea_dfs = []  # one DataFrame per CSV file

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the combined table reproducible.
for filename in sorted(os.listdir(folder_path_gsea)):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path_gsea, filename)
        df = pd.read_csv(file_path)
        # Strip only the trailing extension (a '.csv' elsewhere in the name is kept).
        df['Condition'] = os.path.splitext(filename)[0]
        all_gsea_dfs.append(df)

In [None]:
combined_gsea_df = pd.concat(all_gsea_dfs, ignore_index=True)

In [None]:
combined_gsea_df

### Hallmarks

In [None]:
# Keep only the terms whose name starts with 'HALLMARK_'.
# na=False treats a missing term name as "no match" instead of yielding an NaN
# mask, and .copy() makes hallmarks_df an independent table so the column
# edits in later cells do not write into a slice of combined_gsea_df.
is_hallmark = combined_gsea_df['Term.1'].str.startswith('HALLMARK_', na=False)
hallmarks_df = combined_gsea_df[is_hallmark].copy()

In [None]:
hallmarks_df

In [None]:
hallmarks_df['Condition'] = hallmarks_df['Condition'].str.replace('_Results_msigdb_1', '', regex=False) 

In [None]:
hallmarks_df

In [None]:
# Column order for the heatmaps: sex is the slowest-varying factor, then
# treatment, then liver region (periportal, other, pericentral).
predefined_order = [
    f'{sex}_{treatment}_vs_Control_{region}'
    for sex in ('female', 'male')
    for treatment in ('AVV2', 'AVV9')
    for region in ('periportal', 'other', 'pericentral')
]

In [None]:
# Heatmap of hallmark NES per condition, with '*' where FDR p-value < 0.05.

# Index by term name so pivot() below uses the terms as heatmap rows.
# Rebinding instead of inplace=True gives a new frame, so the column
# assignments below do not write into a slice of another table.
hallmarks_df = hallmarks_df.set_index('Term.1')

# Split the condition label once; fields 0, 1 and 4 are used as sex, treatment
# and region (labels look like 'female_AVV2_vs_Control_periportal').
condition_fields = hallmarks_df['Condition'].str.split('_', expand=True)
hallmarks_df['Sex'] = condition_fields[0]
hallmarks_df['Treatment'] = condition_fields[1]
hallmarks_df['Region'] = condition_fields[4]

pivot_df_logFC = hallmarks_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = hallmarks_df.pivot(columns='Condition', values="FDR p-value")
# Annotation matrix: '*' for FDR p-value < 0.05, blank otherwise (NaN included).
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Missing term/condition combinations are drawn as NES 0.
pivot_df_logFC.fillna(0, inplace=True)


# One row per condition with its sex / treatment / region annotation.
hallmarks_df_unique = hallmarks_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
hallmarks_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(hallmarks_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(hallmarks_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(hallmarks_df_unique['Region'].unique(), region_colors))

# Colour strip for the heatmap columns: one row per condition, one column per factor.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = hallmarks_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = hallmarks_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = hallmarks_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as a heading per factor, then one
# coloured patch per level (same level/colour pairing as the look-up tables).
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(hallmarks_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns in the fixed predefined_order, not clustered)
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 12), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)
# The '*' legend is re-added as an artist so it stays visible when the second
# legend() call installs the factor legend on the same axes.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heatmap position is re-applied unchanged; the column-colour strip is
# shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

# pivot_df = hallmarks_df.pivot(columns = 'Condition', values="NES", index = "Term.1")

# pivot_df.fillna(0, inplace=True)

### Genetic Locus

In [None]:
GeneticLocus_df = combined_gsea_df[combined_gsea_df['Term.1'].str.startswith('chr')]

In [None]:
GeneticLocus_df

In [None]:
GeneticLocus_significant_list = GeneticLocus_df[GeneticLocus_df['FDR p-value'] < 0.05]['Term.1'].unique().tolist()
GeneticLocus_significant_list

In [None]:
# Keep all rows (every condition) of the loci listed in
# GeneticLocus_significant_list. .copy() makes the result an independent
# table so the column edits in later cells do not write into a slice of
# GeneticLocus_df.
is_significant_locus = GeneticLocus_df['Term.1'].isin(GeneticLocus_significant_list)
GeneticLocus_significant_df = GeneticLocus_df[is_significant_locus].copy()

In [None]:
GeneticLocus_significant_df['Condition'] =  GeneticLocus_significant_df['Condition'].str.replace('_Results_msigdb_1', '', regex=False) 

In [None]:
GeneticLocus_significant_df

In [None]:
# Heatmap of NES per condition for the selected loci, with '*' where
# FDR p-value < 0.05.

# Index by term name so pivot() below uses the terms as heatmap rows.
# Rebinding instead of inplace=True gives a new frame, so the column
# assignments below do not write into a slice of another table.
GeneticLocus_significant_df = GeneticLocus_significant_df.set_index('Term.1')

# Split the condition label once; fields 0, 1 and 4 are used as sex, treatment
# and region (labels look like 'female_AVV2_vs_Control_periportal').
condition_fields = GeneticLocus_significant_df['Condition'].str.split('_', expand=True)
GeneticLocus_significant_df['Sex'] = condition_fields[0]
GeneticLocus_significant_df['Treatment'] = condition_fields[1]
GeneticLocus_significant_df['Region'] = condition_fields[4]

pivot_df_logFC = GeneticLocus_significant_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GeneticLocus_significant_df.pivot(columns='Condition', values="FDR p-value")
# Annotation matrix: '*' for FDR p-value < 0.05, blank otherwise (NaN included).
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Missing term/condition combinations are drawn as NES 0.
pivot_df_logFC.fillna(0, inplace=True)


# One row per condition with its sex / treatment / region annotation.
GeneticLocus_significant_df_unique = GeneticLocus_significant_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GeneticLocus_significant_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GeneticLocus_significant_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GeneticLocus_significant_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GeneticLocus_significant_df_unique['Region'].unique(), region_colors))

# Colour strip for the heatmap columns: one row per condition, one column per factor.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GeneticLocus_significant_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GeneticLocus_significant_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GeneticLocus_significant_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as a heading per factor, then one
# coloured patch per level (same level/colour pairing as the look-up tables).
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GeneticLocus_significant_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns in the fixed predefined_order, not clustered)
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)
# The '*' legend is re-added as an artist so it stays visible when the second
# legend() call installs the factor legend on the same axes.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heatmap position is re-applied unchanged; the column-colour strip is
# shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

# pivot_df = hallmarks_df.pivot(columns = 'Condition', values="NES", index = "Term.1")

# pivot_df.fillna(0, inplace=True)

### GO:BP TERMS

In [None]:
# Keep only the terms whose name starts with 'GOBP_'.
# na=False treats a missing term name as "no match" instead of yielding an NaN
# mask, and .copy() makes GOBP_terms an independent table so adding columns
# to it later does not write into a slice of combined_gsea_df.
is_gobp = combined_gsea_df['Term.1'].str.startswith('GOBP_', na=False)
GOBP_terms = combined_gsea_df[is_gobp].copy()

In [None]:
GOBP_terms

In [None]:
GOBP_terms['abs_NES'] = abs(GOBP_terms['NES'])

In [None]:
top_GOBPs = GOBP_terms.sort_values(by = 'abs_NES', ascending=False).drop_duplicates('Term.1')['Term.1'].head(35).tolist()

In [None]:
top_GOBPs

In [None]:
# All rows (every condition) of the terms listed in top_GOBPs. .copy() makes
# the result an independent table so the column edits in later cells do not
# write into a slice of GOBP_terms.
is_top_term = GOBP_terms['Term.1'].isin(top_GOBPs)
GOBP_terms_df = GOBP_terms[is_top_term].copy()

In [None]:
GOBP_terms_df['Condition'] =  GOBP_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOBP_terms_df['Term.1'] =  GOBP_terms_df['Term.1'].str.replace('GOBP_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Cut *input_string* down to its first *max_length* characters.

    Strings longer than *max_length* are cut and get a trailing "..."
    (so the result can be up to ``max_length + 3`` characters long);
    strings of at most *max_length* characters are returned unchanged.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
GOBP_terms_df['Term.1_truncated'] = GOBP_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES per condition for the selected GO:BP terms, with '*' where
# FDR p-value < 0.05.

# Index by the truncated term label so pivot() below uses it as heatmap rows.
# Rebinding instead of inplace=True gives a new frame, so the column
# assignments below do not write into a slice of another table.
# NOTE(review): pivot() raises on duplicate (label, condition) pairs, which
# would happen if two terms share the same truncated label - confirm unlikely.
GOBP_terms_df = GOBP_terms_df.set_index('Term.1_truncated')

# Split the condition label once; fields 0, 1 and 4 are used as sex, treatment
# and region (labels look like 'female_AVV2_vs_Control_periportal').
condition_fields = GOBP_terms_df['Condition'].str.split('_', expand=True)
GOBP_terms_df['Sex'] = condition_fields[0]
GOBP_terms_df['Treatment'] = condition_fields[1]
GOBP_terms_df['Region'] = condition_fields[4]

pivot_df_logFC = GOBP_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOBP_terms_df.pivot(columns='Condition', values="FDR p-value")
# Annotation matrix: '*' for FDR p-value < 0.05, blank otherwise (NaN included).
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Missing term/condition combinations are drawn as NES 0.
pivot_df_logFC.fillna(0, inplace=True)


# One row per condition with its sex / treatment / region annotation.
GOBP_terms_df_unique = GOBP_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOBP_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOBP_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOBP_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOBP_terms_df_unique['Region'].unique(), region_colors))

# Colour strip for the heatmap columns: one row per condition, one column per factor.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOBP_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOBP_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOBP_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as a heading per factor, then one
# coloured patch per level (same level/colour pairing as the look-up tables).
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOBP_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns in the fixed predefined_order, not clustered)
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)
# The '*' legend is re-added as an artist so it stays visible when the second
# legend() call installs the factor legend on the same axes.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heatmap position is re-applied unchanged; the column-colour strip is
# shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

Can we also check whether the GO:BP terms respond differently between the pericentral and periportal regions within each condition?

In [None]:
GOBP_terms

### Male AAV9 

In [None]:
# The two GSEA result sets to contrast: pericentral vs periportal.
conditions_to_check = [
    'male_AVV9_vs_Control_pericentral_Results_msigdb_3',
    'male_AVV9_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is the DEG table and is not read by the GSEA cells
# that follow; this fill looks like a copy-paste leftover - confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOBP_terms[GOBP_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that respond in opposite directions in the two conditions
# of `conditions_to_check`: the term must be present in both conditions, have
# a nominal p-value < 0.05 in at least one of its rows, and its first two NES
# values must have opposite signs.
different_terms = []

# Single pass over the table: groupby(sort=False) yields the terms in order of
# first appearance (the order of filtered_df['Term.1'].unique()) without
# re-scanning the whole frame once per term.
for current_term, filtered_df_term in filtered_df.groupby('Term.1', sort=False):
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        # Term reported for only one of the two conditions: nothing to compare.
        continue

    nes_values = filtered_df_term['NES'].tolist()
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    opposite_direction = nes_values[0] * nes_values[1] < 0

    if any_significant and opposite_direction:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms, restricted to the two conditions being compared.
# .copy() makes the result an independent table so the column edits in later
# cells do not write into a slice of GOBP_terms.
term_selected = GOBP_terms['Term.1'].isin(different_terms)
condition_selected = GOBP_terms['Condition'].isin(conditions_to_check)
GOBP_terms_df = GOBP_terms[term_selected & condition_selected].copy()

In [None]:
GOBP_terms_df['Condition'] =  GOBP_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOBP_terms_df['Term.1'] =  GOBP_terms_df['Term.1'].str.replace('GOBP_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Return *input_string* limited to *max_length* leading characters.

    When the string is longer than *max_length* the cut text is followed by
    "..." (the result is then ``max_length + 3`` characters long); otherwise
    the string is returned as is.
    """
    needs_cut = len(input_string) > max_length
    return input_string[:max_length] + "..." if needs_cut else input_string

In [None]:
GOBP_terms_df['Term.1_truncated'] = GOBP_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected GO:BP terms in the compared conditions,
# with '*' where FDR p-value < 0.05.

# Index by the truncated term label so pivot() below uses it as heatmap rows.
# Rebinding instead of inplace=True gives a new frame, so the column
# assignments below do not write into a slice of another table.
# NOTE(review): pivot() raises on duplicate (label, condition) pairs, which
# would happen if two terms share the same truncated label - confirm unlikely.
GOBP_terms_df = GOBP_terms_df.set_index('Term.1_truncated')

# Split the condition label once; fields 0, 1 and 4 are used as sex, treatment
# and region (labels look like 'male_AVV9_vs_Control_pericentral').
condition_fields = GOBP_terms_df['Condition'].str.split('_', expand=True)
GOBP_terms_df['Sex'] = condition_fields[0]
GOBP_terms_df['Treatment'] = condition_fields[1]
GOBP_terms_df['Region'] = condition_fields[4]

pivot_df_logFC = GOBP_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOBP_terms_df.pivot(columns='Condition', values="FDR p-value")
# Annotation matrix: '*' for FDR p-value < 0.05, blank otherwise (NaN included).
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Missing term/condition combinations are drawn as NES 0.
pivot_df_logFC.fillna(0, inplace=True)


# One row per condition with its sex / treatment / region annotation.
GOBP_terms_df_unique = GOBP_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOBP_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOBP_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOBP_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOBP_terms_df_unique['Region'].unique(), region_colors))

# Colour strip for the heatmap columns: one row per condition, one column per factor.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOBP_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOBP_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOBP_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as a heading per factor, then one
# coloured patch per level (same level/colour pairing as the look-up tables).
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOBP_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns are not clustered)
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)
# The '*' legend is re-added as an artist so it stays visible when the second
# legend() call installs the factor legend on the same axes.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heatmap position is re-applied unchanged; the column-colour strip is
# shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### Male AAV2 

In [None]:
# The two GSEA result sets to contrast: pericentral vs periportal.
conditions_to_check = [
    'male_AVV2_vs_Control_pericentral_Results_msigdb_3',
    'male_AVV2_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is the DEG table and is not read by the GSEA cells
# that follow; this fill looks like a copy-paste leftover - confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOBP_terms[GOBP_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that respond in opposite directions in the two conditions
# of `conditions_to_check`: the term must be present in both conditions, have
# a nominal p-value < 0.05 in at least one of its rows, and its first two NES
# values must have opposite signs.
different_terms = []

# Single pass over the table: groupby(sort=False) yields the terms in order of
# first appearance (the order of filtered_df['Term.1'].unique()) without
# re-scanning the whole frame once per term.
for current_term, filtered_df_term in filtered_df.groupby('Term.1', sort=False):
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        # Term reported for only one of the two conditions: nothing to compare.
        continue

    nes_values = filtered_df_term['NES'].tolist()
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    opposite_direction = nes_values[0] * nes_values[1] < 0

    if any_significant and opposite_direction:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms, restricted to the two conditions being compared.
# .copy() makes the result an independent table so the column edits in later
# cells do not write into a slice of GOBP_terms.
term_selected = GOBP_terms['Term.1'].isin(different_terms)
condition_selected = GOBP_terms['Condition'].isin(conditions_to_check)
GOBP_terms_df = GOBP_terms[term_selected & condition_selected].copy()

In [None]:
GOBP_terms_df['Condition'] =  GOBP_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOBP_terms_df['Term.1'] =  GOBP_terms_df['Term.1'].str.replace('GOBP_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Cut *input_string* down to its first *max_length* characters.

    Strings longer than *max_length* are cut and get a trailing "..."
    (so the result can be up to ``max_length + 3`` characters long);
    strings of at most *max_length* characters are returned unchanged.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
GOBP_terms_df['Term.1_truncated'] = GOBP_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected GO:BP terms in the compared conditions,
# with '*' where FDR p-value < 0.05.

# Index by the truncated term label so pivot() below uses it as heatmap rows.
# Rebinding instead of inplace=True gives a new frame, so the column
# assignments below do not write into a slice of another table.
# NOTE(review): pivot() raises on duplicate (label, condition) pairs, which
# would happen if two terms share the same truncated label - confirm unlikely.
GOBP_terms_df = GOBP_terms_df.set_index('Term.1_truncated')

# Split the condition label once; fields 0, 1 and 4 are used as sex, treatment
# and region (labels look like 'male_AVV2_vs_Control_pericentral').
condition_fields = GOBP_terms_df['Condition'].str.split('_', expand=True)
GOBP_terms_df['Sex'] = condition_fields[0]
GOBP_terms_df['Treatment'] = condition_fields[1]
GOBP_terms_df['Region'] = condition_fields[4]

pivot_df_logFC = GOBP_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOBP_terms_df.pivot(columns='Condition', values="FDR p-value")
# Annotation matrix: '*' for FDR p-value < 0.05, blank otherwise (NaN included).
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Missing term/condition combinations are drawn as NES 0.
pivot_df_logFC.fillna(0, inplace=True)


# One row per condition with its sex / treatment / region annotation.
GOBP_terms_df_unique = GOBP_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOBP_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOBP_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOBP_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOBP_terms_df_unique['Region'].unique(), region_colors))

# Colour strip for the heatmap columns: one row per condition, one column per factor.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOBP_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOBP_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOBP_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as a heading per factor, then one
# coloured patch per level (same level/colour pairing as the look-up tables).
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOBP_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns are not clustered)
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 14), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)
# The '*' legend is re-added as an artist so it stays visible when the second
# legend() call installs the factor legend on the same axes.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heatmap position is re-applied unchanged; the column-colour strip is
# shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### Female AAV9 

In [None]:
# The two GSEA result sets to contrast: pericentral vs periportal.
conditions_to_check = [
    'female_AVV9_vs_Control_pericentral_Results_msigdb_3',
    'female_AVV9_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is the DEG table and is not read by the GSEA cells
# that follow; this fill looks like a copy-paste leftover - confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOBP_terms[GOBP_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that respond in opposite directions in the two conditions
# of `conditions_to_check`: the term must be present in both conditions, have
# a nominal p-value < 0.05 in at least one of its rows, and its first two NES
# values must have opposite signs.
different_terms = []

# Single pass over the table: groupby(sort=False) yields the terms in order of
# first appearance (the order of filtered_df['Term.1'].unique()) without
# re-scanning the whole frame once per term.
for current_term, filtered_df_term in filtered_df.groupby('Term.1', sort=False):
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        # Term reported for only one of the two conditions: nothing to compare.
        continue

    nes_values = filtered_df_term['NES'].tolist()
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    opposite_direction = nes_values[0] * nes_values[1] < 0

    if any_significant and opposite_direction:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms, restricted to the two conditions being compared.
# .copy() makes the result an independent table so the column edits in later
# cells do not write into a slice of GOBP_terms.
term_selected = GOBP_terms['Term.1'].isin(different_terms)
condition_selected = GOBP_terms['Condition'].isin(conditions_to_check)
GOBP_terms_df = GOBP_terms[term_selected & condition_selected].copy()

In [None]:
GOBP_terms_df['Condition'] =  GOBP_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOBP_terms_df['Term.1'] =  GOBP_terms_df['Term.1'].str.replace('GOBP_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Return *input_string* limited to *max_length* leading characters.

    When the string is longer than *max_length* the cut text is followed by
    "..." (the result is then ``max_length + 3`` characters long); otherwise
    the string is returned as is.
    """
    needs_cut = len(input_string) > max_length
    return input_string[:max_length] + "..." if needs_cut else input_string

In [None]:
GOBP_terms_df['Term.1_truncated'] = GOBP_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected GO:BP terms in the compared conditions,
# with '*' where FDR p-value < 0.05.

# Index by the truncated term label so pivot() below uses it as heatmap rows.
# Rebinding instead of inplace=True gives a new frame, so the column
# assignments below do not write into a slice of another table.
# NOTE(review): pivot() raises on duplicate (label, condition) pairs, which
# would happen if two terms share the same truncated label - confirm unlikely.
GOBP_terms_df = GOBP_terms_df.set_index('Term.1_truncated')

# Split the condition label once; fields 0, 1 and 4 are used as sex, treatment
# and region (labels look like 'female_AVV9_vs_Control_pericentral').
condition_fields = GOBP_terms_df['Condition'].str.split('_', expand=True)
GOBP_terms_df['Sex'] = condition_fields[0]
GOBP_terms_df['Treatment'] = condition_fields[1]
GOBP_terms_df['Region'] = condition_fields[4]

pivot_df_logFC = GOBP_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOBP_terms_df.pivot(columns='Condition', values="FDR p-value")
# Annotation matrix: '*' for FDR p-value < 0.05, blank otherwise (NaN included).
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Missing term/condition combinations are drawn as NES 0.
pivot_df_logFC.fillna(0, inplace=True)


# One row per condition with its sex / treatment / region annotation.
GOBP_terms_df_unique = GOBP_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOBP_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOBP_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOBP_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOBP_terms_df_unique['Region'].unique(), region_colors))

# Colour strip for the heatmap columns: one row per condition, one column per factor.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOBP_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOBP_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOBP_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as a heading per factor, then one
# coloured patch per level (same level/colour pairing as the look-up tables).
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOBP_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns are not clustered)
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)
# The '*' legend is re-added as an artist so it stays visible when the second
# legend() call installs the factor legend on the same axes.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heatmap position is re-applied unchanged; the column-colour strip is
# shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### Female AAV2 

In [None]:
# The two GSEA result sets to contrast: pericentral vs periportal.
conditions_to_check = [
    'female_AVV2_vs_Control_pericentral_Results_msigdb_3',
    'female_AVV2_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is the DEG table and is not read by the GSEA cells
# that follow; this fill looks like a copy-paste leftover - confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOBP_terms[GOBP_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that respond in opposite directions in the two conditions
# of `conditions_to_check`: the term must be present in both conditions, have
# a nominal p-value < 0.05 in at least one of its rows, and its first two NES
# values must have opposite signs.
different_terms = []

# Single pass over the table: groupby(sort=False) yields the terms in order of
# first appearance (the order of filtered_df['Term.1'].unique()) without
# re-scanning the whole frame once per term.
for current_term, filtered_df_term in filtered_df.groupby('Term.1', sort=False):
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        # Term reported for only one of the two conditions: nothing to compare.
        continue

    nes_values = filtered_df_term['NES'].tolist()
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    opposite_direction = nes_values[0] * nes_values[1] < 0

    if any_significant and opposite_direction:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms, restricted to the two conditions being compared.
# .copy() makes the result an independent table so the column edits in later
# cells do not write into a slice of GOBP_terms.
term_selected = GOBP_terms['Term.1'].isin(different_terms)
condition_selected = GOBP_terms['Condition'].isin(conditions_to_check)
GOBP_terms_df = GOBP_terms[term_selected & condition_selected].copy()

In [None]:
GOBP_terms_df['Condition'] =  GOBP_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOBP_terms_df['Term.1'] =  GOBP_terms_df['Term.1'].str.replace('GOBP_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten a label for display.

    Returns ``input_string`` unchanged when it has at most ``max_length``
    characters; otherwise returns its first ``max_length`` characters followed
    by ``"..."`` (the result is then three characters longer than
    ``max_length``).
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOBP_terms_df['Term.1_truncated'] = GOBP_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heat map of NES per term (rows) and condition (columns); cells whose
# FDR p-value is below 0.05 are annotated with '*'.
GOBP_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces. The labels selected for
# this comparison read '<sex>_<treatment>_vs_Control_<region>', so fields 0, 1
# and 4 are the sex, the treatment and the region.
condition_parts = GOBP_terms_df['Condition'].str.split('_', expand=True)
GOBP_terms_df['Sex'] = condition_parts[0]
GOBP_terms_df['Treatment'] = condition_parts[1]
GOBP_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOBP_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOBP_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOBP_terms_df_unique = GOBP_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOBP_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOBP_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOBP_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOBP_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOBP_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOBP_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOBP_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOBP_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 16), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### GO:CC TERMS

In [None]:
# Rows of the combined GSEA table whose term name starts with 'GOCC_'.
# na=False treats a missing term name as "no match" instead of yielding a
# non-boolean mask; .copy() makes GOCC_terms an independent frame so adding the
# 'abs_NES' column later does not write into a slice of combined_gsea_df.
GOCC_terms = combined_gsea_df[combined_gsea_df['Term.1'].str.startswith('GOCC_', na=False)].copy()

In [None]:
GOCC_terms

In [None]:
GOCC_terms['abs_NES'] = abs(GOCC_terms['NES'])

In [None]:
top_GOCCs = GOCC_terms.sort_values(by = 'abs_NES', ascending=False).drop_duplicates('Term.1')['Term.1'].head(35).tolist()

In [None]:
top_GOCCs

In [None]:
# Rows for the top-ranked GO:CC terms. .copy() gives an independent frame, so
# the column assignments in the following cells do not act on a slice of
# GOCC_terms (which triggers pandas' SettingWithCopyWarning).
GOCC_terms_df = GOCC_terms[GOCC_terms['Term.1'].isin(top_GOCCs)].copy()

In [None]:
GOCC_terms_df['Condition'] =  GOCC_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOCC_terms_df['Term.1'] =  GOCC_terms_df['Term.1'].str.replace('GOCC_', '', regex=False) 

In [None]:
GOCC_terms_df['Term.1_truncated'] = GOCC_terms_df['Term.1'].apply(lambda x: truncate_string(x, 55))

In [None]:
# Heat map of NES per term (rows) and condition (columns) for the top GO:CC
# terms; cells whose FDR p-value is below 0.05 are annotated with '*'.
GOCC_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces.
# NOTE(review): assumes every label reads '<sex>_<treatment>_vs_Control_<region>'
# so that fields 0, 1 and 4 are sex, treatment and region - confirm for all conditions.
condition_parts = GOCC_terms_df['Condition'].str.split('_', expand=True)
GOCC_terms_df['Sex'] = condition_parts[0]
GOCC_terms_df['Treatment'] = condition_parts[1]
GOCC_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOCC_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOCC_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOCC_terms_df_unique = GOCC_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOCC_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOCC_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOCC_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOCC_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOCC_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOCC_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOCC_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOCC_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns follow predefined_order)
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

Next, check whether these terms respond differently between the pericentral and periportal regions within each condition.

In [None]:
GOCC_terms

### Male AAV9 

In [None]:
# The two condition labels compared in this section (male AVV9, one per region).
conditions_to_check = [
    'male_AVV9_vs_Control_pericentral_Results_msigdb_3',
    'male_AVV9_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is not referenced by the neighbouring cells, so this
# fill looks like a carry-over from an earlier section - confirm it is still needed.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOCC_terms[GOCC_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Terms whose enrichment direction differs between the conditions being compared.
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df.loc[filtered_df['Term.1'] == current_term]

    # A term reported for fewer than two conditions cannot be compared.
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        continue

    # Keep the term when at least one row is nominally significant and the
    # first two NES values have opposite signs (their product is negative).
    first_nes, second_nes = filtered_df_term['NES'].tolist()[:2]
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and first_nes * second_nes < 0

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows for the selected terms in the conditions under comparison.
# .copy() gives an independent frame, so the column assignments made on
# GOCC_terms_df in the following cells do not act on a slice of GOCC_terms
# (which triggers pandas' SettingWithCopyWarning).
GOCC_terms_df = GOCC_terms[GOCC_terms['Term.1'].isin(different_terms) & GOCC_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
GOCC_terms_df['Condition'] =  GOCC_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOCC_terms_df['Term.1'] =  GOCC_terms_df['Term.1'].str.replace('GOCC_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten a label for display.

    Returns ``input_string`` unchanged when it has at most ``max_length``
    characters; otherwise returns its first ``max_length`` characters followed
    by ``"..."`` (the result is then three characters longer than
    ``max_length``).
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOCC_terms_df['Term.1_truncated'] = GOCC_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
GOCC_terms_df

In [None]:
# Heat map of NES per term (rows) and condition (columns); cells whose
# FDR p-value is below 0.05 are annotated with '*'.
GOCC_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces. The labels selected for
# this comparison read '<sex>_<treatment>_vs_Control_<region>', so fields 0, 1
# and 4 are the sex, the treatment and the region.
condition_parts = GOCC_terms_df['Condition'].str.split('_', expand=True)
GOCC_terms_df['Sex'] = condition_parts[0]
GOCC_terms_df['Treatment'] = condition_parts[1]
GOCC_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOCC_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOCC_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOCC_terms_df_unique = GOCC_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOCC_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOCC_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOCC_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOCC_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOCC_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOCC_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOCC_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOCC_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### Male AAV2 

In [None]:
# The two condition labels compared in this section (male AVV2, one per region).
conditions_to_check = [
    'male_AVV2_vs_Control_pericentral_Results_msigdb_3',
    'male_AVV2_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is not referenced by the neighbouring cells, so this
# fill looks like a carry-over from an earlier section - confirm it is still needed.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOCC_terms[GOCC_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Terms whose enrichment direction differs between the conditions being compared.
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df.loc[filtered_df['Term.1'] == current_term]

    # A term reported for fewer than two conditions cannot be compared.
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        continue

    # Keep the term when at least one row is nominally significant and the
    # first two NES values have opposite signs (their product is negative).
    first_nes, second_nes = filtered_df_term['NES'].tolist()[:2]
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and first_nes * second_nes < 0

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows for the selected terms in the conditions under comparison.
# .copy() gives an independent frame, so the column assignments made on
# GOCC_terms_df in the following cells do not act on a slice of GOCC_terms
# (which triggers pandas' SettingWithCopyWarning).
GOCC_terms_df = GOCC_terms[GOCC_terms['Term.1'].isin(different_terms) & GOCC_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
GOCC_terms_df['Condition'] =  GOCC_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOCC_terms_df['Term.1'] =  GOCC_terms_df['Term.1'].str.replace('GOCC_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten a label for display.

    Returns ``input_string`` unchanged when it has at most ``max_length``
    characters; otherwise returns its first ``max_length`` characters followed
    by ``"..."`` (the result is then three characters longer than
    ``max_length``).
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOCC_terms_df['Term.1_truncated'] = GOCC_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heat map of NES per term (rows) and condition (columns); cells whose
# FDR p-value is below 0.05 are annotated with '*'.
GOCC_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces. The labels selected for
# this comparison read '<sex>_<treatment>_vs_Control_<region>', so fields 0, 1
# and 4 are the sex, the treatment and the region.
condition_parts = GOCC_terms_df['Condition'].str.split('_', expand=True)
GOCC_terms_df['Sex'] = condition_parts[0]
GOCC_terms_df['Treatment'] = condition_parts[1]
GOCC_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOCC_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOCC_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOCC_terms_df_unique = GOCC_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOCC_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOCC_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOCC_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOCC_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOCC_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOCC_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOCC_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOCC_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### Female AAV9 

In [None]:
# The two condition labels compared in this section (female AVV9, one per region).
conditions_to_check = [
    'female_AVV9_vs_Control_pericentral_Results_msigdb_3',
    'female_AVV9_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is not referenced by the neighbouring cells, so this
# fill looks like a carry-over from an earlier section - confirm it is still needed.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOCC_terms[GOCC_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Terms whose enrichment direction differs between the conditions being compared.
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df.loc[filtered_df['Term.1'] == current_term]

    # A term reported for fewer than two conditions cannot be compared.
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        continue

    # Keep the term when at least one row is nominally significant and the
    # first two NES values have opposite signs (their product is negative).
    first_nes, second_nes = filtered_df_term['NES'].tolist()[:2]
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and first_nes * second_nes < 0

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows for the selected terms in the conditions under comparison.
# .copy() gives an independent frame, so the column assignments made on
# GOCC_terms_df in the following cells do not act on a slice of GOCC_terms
# (which triggers pandas' SettingWithCopyWarning).
GOCC_terms_df = GOCC_terms[GOCC_terms['Term.1'].isin(different_terms) & GOCC_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
GOCC_terms_df['Condition'] =  GOCC_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOCC_terms_df['Term.1'] =  GOCC_terms_df['Term.1'].str.replace('GOCC_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten a label for display.

    Returns ``input_string`` unchanged when it has at most ``max_length``
    characters; otherwise returns its first ``max_length`` characters followed
    by ``"..."`` (the result is then three characters longer than
    ``max_length``).
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOCC_terms_df['Term.1_truncated'] = GOCC_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heat map of NES per term (rows) and condition (columns); cells whose
# FDR p-value is below 0.05 are annotated with '*'.
GOCC_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces. The labels selected for
# this comparison read '<sex>_<treatment>_vs_Control_<region>', so fields 0, 1
# and 4 are the sex, the treatment and the region.
condition_parts = GOCC_terms_df['Condition'].str.split('_', expand=True)
GOCC_terms_df['Sex'] = condition_parts[0]
GOCC_terms_df['Treatment'] = condition_parts[1]
GOCC_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOCC_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOCC_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOCC_terms_df_unique = GOCC_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOCC_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOCC_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOCC_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOCC_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOCC_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOCC_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOCC_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOCC_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### Female AAV2 

In [None]:
# The two condition labels compared in this section (female AVV2, one per region).
conditions_to_check = [
    'female_AVV2_vs_Control_pericentral_Results_msigdb_3',
    'female_AVV2_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is not referenced by the neighbouring cells, so this
# fill looks like a carry-over from an earlier section - confirm it is still needed.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOCC_terms[GOCC_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Terms whose enrichment direction differs between the conditions being compared.
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df.loc[filtered_df['Term.1'] == current_term]

    # A term reported for fewer than two conditions cannot be compared.
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        continue

    # Keep the term when at least one row is nominally significant and the
    # first two NES values have opposite signs (their product is negative).
    first_nes, second_nes = filtered_df_term['NES'].tolist()[:2]
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and first_nes * second_nes < 0

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows for the selected terms in the conditions under comparison.
# .copy() gives an independent frame, so the column assignments made on
# GOCC_terms_df in the following cells do not act on a slice of GOCC_terms
# (which triggers pandas' SettingWithCopyWarning).
GOCC_terms_df = GOCC_terms[GOCC_terms['Term.1'].isin(different_terms) & GOCC_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
GOCC_terms_df['Condition'] =  GOCC_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOCC_terms_df['Term.1'] =  GOCC_terms_df['Term.1'].str.replace('GOCC_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten a label for display.

    Returns ``input_string`` unchanged when it has at most ``max_length``
    characters; otherwise returns its first ``max_length`` characters followed
    by ``"..."`` (the result is then three characters longer than
    ``max_length``).
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOCC_terms_df['Term.1_truncated'] = GOCC_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heat map of NES per term (rows) and condition (columns); cells whose
# FDR p-value is below 0.05 are annotated with '*'.
GOCC_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces. The labels selected for
# this comparison read '<sex>_<treatment>_vs_Control_<region>', so fields 0, 1
# and 4 are the sex, the treatment and the region.
condition_parts = GOCC_terms_df['Condition'].str.split('_', expand=True)
GOCC_terms_df['Sex'] = condition_parts[0]
GOCC_terms_df['Treatment'] = condition_parts[1]
GOCC_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOCC_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOCC_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOCC_terms_df_unique = GOCC_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOCC_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOCC_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOCC_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOCC_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOCC_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOCC_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOCC_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOCC_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

### GO:MF TERMS

In [None]:
# Rows of the combined GSEA table whose term name starts with 'GOMF_'.
# na=False treats a missing term name as "no match" instead of yielding a
# non-boolean mask; .copy() makes GOMF_terms an independent frame so adding the
# 'abs_NES' column later does not write into a slice of combined_gsea_df.
GOMF_terms = combined_gsea_df[combined_gsea_df['Term.1'].str.startswith('GOMF_', na=False)].copy()

In [None]:
GOMF_terms

In [None]:
GOMF_terms['abs_NES'] = abs(GOMF_terms['NES'])

In [None]:
top_GOMFs = GOMF_terms.sort_values(by = 'abs_NES', ascending=False).drop_duplicates('Term.1')['Term.1'].head(35).tolist()

In [None]:
top_GOMFs

In [None]:
# Rows for the top-ranked GO:MF terms. .copy() gives an independent frame, so
# the column assignments in the following cells do not act on a slice of
# GOMF_terms (which triggers pandas' SettingWithCopyWarning).
GOMF_terms_df = GOMF_terms[GOMF_terms['Term.1'].isin(top_GOMFs)].copy()

In [None]:
GOMF_terms_df['Condition'] =  GOMF_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOMF_terms_df['Term.1'] =  GOMF_terms_df['Term.1'].str.replace('GOMF_', '', regex=False) 

In [None]:
GOMF_terms_df['Term.1_truncated'] = GOMF_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heat map of NES per term (rows) and condition (columns) for the top GO:MF
# terms; cells whose FDR p-value is below 0.05 are annotated with '*'.
GOMF_terms_df.set_index('Term.1_truncated', inplace=True)

# Split the condition label once and reuse the pieces.
# NOTE(review): assumes every label reads '<sex>_<treatment>_vs_Control_<region>'
# so that fields 0, 1 and 4 are sex, treatment and region - confirm for all conditions.
condition_parts = GOMF_terms_df['Condition'].str.split('_', expand=True)
GOMF_terms_df['Sex'] = condition_parts[0]
GOMF_terms_df['Treatment'] = condition_parts[1]
GOMF_terms_df['Region'] = condition_parts[4]

# Wide tables: NES values colour the cells, FDR p-values become the annotations.
pivot_df_logFC = GOMF_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOMF_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# A term absent from a condition is shown with NES 0, the centre of the colour scale.
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
GOMF_terms_df_unique = GOMF_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOMF_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance of each level.
sex_lut = dict(zip(GOMF_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOMF_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOMF_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOMF_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOMF_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOMF_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the group heading, followed by one
# coloured patch per level, paired with colours in the same order as the look-up tables.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOMF_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns follow predefined_order)
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while this legend
# entry reads 'p-value' - confirm the intended wording.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Keep the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# The heat map position is re-applied unchanged; the colour strip is shifted up by 0.010.
cg.ax_heatmap.set_position([cg.ax_heatmap.get_position().x0, cg.ax_heatmap.get_position().y0,
                           cg.ax_heatmap.get_position().width, cg.ax_heatmap.get_position().height])
cg.ax_col_colors.set_position([cg.ax_col_colors.get_position().x0, cg.ax_col_colors.get_position().y0 + 0.010,
                              cg.ax_col_colors.get_position().width, cg.ax_col_colors.get_position().height])

Next, check whether these terms respond differently between the pericentral and periportal regions within each condition.

In [None]:
GOMF_terms

### Male AAV9 

In [None]:
# The two condition labels compared in this section (male AVV9, one per region).
conditions_to_check = [
    'male_AVV9_vs_Control_pericentral_Results_msigdb_3',
    'male_AVV9_vs_Control_periportal_Results_msigdb_3',
]
# NOTE(review): combined_df is not referenced by the neighbouring cells, so this
# fill looks like a carry-over from an earlier section - confirm it is still needed.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOMF_terms[GOMF_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Terms whose enrichment direction differs between the conditions being compared.
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df.loc[filtered_df['Term.1'] == current_term]

    # A term reported for fewer than two conditions cannot be compared.
    conditions = filtered_df_term['Condition'].unique()
    if len(conditions) < 2:
        continue

    # Keep the term when at least one row is nominally significant and the
    # first two NES values have opposite signs (their product is negative).
    first_nes, second_nes = filtered_df_term['NES'].tolist()[:2]
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and first_nes * second_nes < 0

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows for the selected terms in the conditions under comparison.
# .copy() gives an independent frame, so the column assignments made on
# GOMF_terms_df in the following cells do not act on a slice of GOMF_terms
# (which triggers pandas' SettingWithCopyWarning).
GOMF_terms_df = GOMF_terms[GOMF_terms['Term.1'].isin(different_terms) & GOMF_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
GOMF_terms_df['Condition'] =  GOMF_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOMF_terms_df['Term.1'] =  GOMF_terms_df['Term.1'].str.replace('GOMF_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters plus an ellipsis.

    Strings no longer than ``max_length`` are returned unchanged. Longer ones
    are cut to their first ``max_length`` characters and get ``"..."``
    appended, so the result can be up to three characters longer than
    ``max_length``.
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOMF_terms_df['Term.1_truncated'] = GOMF_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
GOMF_terms_df

In [None]:
# Use the truncated term names as the row labels of the heatmap.
GOMF_terms_df.set_index('Term.1_truncated', inplace=True)

# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = GOMF_terms_df['Condition'].str.split('_', expand=True)
GOMF_terms_df['Sex'] = condition_parts[0]
GOMF_terms_df['Treatment'] = condition_parts[1]
GOMF_terms_df['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = GOMF_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOMF_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
GOMF_terms_df_unique = GOMF_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOMF_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(GOMF_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOMF_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOMF_terms_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOMF_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOMF_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOMF_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOMF_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap; columns are not clustered (col_cluster=False).
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

### Male AAV2 

In [None]:
# Condition labels of the two result sets compared in this section
# (pericentral vs. periportal); they are matched against
# GOMF_terms['Condition'] in the next cell.
conditions_to_check=['male_AVV2_vs_Control_pericentral_Results_msigdb_3','male_AVV2_vs_Control_periportal_Results_msigdb_3']
# NOTE(review): combined_df does not appear to be read by the other cells of
# this section - this fill looks like a copy-paste leftover; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOMF_terms[GOMF_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms whose enrichment points in opposite directions in the two
# conditions under comparison.
different_terms = []

for current_term in all_terms:
    # Rows of filtered_df that belong to the current term.
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term reported for fewer than two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the term when at least one of its rows has a nominal p-value below
    # 0.05 and the NES values of its first two rows have opposite signs.
    nes_values = filtered_df_term['NES']
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and (nes_values.iloc[0] * nes_values.iloc[1] < 0)

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Restrict GOMF_terms to the discordant terms and the two conditions under
# comparison. .copy() makes the selection an independent frame, so the column
# assignments in the following cells modify this frame only and do not
# trigger pandas' SettingWithCopyWarning.
term_mask = GOMF_terms['Term.1'].isin(different_terms)
condition_mask = GOMF_terms['Condition'].isin(conditions_to_check)
GOMF_terms_df = GOMF_terms[term_mask & condition_mask].copy()

In [None]:
GOMF_terms_df['Condition'] =  GOMF_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOMF_terms_df['Term.1'] =  GOMF_terms_df['Term.1'].str.replace('GOMF_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters plus an ellipsis.

    Strings no longer than ``max_length`` are returned unchanged. Longer ones
    are cut to their first ``max_length`` characters and get ``"..."``
    appended, so the result can be up to three characters longer than
    ``max_length``.
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOMF_terms_df['Term.1_truncated'] = GOMF_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Use the truncated term names as the row labels of the heatmap.
GOMF_terms_df.set_index('Term.1_truncated', inplace=True)

# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = GOMF_terms_df['Condition'].str.split('_', expand=True)
GOMF_terms_df['Sex'] = condition_parts[0]
GOMF_terms_df['Treatment'] = condition_parts[1]
GOMF_terms_df['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = GOMF_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOMF_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
GOMF_terms_df_unique = GOMF_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOMF_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(GOMF_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOMF_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOMF_terms_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOMF_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOMF_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOMF_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOMF_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap; columns are not clustered (col_cluster=False).
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

### Female AAV9 

In [None]:
# Condition labels of the two result sets compared in this section
# (pericentral vs. periportal); they are matched against
# GOMF_terms['Condition'] in the next cell.
conditions_to_check=['female_AVV9_vs_Control_pericentral_Results_msigdb_3','female_AVV9_vs_Control_periportal_Results_msigdb_3']
# NOTE(review): combined_df does not appear to be read by the other cells of
# this section - this fill looks like a copy-paste leftover; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOMF_terms[GOMF_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms whose enrichment points in opposite directions in the two
# conditions under comparison.
different_terms = []

for current_term in all_terms:
    # Rows of filtered_df that belong to the current term.
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term reported for fewer than two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the term when at least one of its rows has a nominal p-value below
    # 0.05 and the NES values of its first two rows have opposite signs.
    nes_values = filtered_df_term['NES']
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and (nes_values.iloc[0] * nes_values.iloc[1] < 0)

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Restrict GOMF_terms to the discordant terms and the two conditions under
# comparison. .copy() makes the selection an independent frame, so the column
# assignments in the following cells modify this frame only and do not
# trigger pandas' SettingWithCopyWarning.
term_mask = GOMF_terms['Term.1'].isin(different_terms)
condition_mask = GOMF_terms['Condition'].isin(conditions_to_check)
GOMF_terms_df = GOMF_terms[term_mask & condition_mask].copy()

In [None]:
GOMF_terms_df['Condition'] =  GOMF_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOMF_terms_df['Term.1'] =  GOMF_terms_df['Term.1'].str.replace('GOMF_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters plus an ellipsis.

    Strings no longer than ``max_length`` are returned unchanged. Longer ones
    are cut to their first ``max_length`` characters and get ``"..."``
    appended, so the result can be up to three characters longer than
    ``max_length``.
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOMF_terms_df['Term.1_truncated'] = GOMF_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Use the truncated term names as the row labels of the heatmap.
GOMF_terms_df.set_index('Term.1_truncated', inplace=True)

# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = GOMF_terms_df['Condition'].str.split('_', expand=True)
GOMF_terms_df['Sex'] = condition_parts[0]
GOMF_terms_df['Treatment'] = condition_parts[1]
GOMF_terms_df['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = GOMF_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOMF_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
GOMF_terms_df_unique = GOMF_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOMF_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(GOMF_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOMF_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOMF_terms_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOMF_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOMF_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOMF_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOMF_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap; columns are not clustered (col_cluster=False).
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

### Female AAV2 

In [None]:
# Condition labels of the two result sets compared in this section
# (pericentral vs. periportal); they are matched against
# GOMF_terms['Condition'] in the next cell.
conditions_to_check=['female_AVV2_vs_Control_pericentral_Results_msigdb_3','female_AVV2_vs_Control_periportal_Results_msigdb_3']
# NOTE(review): combined_df does not appear to be read by the other cells of
# this section - this fill looks like a copy-paste leftover; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = GOMF_terms[GOMF_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms whose enrichment points in opposite directions in the two
# conditions under comparison.
different_terms = []

for current_term in all_terms:
    # Rows of filtered_df that belong to the current term.
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term reported for fewer than two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the term when at least one of its rows has a nominal p-value below
    # 0.05 and the NES values of its first two rows have opposite signs.
    nes_values = filtered_df_term['NES']
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and (nes_values.iloc[0] * nes_values.iloc[1] < 0)

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Restrict GOMF_terms to the discordant terms and the two conditions under
# comparison. .copy() makes the selection an independent frame, so the column
# assignments in the following cells modify this frame only and do not
# trigger pandas' SettingWithCopyWarning.
term_mask = GOMF_terms['Term.1'].isin(different_terms)
condition_mask = GOMF_terms['Condition'].isin(conditions_to_check)
GOMF_terms_df = GOMF_terms[term_mask & condition_mask].copy()

In [None]:
GOMF_terms_df['Condition'] =  GOMF_terms_df['Condition'].str.replace('_Results_msigdb_3', '', regex=False) 

In [None]:
GOMF_terms_df['Term.1'] =  GOMF_terms_df['Term.1'].str.replace('GOMF_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters plus an ellipsis.

    Strings no longer than ``max_length`` are returned unchanged. Longer ones
    are cut to their first ``max_length`` characters and get ``"..."``
    appended, so the result can be up to three characters longer than
    ``max_length``.
    """
    fits = len(input_string) <= max_length
    return input_string if fits else input_string[:max_length] + "..."

In [None]:
GOMF_terms_df['Term.1_truncated'] = GOMF_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Use the truncated term names as the row labels of the heatmap.
GOMF_terms_df.set_index('Term.1_truncated', inplace=True)

# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = GOMF_terms_df['Condition'].str.split('_', expand=True)
GOMF_terms_df['Sex'] = condition_parts[0]
GOMF_terms_df['Treatment'] = condition_parts[1]
GOMF_terms_df['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = GOMF_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = GOMF_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
GOMF_terms_df_unique = GOMF_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
GOMF_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(GOMF_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(GOMF_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(GOMF_terms_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = GOMF_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = GOMF_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = GOMF_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(GOMF_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap; columns are not clustered (col_cluster=False).
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

### Pathways: Reactome

In [None]:
# Keep only the Reactome gene sets. na=False treats missing term names as
# non-matches, and .copy() gives an independent frame so that adding columns
# to it (e.g. 'abs_NES') does not trigger pandas' SettingWithCopyWarning.
reactome_terms = combined_gsea_df[combined_gsea_df['Term.1'].str.startswith('REACTOME_', na=False)].copy()

In [None]:
reactome_terms

In [None]:
reactome_terms['abs_NES'] = abs(reactome_terms['NES'])

In [None]:
top_reactome_terms = reactome_terms.sort_values(by = 'abs_NES', ascending=False).drop_duplicates('Term.1')['Term.1'].head(35).tolist()

In [None]:
top_reactome_terms

In [None]:
# Rows of the selected top Reactome terms. .copy() makes the selection an
# independent frame, so the column assignments in the following cells do not
# trigger pandas' SettingWithCopyWarning.
reactome_terms_df = reactome_terms[reactome_terms['Term.1'].isin(top_reactome_terms)].copy()

In [None]:
reactome_terms_df['Condition'] =  reactome_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
reactome_terms_df['Term.1'] =  reactome_terms_df['Term.1'].str.replace('REACTOME_', '', regex=False) 

In [None]:
reactome_terms_df['Term.1_truncated'] = reactome_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Use the truncated term names as the row labels of the heatmap.
reactome_terms_df.set_index('Term.1_truncated', inplace=True)

# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = reactome_terms_df['Condition'].str.split('_', expand=True)
reactome_terms_df['Sex'] = condition_parts[0]
reactome_terms_df['Treatment'] = condition_parts[1]
reactome_terms_df['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = reactome_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = reactome_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
reactome_terms_df_unique = reactome_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
reactome_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(reactome_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(reactome_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(reactome_terms_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = reactome_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = reactome_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = reactome_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(reactome_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap with the columns in predefined_order; columns are not
# clustered (col_cluster=False).
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

Because many of the top results come from negative enrichment (reduced activity in treated samples compared with controls), we now focus on the pathways most strongly activated after treatment.

In [None]:
top_reactome_terms_up = reactome_terms.sort_values(by = 'NES', ascending=False).drop_duplicates('Term.1')['Term.1'].head(75).tolist()

In [None]:
top_reactome_terms_up

In [None]:
# Rows of the top positively enriched Reactome terms. .copy() makes the
# selection an independent frame, so the column assignments in the following
# cells do not trigger pandas' SettingWithCopyWarning.
reactome_terms_df = reactome_terms[reactome_terms['Term.1'].isin(top_reactome_terms_up)].copy()

In [None]:
reactome_terms_df['Condition'] =  reactome_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
reactome_terms_df['Term.1'] =  reactome_terms_df['Term.1'].str.replace('REACTOME_', '', regex=False) 

In [None]:
reactome_terms_df['Term.1_truncated'] = reactome_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Use the truncated term names as the row labels of the heatmap.
reactome_terms_df.set_index('Term.1_truncated', inplace=True)

# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = reactome_terms_df['Condition'].str.split('_', expand=True)
reactome_terms_df['Sex'] = condition_parts[0]
reactome_terms_df['Treatment'] = condition_parts[1]
reactome_terms_df['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = reactome_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = reactome_terms_df.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
reactome_terms_df_unique = reactome_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
reactome_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(reactome_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(reactome_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(reactome_terms_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = reactome_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = reactome_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = reactome_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(reactome_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap with the columns in predefined_order; columns are not
# clustered (col_cluster=False). The figure is taller than the other heatmaps.
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 24), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

In [None]:
search_value_1 = 'TCR'
filtered_df_1 = reactome_terms[reactome_terms['Term.1'].str.contains(search_value_1, case=False, na=False)]
search_value_2 = 'PKA'
filtered_df_2 = reactome_terms[reactome_terms['Term.1'].str.contains(search_value_2, case=False, na=False)]

In [None]:
# Stack the hits of both searches into one table with a fresh 0..n-1 index.
# A term whose name matches both search strings is selected twice; such exact
# duplicate rows are dropped because the later pivot needs unique
# term/condition pairs.
result = pd.concat([filtered_df_1, filtered_df_2], ignore_index=True).drop_duplicates(ignore_index=True)

In [None]:
result

In [None]:
result.set_index('Term.1', inplace=True)

In [None]:
# Column order for the heatmap: sexes (female, male) are the outer grouping,
# treatments (AVV2, AVV9) the middle one, and within each the regions run
# periportal -> other -> pericentral.
predefined_order = [
    f'{sex}_{treatment}_vs_Control_{region}_Results_msigdb_4'
    for sex in ('female', 'male')
    for treatment in ('AVV2', 'AVV9')
    for region in ('periportal', 'other', 'pericentral')
]

In [None]:
# Split each condition label on '_' once and reuse the pieces
# (field 0 -> Sex, field 1 -> Treatment, field 4 -> Region).
condition_parts = result['Condition'].str.split('_', expand=True)
result['Sex'] = condition_parts[0]
result['Treatment'] = condition_parts[1]
result['Region'] = condition_parts[4]

# Term x condition tables: NES drives the cell colour, the FDR p-value is
# reduced to a '*' marker for cells below 0.05.
pivot_df_logFC = result.pivot(columns='Condition', values="NES")
pivot_df_pvalues = result.pivot(columns='Condition', values="FDR p-value")
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition pairs without a result are drawn as NES = 0.
pivot_df_logFC.fillna(0, inplace=True)

# One annotation row (sex, treatment, region) per condition.
result_df_unique = result[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
result_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are assigned to the annotation levels in order of first appearance.
sex_lut = dict(zip(result_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(result_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(result_df_unique['Region'].unique(), region_colors))

# Colour table for the column annotation bars, indexed by condition.
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = result_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = result_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = result_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch serves as the group heading, followed by
# one coloured patch per level.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(result_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Draw the heatmap with the columns in predefined_order; columns are not
# clustered (col_cluster=False).
cg = sns.clustermap(pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 6), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(0.02, 0.15, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # hide the row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # hide the column dendrogram
cg.cax.set_title('NES', pad=10)

# The significance legend is re-added as a separate artist so that the second
# legend() call does not replace it.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Place the axis title above the annotation bars and clear the default labels.
cg.ax_heatmap.set_title('Condition', y=1.125)
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap position, then shift the annotation bars up slightly.
heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([colors_pos.x0, colors_pos.y0 + 0.010, colors_pos.width, colors_pos.height])

Can we also check whether the enriched pathways respond differently between the pericentral and periportal regions within each condition?

In [None]:
reactome_terms

### Male AAV9 

In [None]:
# Condition labels of the two result sets compared in this section
# (pericentral vs. periportal); they are matched against
# reactome_terms['Condition'] in the next cell.
conditions_to_check=['male_AVV9_vs_Control_pericentral_Results_msigdb_4','male_AVV9_vs_Control_periportal_Results_msigdb_4']
# NOTE(review): combined_df does not appear to be read by the other cells of
# this section - this fill looks like a copy-paste leftover; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = reactome_terms[reactome_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms whose enrichment points in opposite directions in the two
# conditions under comparison.
different_terms = []

for current_term in all_terms:
    # Rows of filtered_df that belong to the current term.
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term reported for fewer than two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    # Keep the term when at least one of its rows has a nominal p-value below
    # 0.05 and the NES values of its first two rows have opposite signs.
    nes_values = filtered_df_term['NES']
    any_significant = (filtered_df_term['NOM p-value'] < 0.05).any()
    criteria_met = any_significant and (nes_values.iloc[0] * nes_values.iloc[1] < 0)

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Restrict reactome_terms to the discordant terms and the two conditions under
# comparison. .copy() makes the selection an independent frame, so the column
# assignments in the following cells modify this frame only and do not
# trigger pandas' SettingWithCopyWarning.
term_mask = reactome_terms['Term.1'].isin(different_terms)
condition_mask = reactome_terms['Condition'].isin(conditions_to_check)
reactome_terms_df = reactome_terms[term_mask & condition_mask].copy()

In [None]:
reactome_terms_df['Condition'] =  reactome_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
reactome_terms_df['Term.1'] =  reactome_terms_df['Term.1'].str.replace('REACTOME_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten *input_string* to its first *max_length* characters.

    Strings longer than *max_length* are cut and "..." is appended (the
    ellipsis comes on top of *max_length*); anything shorter or equal is
    returned unchanged.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
reactome_terms_df['Term.1_truncated'] = reactome_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
reactome_terms_df

In [None]:
# Heatmap of the terms in `reactome_terms_df`: rows are (truncated) terms,
# columns are conditions, cell colour is the NES and '*' marks FDR p-value < 0.05.
# Rebind instead of `inplace=True` so the column assignments below act on a new
# frame rather than on the filtered slice (avoids SettingWithCopyWarning).
reactome_terms_df = reactome_terms_df.set_index('Term.1_truncated')

# Condition names look like '<sex>_<treatment>_vs_Control_<region>'; split once
# and reuse the pieces (each assigned as a Series).
condition_parts = reactome_terms_df['Condition'].str.split('_', expand=True)
reactome_terms_df['Sex'] = condition_parts[0]
reactome_terms_df['Treatment'] = condition_parts[1]
reactome_terms_df['Region'] = condition_parts[4]

pivot_df_logFC = reactome_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = reactome_terms_df.pivot(columns='Condition', values="FDR p-value")
# Star the significant cells; missing term/condition pairs (NaN) stay blank.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Terms missing from a condition are drawn as NES = 0.
pivot_df_logFC = pivot_df_logFC.fillna(0)


# Map the conditions to colors
reactome_terms_df_unique = (
    reactome_terms_df[['Condition', 'Sex', 'Treatment', 'Region']]
    .drop_duplicates()
    .set_index("Condition")
)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance, so a given level is not
# guaranteed to get the same colour in every figure.
sex_lut = dict(zip(reactome_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(reactome_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(reactome_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = reactome_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = reactome_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = reactome_terms_df_unique['Region'].map(region_lut)

# Legend: an invisible, title-like entry per annotation followed by its levels.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(reactome_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns keep their order: col_cluster=False)
cg = sns.clustermap(
    pivot_df_logFC,
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues,
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# The stars annotate the "FDR p-value" column.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
# Shift the column-colour strip up by 0.010.
col_colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_pos.x0, col_colors_pos.y0 + 0.010,
                               col_colors_pos.width, col_colors_pos.height])

### Male AAV2 

In [None]:
# The two zonation contrasts (pericentral vs. periportal) compared in this subsection.
conditions_to_check = [
    'male_AVV2_vs_Control_pericentral_Results_msigdb_4',
    'male_AVV2_vs_Control_periportal_Results_msigdb_4',
]
# NOTE(review): looks like a leftover from the DEG section -- `combined_df` is
# not read by the enrichment cells that follow; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = reactome_terms[reactome_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored in both selected conditions, are nominally
# significant (NOM p-value < 0.05) in at least one row, and whose first two NES
# values have opposite signs.
different_terms = []

for term in all_terms:
    term_rows = filtered_df[filtered_df['Term.1'] == term]

    # Nothing to compare unless the term was tested in both conditions.
    if len(term_rows['Condition'].unique()) < 2:
        continue

    # At least one row must pass the nominal p-value cut-off.
    if not (term_rows['NOM p-value'] < 0.05).any():
        continue

    # A negative product means the two enrichment scores point in opposite directions.
    first_nes, second_nes = term_rows['NES'].tolist()[:2]
    if first_nes * second_nes < 0:
        different_terms.append(term)

In [None]:
different_terms

In [None]:
# Rows of the sign-flipping terms in the two selected conditions.
# Take an explicit copy: the following cells overwrite and add columns on this
# frame, and doing that on a boolean-filtered slice of `reactome_terms` emits
# pandas' SettingWithCopyWarning.
reactome_terms_df = reactome_terms[
    reactome_terms['Term.1'].isin(different_terms)
    & reactome_terms['Condition'].isin(conditions_to_check)
].copy()

In [None]:
reactome_terms_df['Condition'] =  reactome_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
reactome_terms_df['Term.1'] =  reactome_terms_df['Term.1'].str.replace('REACTOME_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Return *input_string* cut to *max_length* characters plus "...".

    The ellipsis is only appended when the string is longer than
    *max_length*; otherwise the string is returned as is.
    """
    too_long = len(input_string) > max_length
    return input_string[:max_length] + "..." if too_long else input_string

In [None]:
reactome_terms_df['Term.1_truncated'] = reactome_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of the terms in `reactome_terms_df`: rows are (truncated) terms,
# columns are conditions, cell colour is the NES and '*' marks FDR p-value < 0.05.
# Rebind instead of `inplace=True` so the column assignments below act on a new
# frame rather than on the filtered slice (avoids SettingWithCopyWarning).
reactome_terms_df = reactome_terms_df.set_index('Term.1_truncated')

# Condition names look like '<sex>_<treatment>_vs_Control_<region>'; split once
# and reuse the pieces (each assigned as a Series).
condition_parts = reactome_terms_df['Condition'].str.split('_', expand=True)
reactome_terms_df['Sex'] = condition_parts[0]
reactome_terms_df['Treatment'] = condition_parts[1]
reactome_terms_df['Region'] = condition_parts[4]

pivot_df_logFC = reactome_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = reactome_terms_df.pivot(columns='Condition', values="FDR p-value")
# Star the significant cells; missing term/condition pairs (NaN) stay blank.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Terms missing from a condition are drawn as NES = 0.
pivot_df_logFC = pivot_df_logFC.fillna(0)


# Map the conditions to colors
reactome_terms_df_unique = (
    reactome_terms_df[['Condition', 'Sex', 'Treatment', 'Region']]
    .drop_duplicates()
    .set_index("Condition")
)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance, so a given level is not
# guaranteed to get the same colour in every figure.
sex_lut = dict(zip(reactome_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(reactome_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(reactome_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = reactome_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = reactome_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = reactome_terms_df_unique['Region'].map(region_lut)

# Legend: an invisible, title-like entry per annotation followed by its levels.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(reactome_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns keep their order: col_cluster=False)
cg = sns.clustermap(
    pivot_df_logFC,
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues,
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# The stars annotate the "FDR p-value" column.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
# Shift the column-colour strip up by 0.010.
col_colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_pos.x0, col_colors_pos.y0 + 0.010,
                               col_colors_pos.width, col_colors_pos.height])

### Female AAV9 

In [None]:
# The two zonation contrasts (pericentral vs. periportal) compared in this subsection.
conditions_to_check = [
    'female_AVV9_vs_Control_pericentral_Results_msigdb_4',
    'female_AVV9_vs_Control_periportal_Results_msigdb_4',
]
# NOTE(review): looks like a leftover from the DEG section -- `combined_df` is
# not read by the enrichment cells that follow; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = reactome_terms[reactome_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored in both selected conditions, are nominally
# significant (NOM p-value < 0.05) in at least one row, and whose first two NES
# values have opposite signs.
different_terms = []

for term in all_terms:
    term_rows = filtered_df[filtered_df['Term.1'] == term]

    # Nothing to compare unless the term was tested in both conditions.
    if len(term_rows['Condition'].unique()) < 2:
        continue

    # At least one row must pass the nominal p-value cut-off.
    if not (term_rows['NOM p-value'] < 0.05).any():
        continue

    # A negative product means the two enrichment scores point in opposite directions.
    first_nes, second_nes = term_rows['NES'].tolist()[:2]
    if first_nes * second_nes < 0:
        different_terms.append(term)

In [None]:
different_terms

In [None]:
# Rows of the sign-flipping terms in the two selected conditions.
# Take an explicit copy: the following cells overwrite and add columns on this
# frame, and doing that on a boolean-filtered slice of `reactome_terms` emits
# pandas' SettingWithCopyWarning.
reactome_terms_df = reactome_terms[
    reactome_terms['Term.1'].isin(different_terms)
    & reactome_terms['Condition'].isin(conditions_to_check)
].copy()

In [None]:
reactome_terms_df['Condition'] =  reactome_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
reactome_terms_df['Term.1'] =  reactome_terms_df['Term.1'].str.replace('REACTOME_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten *input_string* to its first *max_length* characters.

    Strings longer than *max_length* are cut and "..." is appended (the
    ellipsis comes on top of *max_length*); anything shorter or equal is
    returned unchanged.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
reactome_terms_df['Term.1_truncated'] = reactome_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of the terms in `reactome_terms_df`: rows are (truncated) terms,
# columns are conditions, cell colour is the NES and '*' marks FDR p-value < 0.05.
# Rebind instead of `inplace=True` so the column assignments below act on a new
# frame rather than on the filtered slice (avoids SettingWithCopyWarning).
reactome_terms_df = reactome_terms_df.set_index('Term.1_truncated')

# Condition names look like '<sex>_<treatment>_vs_Control_<region>'; split once
# and reuse the pieces (each assigned as a Series).
condition_parts = reactome_terms_df['Condition'].str.split('_', expand=True)
reactome_terms_df['Sex'] = condition_parts[0]
reactome_terms_df['Treatment'] = condition_parts[1]
reactome_terms_df['Region'] = condition_parts[4]

pivot_df_logFC = reactome_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = reactome_terms_df.pivot(columns='Condition', values="FDR p-value")
# Star the significant cells; missing term/condition pairs (NaN) stay blank.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Terms missing from a condition are drawn as NES = 0.
pivot_df_logFC = pivot_df_logFC.fillna(0)


# Map the conditions to colors
reactome_terms_df_unique = (
    reactome_terms_df[['Condition', 'Sex', 'Treatment', 'Region']]
    .drop_duplicates()
    .set_index("Condition")
)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance, so a given level is not
# guaranteed to get the same colour in every figure.
sex_lut = dict(zip(reactome_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(reactome_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(reactome_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = reactome_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = reactome_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = reactome_terms_df_unique['Region'].map(region_lut)

# Legend: an invisible, title-like entry per annotation followed by its levels.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(reactome_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns keep their order: col_cluster=False)
cg = sns.clustermap(
    pivot_df_logFC,
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues,
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# The stars annotate the "FDR p-value" column.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
# Shift the column-colour strip up by 0.010.
col_colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_pos.x0, col_colors_pos.y0 + 0.010,
                               col_colors_pos.width, col_colors_pos.height])

### Female AAV2 

In [None]:
# The two zonation contrasts (pericentral vs. periportal) compared in this subsection.
conditions_to_check = [
    'female_AVV2_vs_Control_pericentral_Results_msigdb_4',
    'female_AVV2_vs_Control_periportal_Results_msigdb_4',
]
# NOTE(review): looks like a leftover from the DEG section -- `combined_df` is
# not read by the enrichment cells that follow; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = reactome_terms[reactome_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored in both selected conditions, are nominally
# significant (NOM p-value < 0.05) in at least one row, and whose first two NES
# values have opposite signs.
different_terms = []

for term in all_terms:
    term_rows = filtered_df[filtered_df['Term.1'] == term]

    # Nothing to compare unless the term was tested in both conditions.
    if len(term_rows['Condition'].unique()) < 2:
        continue

    # At least one row must pass the nominal p-value cut-off.
    if not (term_rows['NOM p-value'] < 0.05).any():
        continue

    # A negative product means the two enrichment scores point in opposite directions.
    first_nes, second_nes = term_rows['NES'].tolist()[:2]
    if first_nes * second_nes < 0:
        different_terms.append(term)

In [None]:
different_terms

In [None]:
# Rows of the sign-flipping terms in the two selected conditions.
# Take an explicit copy: the following cells overwrite and add columns on this
# frame, and doing that on a boolean-filtered slice of `reactome_terms` emits
# pandas' SettingWithCopyWarning.
reactome_terms_df = reactome_terms[
    reactome_terms['Term.1'].isin(different_terms)
    & reactome_terms['Condition'].isin(conditions_to_check)
].copy()

In [None]:
reactome_terms_df['Condition'] =  reactome_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
reactome_terms_df['Term.1'] =  reactome_terms_df['Term.1'].str.replace('REACTOME_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Return *input_string* cut to *max_length* characters plus "...".

    The ellipsis is only appended when the string is longer than
    *max_length*; otherwise the string is returned as is.
    """
    too_long = len(input_string) > max_length
    return input_string[:max_length] + "..." if too_long else input_string

In [None]:
reactome_terms_df['Term.1_truncated'] = reactome_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of the terms in `reactome_terms_df`: rows are (truncated) terms,
# columns are conditions, cell colour is the NES and '*' marks FDR p-value < 0.05.
# Rebind instead of `inplace=True` so the column assignments below act on a new
# frame rather than on the filtered slice (avoids SettingWithCopyWarning).
reactome_terms_df = reactome_terms_df.set_index('Term.1_truncated')

# Condition names look like '<sex>_<treatment>_vs_Control_<region>'; split once
# and reuse the pieces (each assigned as a Series).
condition_parts = reactome_terms_df['Condition'].str.split('_', expand=True)
reactome_terms_df['Sex'] = condition_parts[0]
reactome_terms_df['Treatment'] = condition_parts[1]
reactome_terms_df['Region'] = condition_parts[4]

pivot_df_logFC = reactome_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = reactome_terms_df.pivot(columns='Condition', values="FDR p-value")
# Star the significant cells; missing term/condition pairs (NaN) stay blank.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Terms missing from a condition are drawn as NES = 0.
pivot_df_logFC = pivot_df_logFC.fillna(0)


# Map the conditions to colors
reactome_terms_df_unique = (
    reactome_terms_df[['Condition', 'Sex', 'Treatment', 'Region']]
    .drop_duplicates()
    .set_index("Condition")
)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance, so a given level is not
# guaranteed to get the same colour in every figure.
sex_lut = dict(zip(reactome_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(reactome_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(reactome_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = reactome_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = reactome_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = reactome_terms_df_unique['Region'].map(region_lut)

# Legend: an invisible, title-like entry per annotation followed by its levels.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(reactome_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns keep their order: col_cluster=False)
cg = sns.clustermap(
    pivot_df_logFC,
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues,
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# The stars annotate the "FDR p-value" column.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
# Shift the column-colour strip up by 0.010.
col_colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_pos.x0, col_colors_pos.y0 + 0.010,
                               col_colors_pos.width, col_colors_pos.height])

## Pathways: KEGG

In [None]:
# All GSEA rows whose term name starts with 'KEGG_'.
# Take an explicit copy: a later cell adds the 'abs_NES' column to this frame,
# and doing that on a boolean-filtered slice of `combined_gsea_df` emits
# pandas' SettingWithCopyWarning.
KEGG_terms = combined_gsea_df[combined_gsea_df['Term.1'].str.startswith('KEGG_')].copy()

In [None]:
KEGG_terms

In [None]:
# Magnitude of the enrichment score, used below to rank terms regardless of direction.
KEGG_terms['abs_NES'] = KEGG_terms['NES'].abs()

In [None]:
# Sort by |NES| (largest first), keep each term's first -- i.e. strongest --
# row, and take the names of the top 35 terms.
top_KEGG_terms = (
    KEGG_terms.sort_values(by='abs_NES', ascending=False)
    .drop_duplicates('Term.1')['Term.1']
    .head(35)
    .tolist()
)

In [None]:
top_KEGG_terms

In [None]:
# Rows of the top-ranked KEGG terms across all conditions.
# Take an explicit copy: the following cells overwrite and add columns on this
# frame, and doing that on a boolean-filtered slice of `KEGG_terms` emits
# pandas' SettingWithCopyWarning.
KEGG_terms_df = KEGG_terms[KEGG_terms['Term.1'].isin(top_KEGG_terms)].copy()

In [None]:
KEGG_terms_df['Condition'] =  KEGG_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
KEGG_terms_df['Term.1'] =  KEGG_terms_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
KEGG_terms_df['Term.1_truncated'] = KEGG_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Column order for the heatmap: grouped by sex, then treatment, and within each
# group periportal -> other -> pericentral.
predefined_order = [
    f'{sex}_{treatment}_vs_Control_{region}'
    for sex in ('female', 'male')
    for treatment in ('AVV2', 'AVV9')
    for region in ('periportal', 'other', 'pericentral')
]

In [None]:
# Heatmap of the terms in `KEGG_terms_df`: rows are (truncated) terms, columns
# are the conditions in `predefined_order`, cell colour is the NES and '*'
# marks FDR p-value < 0.05.
# Rebind instead of `inplace=True` so the column assignments below act on a new
# frame rather than on the filtered slice (avoids SettingWithCopyWarning).
KEGG_terms_df = KEGG_terms_df.set_index('Term.1_truncated')

# Condition names look like '<sex>_<treatment>_vs_Control_<region>'; split once
# and reuse the pieces (each assigned as a Series).
condition_parts = KEGG_terms_df['Condition'].str.split('_', expand=True)
KEGG_terms_df['Sex'] = condition_parts[0]
KEGG_terms_df['Treatment'] = condition_parts[1]
KEGG_terms_df['Region'] = condition_parts[4]

pivot_df_logFC = KEGG_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = KEGG_terms_df.pivot(columns='Condition', values="FDR p-value")
# Star the significant cells; missing term/condition pairs (NaN) stay blank.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Terms missing from a condition are drawn as NES = 0.
pivot_df_logFC = pivot_df_logFC.fillna(0)


# Map the conditions to colors
KEGG_terms_df_unique = (
    KEGG_terms_df[['Condition', 'Sex', 'Treatment', 'Region']]
    .drop_duplicates()
    .set_index("Condition")
)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance, so a given level is not
# guaranteed to get the same colour in every figure.
sex_lut = dict(zip(KEGG_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(KEGG_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(KEGG_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = KEGG_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = KEGG_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = KEGG_terms_df_unique['Region'].map(region_lut)

# Legend: an invisible, title-like entry per annotation followed by its levels.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(KEGG_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap. Columns follow `predefined_order` (col_cluster=False);
# indexing with that list raises KeyError if a listed condition is missing.
cg = sns.clustermap(
    pivot_df_logFC[predefined_order],
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues[predefined_order],
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# The stars annotate the "FDR p-value" column.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
# Shift the column-colour strip up by 0.010.
col_colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_pos.x0, col_colors_pos.y0 + 0.010,
                               col_colors_pos.width, col_colors_pos.height])

Next, check which terms respond in opposite directions (NES of opposite sign) between the pericentral and periportal regions within each sex/treatment condition.

In [None]:
KEGG_terms

### Male AAV9 

In [None]:
# The two zonation contrasts (pericentral vs. periportal) compared in this subsection.
conditions_to_check = [
    'male_AVV9_vs_Control_pericentral_Results_msigdb_4',
    'male_AVV9_vs_Control_periportal_Results_msigdb_4',
]
# NOTE(review): looks like a leftover from the DEG section -- `combined_df` is
# not read by the enrichment cells that follow; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = KEGG_terms[KEGG_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored in both selected conditions, are nominally
# significant (NOM p-value < 0.1) in at least one row, and whose first two NES
# values have opposite signs.
different_terms = []

for term in all_terms:
    term_rows = filtered_df[filtered_df['Term.1'] == term]

    # Nothing to compare unless the term was tested in both conditions.
    if len(term_rows['Condition'].unique()) < 2:
        continue

    # At least one row must pass the nominal p-value cut-off.
    if not (term_rows['NOM p-value'] < 0.1).any():
        continue

    # A negative product means the two enrichment scores point in opposite directions.
    first_nes, second_nes = term_rows['NES'].tolist()[:2]
    if first_nes * second_nes < 0:
        different_terms.append(term)

In [None]:
different_terms

In [None]:
# Rows of the sign-flipping terms in the two selected conditions.
# Take an explicit copy: the following cells overwrite and add columns on this
# frame, and doing that on a boolean-filtered slice of `KEGG_terms` emits
# pandas' SettingWithCopyWarning.
KEGG_terms_df = KEGG_terms[
    KEGG_terms['Term.1'].isin(different_terms)
    & KEGG_terms['Condition'].isin(conditions_to_check)
].copy()

In [None]:
KEGG_terms_df['Condition'] =  KEGG_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
KEGG_terms_df['Term.1'] =  KEGG_terms_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten *input_string* to its first *max_length* characters.

    Strings longer than *max_length* are cut and "..." is appended (the
    ellipsis comes on top of *max_length*); anything shorter or equal is
    returned unchanged.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
KEGG_terms_df['Term.1_truncated'] = KEGG_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
KEGG_terms_df

In [None]:
# Heatmap of the terms in `KEGG_terms_df`: rows are (truncated) terms, columns
# are conditions, cell colour is the NES and '*' marks FDR p-value < 0.05.
# Rebind instead of `inplace=True` so the column assignments below act on a new
# frame rather than on the filtered slice (avoids SettingWithCopyWarning).
KEGG_terms_df = KEGG_terms_df.set_index('Term.1_truncated')

# Condition names look like '<sex>_<treatment>_vs_Control_<region>'; split once
# and reuse the pieces (each assigned as a Series).
condition_parts = KEGG_terms_df['Condition'].str.split('_', expand=True)
KEGG_terms_df['Sex'] = condition_parts[0]
KEGG_terms_df['Treatment'] = condition_parts[1]
KEGG_terms_df['Region'] = condition_parts[4]

pivot_df_logFC = KEGG_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = KEGG_terms_df.pivot(columns='Condition', values="FDR p-value")
# Star the significant cells; missing term/condition pairs (NaN) stay blank.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Terms missing from a condition are drawn as NES = 0.
pivot_df_logFC = pivot_df_logFC.fillna(0)


# Map the conditions to colors
KEGG_terms_df_unique = (
    KEGG_terms_df[['Condition', 'Sex', 'Treatment', 'Region']]
    .drop_duplicates()
    .set_index("Condition")
)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colours are handed out in order of first appearance, so a given level is not
# guaranteed to get the same colour in every figure.
sex_lut = dict(zip(KEGG_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(KEGG_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(KEGG_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = KEGG_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = KEGG_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = KEGG_terms_df_unique['Region'].map(region_lut)

# Legend: an invisible, title-like entry per annotation followed by its levels.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(KEGG_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap (columns keep their order: col_cluster=False)
cg = sns.clustermap(
    pivot_df_logFC,
    cmap="vlag",
    figsize=(10, 10),
    linewidths=0.75,
    linecolor='black',
    dendrogram_ratio=(.175, .025),
    center=0,
    annot=pivot_df_pvalues,
    fmt='',
    col_colors=col_colors,
    xticklabels=False,
    square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18),
    col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# The stars annotate the "FDR p-value" column.
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'],
    frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

heatmap_pos = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_pos.x0, heatmap_pos.y0, heatmap_pos.width, heatmap_pos.height])
# Shift the column-colour strip up by 0.010.
col_colors_pos = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_pos.x0, col_colors_pos.y0 + 0.010,
                               col_colors_pos.width, col_colors_pos.height])

### Male AAV2 

In [None]:
# The two zonation contrasts (pericentral vs. periportal) compared in this subsection.
conditions_to_check = [
    'male_AVV2_vs_Control_pericentral_Results_msigdb_4',
    'male_AVV2_vs_Control_periportal_Results_msigdb_4',
]
# NOTE(review): looks like a leftover from the DEG section -- `combined_df` is
# not read by the enrichment cells that follow; confirm before removing.
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = KEGG_terms[KEGG_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored in both selected conditions, are nominally
# significant (NOM p-value < 0.1) in at least one row, and whose first two NES
# values have opposite signs.
different_terms = []

for term in all_terms:
    term_rows = filtered_df[filtered_df['Term.1'] == term]

    # Nothing to compare unless the term was tested in both conditions.
    if len(term_rows['Condition'].unique()) < 2:
        continue

    # At least one row must pass the nominal p-value cut-off.
    if not (term_rows['NOM p-value'] < 0.1).any():
        continue

    # A negative product means the two enrichment scores point in opposite directions.
    first_nes, second_nes = term_rows['NES'].tolist()[:2]
    if first_nes * second_nes < 0:
        different_terms.append(term)

In [None]:
different_terms

In [None]:
# Rows of the sign-flipping terms in the two selected conditions.
# Take an explicit copy: the following cells overwrite and add columns on this
# frame, and doing that on a boolean-filtered slice of `KEGG_terms` emits
# pandas' SettingWithCopyWarning.
KEGG_terms_df = KEGG_terms[
    KEGG_terms['Term.1'].isin(different_terms)
    & KEGG_terms['Condition'].isin(conditions_to_check)
].copy()

In [None]:
KEGG_terms_df['Condition'] =  KEGG_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
KEGG_terms_df['Term.1'] =  KEGG_terms_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters, marking the cut with "...".

    Strings of at most ``max_length`` characters are returned unchanged. Longer
    strings keep their first ``max_length`` characters followed by "...", so the
    result can be three characters longer than ``max_length``.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
KEGG_terms_df['Term.1_truncated'] = KEGG_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected terms (rows) in the two conditions (columns).
# Rows are labelled by the truncated term name.
KEGG_terms_df.set_index('Term.1_truncated', inplace=True)

# Condition labels are underscore-separated; fields 0, 1 and 4 are read as sex, treatment and region.
# Split once and assign each field as a plain Series.
condition_parts = KEGG_terms_df['Condition'].str.split('_', expand=True)
KEGG_terms_df['Sex'] = condition_parts[0]
KEGG_terms_df['Treatment'] = condition_parts[1]
KEGG_terms_df['Region'] = condition_parts[4]

# Wide tables: one row per term, one column per condition
pivot_df_logFC = KEGG_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = KEGG_terms_df.pivot(columns='Condition', values="FDR p-value")
# Cell annotation: '*' where the FDR p-value is below 0.05, blank otherwise
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition combinations without a value are drawn at NES 0 (the colormap centre)
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
KEGG_terms_df_unique = KEGG_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
KEGG_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are handed out in order of first appearance of each level in this table
sex_lut = dict(zip(KEGG_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(KEGG_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(KEGG_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (indexed like the heatmap columns)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = KEGG_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = KEGG_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = KEGG_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one colored patch per level
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(KEGG_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Create the clustermap
cg = sns.clustermap(
    pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
    col_colors=col_colors, xticklabels=False, square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while the label reads 'p-value' — confirm wording
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'], frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current bounding box unchanged, then shift the column-color strip up by 0.010
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

### Female AAV9 

In [None]:
# The two result sets to contrast: pericentral vs. periportal (female, AVV9, msigdb_4 results)
conditions_to_check=['female_AVV9_vs_Control_pericentral_Results_msigdb_4','female_AVV9_vs_Control_periportal_Results_msigdb_4']
# NOTE(review): combined_df does not appear to be used in the rest of this section; this looks like a copy-paste leftover — confirm before removing
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = KEGG_terms[KEGG_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored for both conditions, have a nominal p-value below the
# cutoff in at least one row, and whose first two NES values have opposite signs.
nom_pvalue_cutoff = 0.2
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term present in only one of the conditions cannot be compared
    if len(conditions) < 2:
        continue

    nes_values = filtered_df_term['NES'].tolist()
    passes_cutoff = (filtered_df_term['NOM p-value'] < nom_pvalue_cutoff).any()
    # NOTE(review): only rows 0 and 1 are compared — presumes one row per condition; confirm
    flips_sign = nes_values[0] * nes_values[1] < 0
    criteria_met = passes_cutoff and flips_sign

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms for the two conditions. Copy explicitly: the following cells assign
# columns on this frame, which should not act on (or warn about) a slice of KEGG_terms.
KEGG_terms_df = KEGG_terms[KEGG_terms['Term.1'].isin(different_terms) & KEGG_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
KEGG_terms_df['Condition'] =  KEGG_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
KEGG_terms_df['Term.1'] =  KEGG_terms_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters, marking the cut with "...".

    Strings of at most ``max_length`` characters are returned unchanged. Longer
    strings keep their first ``max_length`` characters followed by "...", so the
    result can be three characters longer than ``max_length``.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
KEGG_terms_df['Term.1_truncated'] = KEGG_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected terms (rows) in the two conditions (columns).
# Rows are labelled by the truncated term name.
KEGG_terms_df.set_index('Term.1_truncated', inplace=True)

# Condition labels are underscore-separated; fields 0, 1 and 4 are read as sex, treatment and region.
# Split once and assign each field as a plain Series.
condition_parts = KEGG_terms_df['Condition'].str.split('_', expand=True)
KEGG_terms_df['Sex'] = condition_parts[0]
KEGG_terms_df['Treatment'] = condition_parts[1]
KEGG_terms_df['Region'] = condition_parts[4]

# Wide tables: one row per term, one column per condition
pivot_df_logFC = KEGG_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = KEGG_terms_df.pivot(columns='Condition', values="FDR p-value")
# Cell annotation: '*' where the FDR p-value is below 0.05, blank otherwise
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition combinations without a value are drawn at NES 0 (the colormap centre)
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
KEGG_terms_df_unique = KEGG_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
KEGG_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are handed out in order of first appearance of each level in this table
sex_lut = dict(zip(KEGG_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(KEGG_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(KEGG_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (indexed like the heatmap columns)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = KEGG_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = KEGG_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = KEGG_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one colored patch per level
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(KEGG_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Create the clustermap (dendrogram_ratio is left at the seaborn default in this figure)
cg = sns.clustermap(
    pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
    center=0, annot=pivot_df_pvalues, fmt='',
    col_colors=col_colors, xticklabels=False, square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while the label reads 'p-value' — confirm wording
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'], frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current bounding box unchanged, then shift the column-color strip up by 0.010
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

### Female AAV2 

In [None]:
# The two result sets to contrast: pericentral vs. periportal (female, AVV2, msigdb_4 results)
conditions_to_check=['female_AVV2_vs_Control_pericentral_Results_msigdb_4','female_AVV2_vs_Control_periportal_Results_msigdb_4']
# NOTE(review): combined_df does not appear to be used in the rest of this section; this looks like a copy-paste leftover — confirm before removing
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = KEGG_terms[KEGG_terms['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored for both conditions, have a nominal p-value below the
# cutoff in at least one row, and whose first two NES values have opposite signs.
nom_pvalue_cutoff = 0.2
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term present in only one of the conditions cannot be compared
    if len(conditions) < 2:
        continue

    nes_values = filtered_df_term['NES'].tolist()
    passes_cutoff = (filtered_df_term['NOM p-value'] < nom_pvalue_cutoff).any()
    # NOTE(review): only rows 0 and 1 are compared — presumes one row per condition; confirm
    flips_sign = nes_values[0] * nes_values[1] < 0
    criteria_met = passes_cutoff and flips_sign

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms for the two conditions. Copy explicitly: the following cells assign
# columns on this frame, which should not act on (or warn about) a slice of KEGG_terms.
KEGG_terms_df = KEGG_terms[KEGG_terms['Term.1'].isin(different_terms) & KEGG_terms['Condition'].isin(conditions_to_check)].copy()

In [None]:
KEGG_terms_df['Condition'] =  KEGG_terms_df['Condition'].str.replace('_Results_msigdb_4', '', regex=False) 

In [None]:
KEGG_terms_df['Term.1'] =  KEGG_terms_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters, marking the cut with "...".

    Strings of at most ``max_length`` characters are returned unchanged. Longer
    strings keep their first ``max_length`` characters followed by "...", so the
    result can be three characters longer than ``max_length``.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
KEGG_terms_df['Term.1_truncated'] = KEGG_terms_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected terms (rows) in the two conditions (columns).
# Rows are labelled by the truncated term name.
KEGG_terms_df.set_index('Term.1_truncated', inplace=True)

# Condition labels are underscore-separated; fields 0, 1 and 4 are read as sex, treatment and region.
# Split once and assign each field as a plain Series.
condition_parts = KEGG_terms_df['Condition'].str.split('_', expand=True)
KEGG_terms_df['Sex'] = condition_parts[0]
KEGG_terms_df['Treatment'] = condition_parts[1]
KEGG_terms_df['Region'] = condition_parts[4]

# Wide tables: one row per term, one column per condition
pivot_df_logFC = KEGG_terms_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = KEGG_terms_df.pivot(columns='Condition', values="FDR p-value")
# Cell annotation: '*' where the FDR p-value is below 0.05, blank otherwise
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition combinations without a value are drawn at NES 0 (the colormap centre)
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
KEGG_terms_df_unique = KEGG_terms_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
KEGG_terms_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are handed out in order of first appearance of each level in this table
sex_lut = dict(zip(KEGG_terms_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(KEGG_terms_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(KEGG_terms_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (indexed like the heatmap columns)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = KEGG_terms_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = KEGG_terms_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = KEGG_terms_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one colored patch per level
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(KEGG_terms_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Create the clustermap
cg = sns.clustermap(
    pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
    col_colors=col_colors, xticklabels=False, square=True,
    cbar_pos=(0.02, 0.55, 0.05, 0.18), col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=10)
# NOTE(review): the stars come from the 'FDR p-value' column while the label reads 'p-value' — confirm wording
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'], frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current bounding box unchanged, then shift the column-color strip up by 0.010
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

## msigdb_2

In [None]:
# Load every '*_msigdb_2.csv' GSEA result table and tag its rows with the file name minus '.csv'.
# NOTE(review): os.path.join discards inpath whenever results_folder is an absolute path — confirm the intended location
folder_path_gsea = os.path.join(inpath, results_folder, 'GSEA_results')
msigdb_2_dfs = []  # One DataFrame per result file

# os.listdir returns entries in arbitrary order; sort so the concatenated table, and everything
# derived from its row order, is reproducible across runs and machines.
for filename in sorted(os.listdir(folder_path_gsea)):
    if not filename.endswith('_msigdb_2.csv'):
        continue
    file_path = os.path.join(folder_path_gsea, filename)
    df = pd.read_csv(file_path)
    # Strip only the trailing extension, never a '.csv' occurring inside the name
    df['Condition'] = os.path.splitext(filename)[0]
    msigdb_2_dfs.append(df)

In [None]:
combined_gsea_2_df = pd.concat(msigdb_2_dfs, ignore_index=True)

In [None]:
combined_gsea_2_df

In [None]:
combined_gsea_2 = combined_gsea_2_df.copy()

In [None]:
combined_gsea_2_df['Term.1']

In [None]:
combined_gsea_2_df['abs_NES'] = abs(combined_gsea_2_df['NES'])

In [None]:
top_gsea_2_terms = combined_gsea_2_df.sort_values(by = 'abs_NES', ascending=False).drop_duplicates('Term.1')['Term.1'].head(35).tolist()

In [None]:
top_gsea_2_terms

In [None]:
# Keep only the rows of the top terms. Copy explicitly: later cells change the index in place and
# assign columns on this frame, which should not act on (or warn about) a slice of the full table.
combined_gsea_2_df = combined_gsea_2_df[combined_gsea_2_df['Term.1'].isin(top_gsea_2_terms)].copy()

In [None]:
combined_gsea_2_df

In [None]:
combined_gsea_2_df.set_index('Term.1', inplace=True)

In [None]:
# Column order for the heatmap: grouped by sex, then treatment, then region
# (periportal, other, pericentral).
predefined_order = [
    f'{sex}_{treatment}_vs_Control_{region}_Results_msigdb_2'
    for sex in ('female', 'male')
    for treatment in ('AVV2', 'AVV9')
    for region in ('periportal', 'other', 'pericentral')
]

In [None]:
# Heatmap of NES for the top terms (rows) across all conditions (columns, in predefined_order).

# Condition labels are underscore-separated; fields 0, 1 and 4 are read as sex, treatment and region.
# Split once and assign each field as a plain Series.
condition_parts = combined_gsea_2_df['Condition'].str.split('_', expand=True)
combined_gsea_2_df['Sex'] = condition_parts[0]
combined_gsea_2_df['Treatment'] = condition_parts[1]
combined_gsea_2_df['Region'] = condition_parts[4]

# Wide tables: one row per term, one column per condition
pivot_df_logFC = combined_gsea_2_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = combined_gsea_2_df.pivot(columns='Condition', values="FDR p-value")
# Cell annotation: '*' where the FDR p-value is below 0.05, blank otherwise
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition combinations without a value are drawn at NES 0 (the colormap centre)
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
combined_gsea_2_df_unique = combined_gsea_2_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
combined_gsea_2_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are handed out in order of first appearance of each level in this table
sex_lut = dict(zip(combined_gsea_2_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(combined_gsea_2_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(combined_gsea_2_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (indexed by condition)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = combined_gsea_2_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = combined_gsea_2_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = combined_gsea_2_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one colored patch per level
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(combined_gsea_2_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Create the clustermap; selecting predefined_order raises KeyError if a listed condition is absent
cg = sns.clustermap(
    pivot_df_logFC[predefined_order], cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues[predefined_order], fmt='',
    col_colors=col_colors, xticklabels=False, square=True,
    cbar_pos=(-0.05, 0.55, 0.05, 0.18), col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=5)
# NOTE(review): the stars come from the 'FDR p-value' column while the label reads 'p-value' — confirm wording
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'], frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current bounding box unchanged, then shift the column-color strip up by 0.010
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

Can we also check whether these gene sets respond differently in pericentral versus periportal regions within each sex/treatment condition?

In [None]:
combined_gsea_2

### Male AAV9 

In [None]:
# The two result sets to contrast: pericentral vs. periportal (male, AVV9, msigdb_2 results)
conditions_to_check=['male_AVV9_vs_Control_pericentral_Results_msigdb_2','male_AVV9_vs_Control_periportal_Results_msigdb_2']
# NOTE(review): combined_df does not appear to be used in the rest of this section; this looks like a copy-paste leftover — confirm before removing
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_gsea_2[combined_gsea_2['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored for both conditions, have a nominal p-value below the
# cutoff in at least one row, and whose first two NES values have opposite signs.
nom_pvalue_cutoff = 0.01
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term present in only one of the conditions cannot be compared
    if len(conditions) < 2:
        continue

    nes_values = filtered_df_term['NES'].tolist()
    passes_cutoff = (filtered_df_term['NOM p-value'] < nom_pvalue_cutoff).any()
    # NOTE(review): only rows 0 and 1 are compared — presumes one row per condition; confirm
    flips_sign = nes_values[0] * nes_values[1] < 0
    criteria_met = passes_cutoff and flips_sign

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms for the two conditions. Copy explicitly: the following cells assign
# columns on this frame, which should not act on (or warn about) a slice of combined_gsea_2.
combined_gsea_2_df = combined_gsea_2[combined_gsea_2['Term.1'].isin(different_terms) & combined_gsea_2['Condition'].isin(conditions_to_check)].copy()

In [None]:
combined_gsea_2_df['Condition'] =  combined_gsea_2_df['Condition'].str.replace('_Results_msigdb_2', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters, marking the cut with "...".

    Strings of at most ``max_length`` characters are returned unchanged. Longer
    strings keep their first ``max_length`` characters followed by "...", so the
    result can be three characters longer than ``max_length``.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
combined_gsea_2_df['Term.1_truncated'] = combined_gsea_2_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
combined_gsea_2_df

In [None]:
# Heatmap of NES for the selected terms (rows) in the two conditions (columns).
# Rows are labelled by the truncated term name.
combined_gsea_2_df.set_index('Term.1_truncated', inplace=True)

# Condition labels are underscore-separated; fields 0, 1 and 4 are read as sex, treatment and region.
# Split once and assign each field as a plain Series.
condition_parts = combined_gsea_2_df['Condition'].str.split('_', expand=True)
combined_gsea_2_df['Sex'] = condition_parts[0]
combined_gsea_2_df['Treatment'] = condition_parts[1]
combined_gsea_2_df['Region'] = condition_parts[4]

# Wide tables: one row per term, one column per condition
pivot_df_logFC = combined_gsea_2_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = combined_gsea_2_df.pivot(columns='Condition', values="FDR p-value")
# Cell annotation: '*' where the FDR p-value is below 0.05, blank otherwise
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition combinations without a value are drawn at NES 0 (the colormap centre)
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
combined_gsea_2_df_unique = combined_gsea_2_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
combined_gsea_2_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are handed out in order of first appearance of each level in this table
sex_lut = dict(zip(combined_gsea_2_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(combined_gsea_2_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(combined_gsea_2_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (indexed like the heatmap columns)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = combined_gsea_2_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = combined_gsea_2_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = combined_gsea_2_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one colored patch per level
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(combined_gsea_2_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Create the clustermap
cg = sns.clustermap(
    pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
    col_colors=col_colors, xticklabels=False, square=True,
    cbar_pos=(-0.05, 0.55, 0.05, 0.18), col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=5)
# NOTE(review): the stars come from the 'FDR p-value' column while the label reads 'p-value' — confirm wording
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'], frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current bounding box unchanged, then shift the column-color strip up by 0.010
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

### Male AAV2 

In [None]:
# The two result sets to contrast: pericentral vs. periportal (male, AVV2, msigdb_2 results)
conditions_to_check=['male_AVV2_vs_Control_pericentral_Results_msigdb_2','male_AVV2_vs_Control_periportal_Results_msigdb_2']
# NOTE(review): combined_df does not appear to be used in the rest of this section; this looks like a copy-paste leftover — confirm before removing
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_gsea_2[combined_gsea_2['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored for both conditions, have a nominal p-value below the
# cutoff in at least one row, and whose first two NES values have opposite signs.
nom_pvalue_cutoff = 0.01
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term present in only one of the conditions cannot be compared
    if len(conditions) < 2:
        continue

    nes_values = filtered_df_term['NES'].tolist()
    passes_cutoff = (filtered_df_term['NOM p-value'] < nom_pvalue_cutoff).any()
    # NOTE(review): only rows 0 and 1 are compared — presumes one row per condition; confirm
    flips_sign = nes_values[0] * nes_values[1] < 0
    criteria_met = passes_cutoff and flips_sign

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms for the two conditions. Copy explicitly: the following cells assign
# columns on this frame, which should not act on (or warn about) a slice of combined_gsea_2.
combined_gsea_2_df = combined_gsea_2[combined_gsea_2['Term.1'].isin(different_terms) & combined_gsea_2['Condition'].isin(conditions_to_check)].copy()

In [None]:
combined_gsea_2_df['Condition'] =  combined_gsea_2_df['Condition'].str.replace('_Results_msigdb_2', '', regex=False) 

In [None]:
# Remove every literal 'KEGG_' occurrence from the term names.
# NOTE(review): the other msigdb_2 sections have no such step; this looks carried over from the KEGG analysis — confirm it is needed
combined_gsea_2_df['Term.1'] =  combined_gsea_2_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
def truncate_string(input_string, max_length):
    """Shorten ``input_string`` to ``max_length`` characters, marking the cut with "...".

    Strings of at most ``max_length`` characters are returned unchanged. Longer
    strings keep their first ``max_length`` characters followed by "...", so the
    result can be three characters longer than ``max_length``.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
combined_gsea_2_df['Term.1_truncated'] = combined_gsea_2_df['Term.1'].apply(lambda x: truncate_string(x, 45))

In [None]:
# Heatmap of NES for the selected terms (rows) in the two conditions (columns).
# Rows are labelled by the truncated term name.
combined_gsea_2_df.set_index('Term.1_truncated', inplace=True)

# Condition labels are underscore-separated; fields 0, 1 and 4 are read as sex, treatment and region.
# Split once and assign each field as a plain Series.
condition_parts = combined_gsea_2_df['Condition'].str.split('_', expand=True)
combined_gsea_2_df['Sex'] = condition_parts[0]
combined_gsea_2_df['Treatment'] = condition_parts[1]
combined_gsea_2_df['Region'] = condition_parts[4]

# Wide tables: one row per term, one column per condition
pivot_df_logFC = combined_gsea_2_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = combined_gsea_2_df.pivot(columns='Condition', values="FDR p-value")
# Cell annotation: '*' where the FDR p-value is below 0.05, blank otherwise
pivot_df_pvalues = pivot_df_pvalues.applymap(lambda x: '*' if x < 0.05 else ' ')

# Term/condition combinations without a value are drawn at NES 0 (the colormap centre)
pivot_df_logFC.fillna(0, inplace=True)

# Map the conditions to colors
combined_gsea_2_df_unique = combined_gsea_2_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
combined_gsea_2_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are handed out in order of first appearance of each level in this table
sex_lut = dict(zip(combined_gsea_2_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(combined_gsea_2_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(combined_gsea_2_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (indexed like the heatmap columns)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = combined_gsea_2_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = combined_gsea_2_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = combined_gsea_2_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch as group title, then one colored patch per level
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(combined_gsea_2_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))

# Create the clustermap
cg = sns.clustermap(
    pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
    col_colors=col_colors, xticklabels=False, square=True,
    cbar_pos=(-0.05, 0.55, 0.05, 0.18), col_cluster=False,
)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=5)
# NOTE(review): the stars come from the 'FDR p-value' column while the label reads 'p-value' — confirm wording
cond_legend = cg.ax_heatmap.legend(
    labels=['p-value < 0.05'], frameon=False,
    handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)],
    bbox_to_anchor=(0.01, 0.575),
)
# Re-add the first legend as an artist so the second legend() call does not replace it
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Remove the default axis labels
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current bounding box unchanged, then shift the column-color strip up by 0.010
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0, heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

### Female AAV9 

In [None]:
# The two result sets to contrast: pericentral vs. periportal (female, AVV9, msigdb_2 results)
conditions_to_check=['female_AVV9_vs_Control_pericentral_Results_msigdb_2','female_AVV9_vs_Control_periportal_Results_msigdb_2']
# NOTE(review): combined_df does not appear to be used in the rest of this section; this looks like a copy-paste leftover — confirm before removing
combined_df.fillna(1, inplace=True)

In [None]:
filtered_df = combined_gsea_2[combined_gsea_2['Condition'].isin(conditions_to_check)]

In [None]:
filtered_df

In [None]:
all_terms = filtered_df['Term.1'].unique().tolist()

In [None]:
# Collect the terms that were scored for both conditions, have a nominal p-value below the
# cutoff in at least one row, and whose first two NES values have opposite signs.
nom_pvalue_cutoff = 0.01
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term present in only one of the conditions cannot be compared
    if len(conditions) < 2:
        continue

    nes_values = filtered_df_term['NES'].tolist()
    passes_cutoff = (filtered_df_term['NOM p-value'] < nom_pvalue_cutoff).any()
    # NOTE(review): only rows 0 and 1 are compared — presumes one row per condition; confirm
    flips_sign = nes_values[0] * nes_values[1] < 0
    criteria_met = passes_cutoff and flips_sign

    if criteria_met:
        different_terms.append(current_term)

In [None]:
different_terms

In [None]:
# Rows of the selected terms for the two conditions. Copy explicitly: the following cells assign
# columns on this frame, which should not act on (or warn about) a slice of combined_gsea_2.
combined_gsea_2_df = combined_gsea_2[combined_gsea_2['Term.1'].isin(different_terms) & combined_gsea_2['Condition'].isin(conditions_to_check)].copy()

In [None]:
combined_gsea_2_df['Condition'] =  combined_gsea_2_df['Condition'].str.replace('_Results_msigdb_2', '', regex=False) 

In [None]:
def truncate_string(input_string: str, max_length: int) -> str:
    """Shorten ``input_string`` to ``max_length`` characters plus an ellipsis.

    Strings no longer than ``max_length`` are returned unchanged. Longer ones
    are cut to their first ``max_length`` characters and get "..." appended,
    so the result can be up to ``max_length + 3`` characters long.
    """
    if len(input_string) <= max_length:
        return input_string
    return input_string[:max_length] + "..."

In [None]:
# Shortened term labels (first 45 characters plus "...") for the heatmap rows.
combined_gsea_2_df['Term.1_truncated'] = combined_gsea_2_df['Term.1'].map(
    lambda term: truncate_string(term, 45)
)

In [None]:
# Display the table that feeds the heatmap below.
combined_gsea_2_df

In [None]:
# Use the shortened term label as the row index; pivot() below turns it into
# the heatmap rows.
combined_gsea_2_df.set_index('Term.1_truncated', inplace=True)

# Condition names have the form '<sex>_<treatment>_vs_Control_<region>' once
# the result suffix is stripped. Split once and pick the pieces by position
# (scalar column selection, so each assignment receives a Series).
condition_parts = combined_gsea_2_df['Condition'].str.split('_', expand=True)
combined_gsea_2_df['Sex'] = condition_parts[0]
combined_gsea_2_df['Treatment'] = condition_parts[1]
combined_gsea_2_df['Region'] = condition_parts[4]

# Despite its name, pivot_df_logFC holds NES values (terms x conditions).
pivot_df_logFC = combined_gsea_2_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = combined_gsea_2_df.pivot(columns='Condition', values="FDR p-value")
# '*' marks FDR p-values below 0.05; missing term/condition pairs compare as
# False and stay blank. Vectorised to avoid the deprecated DataFrame.applymap.
pivot_df_pvalues = pd.DataFrame(
    np.where(pivot_df_pvalues < 0.05, '*', ' '),
    index=pivot_df_pvalues.index,
    columns=pivot_df_pvalues.columns,
)

# Terms missing in a condition are drawn as NES 0 (the colormap centre).
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
combined_gsea_2_df_unique = combined_gsea_2_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
combined_gsea_2_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are assigned in order of first appearance of each category value.
sex_lut = dict(zip(combined_gsea_2_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(combined_gsea_2_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(combined_gsea_2_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = combined_gsea_2_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = combined_gsea_2_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = combined_gsea_2_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the heading of each group,
# followed by one colored patch per category value.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(combined_gsea_2_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(-0.05, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=5)
# NOTE(review): the stars are derived from the 'FDR p-value' column; consider
# saying so in the legend label.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current position unchanged, then shift the
# column-color strip's y0 up by 0.010.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0,
                            heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

### Female AAV2 

In [None]:
# Female AAV2-vs-control GSEA result sets: pericentral and periportal.
# The 'AVV2' spelling is kept verbatim because these strings are matched
# against the values of the 'Condition' column further down.
conditions_to_check = [
    'female_AVV2_vs_Control_pericentral_Results_msigdb_2',
    'female_AVV2_vs_Control_periportal_Results_msigdb_2',
]
# Replace any missing values in combined_df with 1, in place.
combined_df.fillna(value=1, inplace=True)

In [None]:
# Keep only the GSEA rows that belong to the conditions being compared.
filtered_df = combined_gsea_2.loc[combined_gsea_2['Condition'].isin(conditions_to_check)]

In [None]:
# Display the rows retained for the conditions under comparison.
filtered_df

In [None]:
# Every distinct term in the selection, in order of first appearance.
all_terms = filtered_df['Term.1'].drop_duplicates().tolist()

In [None]:
# Collect the terms whose NES has opposite signs in the two selected
# conditions and that reach a nominal p-value below 0.01 in at least one row.
different_terms = []

for current_term in all_terms:
    filtered_df_term = filtered_df.loc[filtered_df['Term.1'] == current_term]
    conditions = filtered_df_term['Condition'].unique()

    # A term reported for fewer than two conditions cannot be compared.
    if len(conditions) < 2:
        continue

    nes_values = filtered_df_term['NES'].tolist()
    # Only the first two rows of the term are compared; their product is
    # negative exactly when the two NES values differ in sign.
    criteria_met = bool((filtered_df_term['NOM p-value'] < 0.01).any()) and \
        (nes_values[0] * nes_values[1] < 0)

    if criteria_met:
        different_terms.append(current_term)

In [None]:
# Display the terms that passed the opposite-direction filter.
different_terms

In [None]:
# Rows of the selected conditions whose term passed the opposite-direction
# filter. An explicit copy is taken because the following cells overwrite and
# add columns on this frame; writing into a bare boolean-indexed selection of
# `combined_gsea_2` triggers pandas' SettingWithCopyWarning.
combined_gsea_2_df = combined_gsea_2[
    combined_gsea_2['Term.1'].isin(different_terms)
    & combined_gsea_2['Condition'].isin(conditions_to_check)
].copy()

In [None]:
# Drop the literal '_Results_msigdb_2' suffix so the condition names are shorter.
combined_gsea_2_df['Condition'] =  combined_gsea_2_df['Condition'].str.replace('_Results_msigdb_2', '', regex=False) 

In [None]:
# Remove every literal 'KEGG_' occurrence from the term names (plain text, not a regex).
combined_gsea_2_df['Term.1'] =  combined_gsea_2_df['Term.1'].str.replace('KEGG_', '', regex=False) 

In [None]:
def truncate_string(input_string: str, max_length: int) -> str:
    """Return ``input_string`` cut to ``max_length`` characters with "..." added.

    A string that already fits within ``max_length`` characters comes back
    unchanged. Otherwise the first ``max_length`` characters are kept and
    "..." is appended, so the result may be ``max_length + 3`` characters long.
    """
    needs_cut = len(input_string) > max_length
    return input_string[:max_length] + "..." if needs_cut else input_string

In [None]:
# Shortened term labels (first 45 characters plus "...") for the heatmap rows.
combined_gsea_2_df['Term.1_truncated'] = combined_gsea_2_df['Term.1'].map(
    lambda term: truncate_string(term, 45)
)

In [None]:
# Use the shortened term label as the row index; pivot() below turns it into
# the heatmap rows.
combined_gsea_2_df.set_index('Term.1_truncated', inplace=True)

# Condition names have the form '<sex>_<treatment>_vs_Control_<region>' once
# the result suffix is stripped. Split once and pick the pieces by position
# (scalar column selection, so each assignment receives a Series).
condition_parts = combined_gsea_2_df['Condition'].str.split('_', expand=True)
combined_gsea_2_df['Sex'] = condition_parts[0]
combined_gsea_2_df['Treatment'] = condition_parts[1]
combined_gsea_2_df['Region'] = condition_parts[4]

# Despite its name, pivot_df_logFC holds NES values (terms x conditions).
pivot_df_logFC = combined_gsea_2_df.pivot(columns='Condition', values="NES")
pivot_df_pvalues = combined_gsea_2_df.pivot(columns='Condition', values="FDR p-value")
# '*' marks FDR p-values below 0.05; missing term/condition pairs compare as
# False and stay blank. Vectorised to avoid the deprecated DataFrame.applymap.
pivot_df_pvalues = pd.DataFrame(
    np.where(pivot_df_pvalues < 0.05, '*', ' '),
    index=pivot_df_pvalues.index,
    columns=pivot_df_pvalues.columns,
)

# Terms missing in a condition are drawn as NES 0 (the colormap centre).
pivot_df_logFC.fillna(0, inplace=True)


# Map the conditions to colors
combined_gsea_2_df_unique = combined_gsea_2_df[['Condition', 'Sex', 'Treatment', 'Region']].drop_duplicates()
combined_gsea_2_df_unique.set_index("Condition", inplace=True)

sex_colors = ['#c66874', '#67c1ca']
treatment_colors = ['#687ac8', '#c8b866']
region_colors = ['#c98367', '#6ac989', '#9c69c8']

# Colors are assigned in order of first appearance of each category value.
sex_lut = dict(zip(combined_gsea_2_df_unique['Sex'].unique(), sex_colors))
treatment_lut = dict(zip(combined_gsea_2_df_unique['Treatment'].unique(), treatment_colors))
region_lut = dict(zip(combined_gsea_2_df_unique['Region'].unique(), region_colors))

# Convert the conditions to a DataFrame of colors (one row per heatmap column)
col_colors = pd.DataFrame(index=pivot_df_logFC.columns)
col_colors['Sex'] = combined_gsea_2_df_unique['Sex'].map(sex_lut)
col_colors['Treatment'] = combined_gsea_2_df_unique['Treatment'].map(treatment_lut)
col_colors['Region'] = combined_gsea_2_df_unique['Region'].map(region_lut)

# Legend entries: an invisible patch acts as the heading of each group,
# followed by one colored patch per category value.
legend_elements = []
for cond, cmap in zip(['Sex', 'Treatment', 'Region'], [sex_colors, treatment_colors, region_colors]):
    legend_elements.append(Patch(facecolor='none', edgecolor='none', label=cond + ':'))
    for label, color in zip(combined_gsea_2_df_unique[cond].unique(), cmap):
        legend_elements.append(Patch(facecolor=color, label=label))


# Create the clustermap
cg = sns.clustermap(pivot_df_logFC, cmap="vlag", figsize=(10, 10), linewidths=0.75, linecolor='black',
                    dendrogram_ratio=(.175, .025), center=0, annot=pivot_df_pvalues, fmt='',
                    col_colors=col_colors, xticklabels=False, square=True, cbar_pos=(-0.05, 0.55, 0.05, 0.18), col_cluster=False)
cg.ax_row_dendrogram.set_visible(False)  # suppress row dendrogram
cg.ax_col_dendrogram.set_visible(False)  # suppress column dendrogram
cg.cax.set_title('NES', pad=5)
# NOTE(review): the stars are derived from the 'FDR p-value' column; consider
# saying so in the legend label.
cond_legend = cg.ax_heatmap.legend(labels=['p-value < 0.05'], frameon=False, handles=[plt.Line2D([], [], marker='*', color='black', linestyle='None', lw=2)], bbox_to_anchor=(0.01, 0.575))
# Re-add the first legend as an artist so the second legend() call does not replace it.
cg.ax_heatmap.add_artist(cond_legend)
cg.ax_heatmap.legend(handles=legend_elements, bbox_to_anchor=(0, 1.15), ncol=1, frameon=False)

# Manually set the x-axis title at the top
cg.ax_heatmap.set_title('Condition', y=1.125)

# Optionally, if you want to remove the default x-axis label
cg.ax_heatmap.set_xlabel('')
cg.ax_heatmap.set_ylabel('')

# Re-apply the heatmap's current position unchanged, then shift the
# column-color strip's y0 up by 0.010.
heatmap_box = cg.ax_heatmap.get_position()
cg.ax_heatmap.set_position([heatmap_box.x0, heatmap_box.y0,
                            heatmap_box.width, heatmap_box.height])
col_colors_box = cg.ax_col_colors.get_position()
cg.ax_col_colors.set_position([col_colors_box.x0, col_colors_box.y0 + 0.010,
                               col_colors_box.width, col_colors_box.height])

In [None]:
! jupyter nbconvert --to html 14d_VisualizationDEG_Results.ipynb