In [None]:
import scanpy as sc
import pandas as pd
import os
import anndata as ad
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import squidpy as sq
from wrapper_functions import *
# Apply seaborn's default theme to the matplotlib figures produced below.
sns.set()

In [None]:
# Automatically re-load imported modules (e.g. wrapper_functions) before each cell
# is executed, so edits to them are picked up without restarting the kernel.
# Find details here: https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [None]:
# Record the versions of scanpy and its dependencies in the notebook output.
sc.logging.print_versions()

# Default figure look for every scanpy plot below: white background, 6x6 canvas.
sc.set_figure_params(
    facecolor="white",
    figsize=(6, 6),
)

# Raise scanpy's logging verbosity to level 3.
sc.settings.verbosity = 3

In [None]:
# Analysis parameters built from the wrapper_functions helpers.
# NOTE(review): Protocol.FF presumably denotes fresh-frozen samples — confirm in wrapper_functions.
organism = Organism.mouse
analyze_params = Analyze(
    organism=organism,
    protocol=Protocol.FF,
)

In [None]:
# Folder layout: analysed samples live in '<current working directory>/analyzed'.
root_path = os.getcwd()
results_folder = os.path.join(root_path, 'analyzed')

# Replace with the location of your samples
inpath = 'your_inpath_folder'

In [None]:
# Gene-set table; later cells read its 'collection', 'geneset' and 'genesymbol' columns.
msigdb_mouse = pd.read_csv(filepath_or_buffer="msigdb_complete.csv")

In [None]:
# Show every row that belongs to the 'go_biological_process' collection.
msigdb_mouse.loc[msigdb_mouse['collection'] == 'go_biological_process']

In [None]:
# Show the rows of the fatty-acid catabolic process gene set.
msigdb_mouse.loc[msigdb_mouse['geneset'] == 'GOBP_FATTY_ACID_CATABOLIC_PROCESS']

In [None]:
# Show the rows of the fatty-acid beta-oxidation gene set.
msigdb_mouse.loc[msigdb_mouse['geneset'] == 'GOBP_FATTY_ACID_BETA_OXIDATION']

In [None]:
# Show the rows of the rhythmic-behavior gene set.
msigdb_mouse.loc[msigdb_mouse['geneset'] == 'GOBP_RHYTHMIC_BEHAVIOR']

In [None]:
def _genes_in_geneset(geneset_name):
    """Return the unique gene symbols listed for `geneset_name` in msigdb_mouse."""
    in_geneset = msigdb_mouse['geneset'] == geneset_name
    return msigdb_mouse.loc[in_geneset, 'genesymbol'].unique().tolist()


# Gene lists of the three GO terms that are scored on the tissue below.
genes_fatty_accid_catabolic_process = _genes_in_geneset('GOBP_FATTY_ACID_CATABOLIC_PROCESS')
genes_fatty_accid_beta_oxidation = _genes_in_geneset('GOBP_FATTY_ACID_BETA_OXIDATION')
genes_rhytmic_behavior = _genes_in_geneset('GOBP_RHYTHMIC_BEHAVIOR')

In [None]:
# Collect every regular file of the results folder. os.listdir returns entries in
# arbitrary order, so they are sorted to keep the sample order (and therefore the
# concatenation and plotting order below) reproducible across runs and machines.
file_names = sorted(
    f
    for f in os.listdir(results_folder)
    if os.path.isfile(os.path.join(results_folder, f))
)

# Load each .h5ad file as an AnnData object. `ad.read` is a deprecated alias of
# `ad.read_h5ad`, so the explicit reader is called directly.
adata_list = [
    ad.read_h5ad(os.path.join(results_folder, file))
    for file in file_names
    if file.endswith('.h5ad')
]

In [None]:
# Normalize every sample in place and keep the rounded and the normalized matrix as layers.
# NOTE: this cell is not idempotent. X is overwritten with log-normalized values, so
# re-running it without reloading adata_list would store rounded log values as 'counts'.
for adata in adata_list:

    # We store the (rounded) raw counts into the layers attribute for further usage.
    adata.X = np.round(adata.X)
    adata.layers['counts'] = adata.X.copy()

    # Library-size normalization followed by log1p; both modify adata.X in place.
    sc.pp.normalize_total(adata, inplace=True)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=2000, inplace=True)

    # Copy (as done for 'counts') so the layer does not alias adata.X and cannot be
    # changed by later in-place operations on X.
    adata.layers['normalized'] = adata.X.copy()

    # Gene-set scoring and spatial plotting are done on the concatenated object in later cells.

In [None]:
# Merge all samples into one AnnData object. Each observation is labelled (column
# 'readout_id') with the library key found in its sample's uns["spatial"] entry.
library_keys = [
    library_key
    for adata in adata_list
    for library_key in adata.uns["spatial"]
]

adata_concat = sc.concat(
    adata_list,
    join='outer',
    label="readout_id",
    keys=library_keys,
    index_unique="-",
    uns_merge="unique",
)

In [None]:
# Display the summary of the concatenated AnnData object.
adata_concat

In [None]:
# Score each gene set on the concatenated data; every call writes its result under
# the given score name.
geneset_scores = {
    'fatty_accid_catabolic_process': genes_fatty_accid_catabolic_process,
    'fatty_accid_beta_oxidation': genes_fatty_accid_beta_oxidation,
    'rhytmic_behavior': genes_rhytmic_behavior,
}

for score_name, gene_list in geneset_scores.items():
    sc.tl.score_genes(
        adata_concat,
        gene_list=gene_list,
        score_name=score_name,
        ctrl_size=100,
        n_bins=50,
    )

In [None]:
# For every sample: print its identifiers, then draw one spatial plot per gene-set score.
score_columns = [
    'fatty_accid_catabolic_process',
    'fatty_accid_beta_oxidation',
    'rhytmic_behavior',
]

for library in adata_concat.obs["readout_id"].unique().tolist():
    # Work on a copy restricted to the spots of the current sample.
    current_adata = adata_concat[adata_concat.obs.readout_id == library, :].copy()
    print(library)
    print(current_adata.obs['Condition'].unique()[0])
    print(current_adata.obs['Gender'].unique()[0])

    # Same plot settings for every score; only the colored column changes.
    for score_column in score_columns:
        sc.pl.spatial(
            current_adata,
            img_key="hires",
            library_id=library,
            color=score_column,
            size=1.25,
            cmap='coolwarm')



What about focusing on the leading-edge genes?

In [None]:
inpath='your_inpath_folder' # Replace with the location of your samples
# NOTE: this rebinds `results_folder` (previously '<root_path>/analyzed'); re-running
# the sample-loading cell after this one would read from the DE folder instead.
results_folder = os.path.join(root_path, 'analyzed', 'DE', 'DE_Results_14c')
# NOTE: results_folder is absolute whenever root_path is (it is set from os.getcwd()),
# and os.path.join discards the components before an absolute one, so `inpath` has
# no effect on the resulting path.
folder_path_gsea = os.path.join(inpath, results_folder, 'GSEA_results')
all_gsea_dfs = []  # One dataframe per GSEA result file

# os.listdir order is arbitrary; sort so the combined table is built in the same
# row order on every run.
for filename in sorted(os.listdir(folder_path_gsea)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(folder_path_gsea, filename)
    df = pd.read_csv(file_path)
    # Tag every row with the file it came from, without the '.csv' part.
    df['Condition'] = filename.replace('.csv', '')
    all_gsea_dfs.append(df)

In [None]:
# Stack the per-file GSEA tables into a single frame with a fresh 0..n-1 index.
combined_gsea_df = pd.concat(objs=all_gsea_dfs, ignore_index=True)

In [None]:
# Keep only the GO biological process terms. `na=False` treats missing term names as
# non-matching instead of producing an invalid boolean mask, and `.copy()` makes the
# result an independent frame, so adding columns to it later does not raise
# SettingWithCopyWarning.
GOBP_results_terms = combined_gsea_df[
    combined_gsea_df['Term.1'].str.startswith('GOBP_', na=False)
].copy()

In [None]:
# Split the '_'-separated condition name once and reuse the pieces. Fields 0, 1 and 4
# hold the sex, the treatment and the region, respectively.
condition_parts = GOBP_results_terms['Condition'].str.split('_', expand=True)

# `assign` returns a new frame, so no column is written into a slice of
# combined_gsea_df, and every new column is taken as a Series.
GOBP_results_terms = GOBP_results_terms.assign(
    Sex=condition_parts[0],
    Treatment=condition_parts[1],
    Region=condition_parts[4],
)

In [None]:
# Display the GOBP results, including the columns derived from 'Condition'.
GOBP_results_terms

In [None]:
# Show the GSEA rows of the fatty-acid catabolic process term across all conditions.
GOBP_results_terms.loc[GOBP_results_terms['Term.1'] == 'GOBP_FATTY_ACID_CATABOLIC_PROCESS']

In [None]:
# Subset of the GOBP results whose 'Sex' field is 'Male'.
GOBP_results_terms_male = GOBP_results_terms.loc[GOBP_results_terms['Sex'] == 'Male']

In [None]:
all_conditions = GOBP_results_terms['Condition'].unique().tolist()
all_terms = ['GOBP_FATTY_ACID_CATABOLIC_PROCESS', 'GOBP_FATTY_ACID_BETA_OXIDATION', 'GOBP_RHYTHMIC_BEHAVIOR']

# Print, for every condition, the leading-edge genes of each term of interest.
for current_condition in all_conditions:

    print(" ")
    print("********************")
    print(current_condition)
    GOBP_results_terms_condition = GOBP_results_terms[GOBP_results_terms['Condition'] == current_condition]

    for current_term in all_terms:
        print(current_term)
        current_leading_genes_df = GOBP_results_terms_condition[GOBP_results_terms_condition['Term.1'] == current_term]

        # 'Leading edge' entries are ';'-separated gene lists. Split each row on ';'
        # so that genes from different rows are never fused into one item (joining
        # rows with ',' before splitting on ';' did exactly that). Missing entries
        # and empty strings are skipped, so no match gives an empty list.
        current_leading_genes = [
            gene
            for leading_edge in current_leading_genes_df['Leading edge'].dropna()
            for gene in leading_edge.split(';')
            if gene
        ]

        print(current_leading_genes)
        print("********************")

In [None]:
# Genes shared by the leading edges of all GSEA results, separately for each term.
common_genes_per_term = {}
GOBP_results_myterms = GOBP_results_terms[GOBP_results_terms['Term.1'].isin(all_terms)]

# Group by term and intersect the leading-edge gene sets of all rows in the group
for term, group in GOBP_results_myterms.groupby('Term.1'):
    # One set of genes per row; entries are ';'-separated.
    leading_edge_sets = [set(genes.split(';')) for genes in group['Leading edge']]

    # Sets have no stable iteration order, so the result is sorted to make the
    # stored lists (and the printout below) identical on every run.
    common_genes_per_term[term] = sorted(set.intersection(*leading_edge_sets))

# Show the results
for term, genes in common_genes_per_term.items():
    print(f"Common genes for term {term}: {genes}")

In [None]:
# Score the hand-picked gene pairs on the concatenated data, one score per pair.
gene_pair_scores = {
    'Eci2_Ech1': ['Eci2', 'Ech1'],
    'Nr1d1_Nr1d2': ['Nr1d1', 'Nr1d2'],
}

for score_name, gene_pair in gene_pair_scores.items():
    sc.tl.score_genes(adata_concat, gene_list=gene_pair, score_name=score_name)

In [None]:
# For every sample: print its identifiers, then draw one spatial plot per gene-pair score.
score_columns = ['Eci2_Ech1', 'Nr1d1_Nr1d2']

for library in adata_concat.obs["readout_id"].unique().tolist():
    # Work on a copy restricted to the spots of the current sample.
    current_adata = adata_concat[adata_concat.obs.readout_id == library, :].copy()
    print(library)
    print(current_adata.obs['Condition'].unique()[0])
    print(current_adata.obs['Gender'].unique()[0])

    # Same plot settings for every score; only the colored column changes.
    for score_column in score_columns:
        sc.pl.spatial(
            current_adata,
            img_key="hires",
            library_id=library,
            color=score_column,
            size=1.25,
            cmap='coolwarm')

In [None]:
# Export this notebook to a standalone HTML report via the nbconvert command-line tool.
! jupyter nbconvert --to html 26_GSEA_SpatialVIsualizations.ipynb