# This Notebook shows the basic usage and functionalities of metaKEGG.
# Data displayed here are artificially generated. Ensemble IDs are arbitrary and do not reflect the official Gene Symbols. The pipelines can process human, mouse or any official gene symbols

In [36]:
import pandas as pd
import metaKEGG
import os

## Variables

In [37]:
GENES_PATHWAYS_FILE_PATH = "./single_input_genes.xlsx"
PATHWAYS_SHEET_NAME = "pathways"
GENES_SHEET_NAME = "gene_metrics"


## Load Data and Inspect input file

In [38]:
pathways_df = pd.read_excel(GENES_PATHWAYS_FILE_PATH , sheet_name=PATHWAYS_SHEET_NAME)
pathways_df.head()

Unnamed: 0,Category,Term,Count,%,PValue,Genes,List Total,Pop Hits,Pop Total,Fold Enrichment Bonferroni,Bonferroni,Benjamini,FDR
0,KEGG_PATHWAY,mmu04932:Non-alcoholic fatty liver disease,130,49.618321,2e-06,"Ndufb4, Bcl2l11, Ndufb8, Atf4, Ndufa13, Il6, I...",130,157,9396,3.18567,0.012949,0.021239,0.003745


In [39]:
pathways_df.columns

Index(['Category', 'Term', 'Count', '%', 'PValue', 'Genes', 'List Total',
       'Pop Hits', 'Pop Total', 'Fold Enrichment Bonferroni', 'Bonferroni',
       'Benjamini', 'FDR'],
      dtype='object')

In [40]:
pathway_ids = pathways_df['Term'].str.split(':')[0][0]
pathway_ids

'mmu04932'

In [41]:
pathway_names = pathways_df['Term'].str.split(':')[0][1]
pathway_names

'Non-alcoholic fatty liver disease'

In [42]:
genes_in_pathway = pathways_df['Genes'].str.split(', ')[0]
genes_in_pathway[0:5]

['Ndufb4', 'Bcl2l11', 'Ndufb8', 'Atf4', 'Ndufa13']

In [43]:
gene_metrics_df = pd.read_excel(GENES_PATHWAYS_FILE_PATH , sheet_name=GENES_SHEET_NAME)
gene_metrics_df.head()

Unnamed: 0,ID,gene_symbol,logFC
0,ENSMUSG00000000001,Il6,1.750551
1,ENSMUSG00000000002,Il6ra,2.086199
2,ENSMUSG00000000003,Socs3,-0.306218
3,ENSMUSG00000000004,Tnf,-2.4997
4,ENSMUSG00000000005,Tnfrsf1a,2.715626


In [44]:
gene_symbols = gene_metrics_df['gene_symbol'].tolist()
logFCs = gene_metrics_df['logFC'].tolist()

gene_symbol_logFC_dict = dict(zip(gene_symbols, logFCs))

## Run Analysis

### Gene Expression

In [45]:
# This function takes a single input file as an argument and maps the detected genes on the enriched KEGG reference pathway, and colors them according to their log2FC values.

In [46]:
# Define parameters
input_file_path = "./single_input_genes.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
count_threshold = 2
pathway_pvalue_threshold = None
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [47]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps,
                                folder_extension=folder_extension)

# Run the pipeline
my_pipeline.gene_expression(benjamini_threshold=benjamini_threshold,
                            count_threshold=count_threshold,
                            pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis: Gene expression...

Will create folder : .\draw_KEGG_2025-12-03_genes
New directory '.\draw_KEGG_2025-12-03_genes' created  

Output folder is .\draw_KEGG_2025-12-03_genes

Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)

Done! 
Output files are located in .\draw_KEGG_2025-12-03_genes


### Transcript expression

In [48]:
# This function takes a single input file as an argument and maps the detected transcripts on the enriched KEGG reference pathway, and colors them according to their log2FC values.

# NOTE: Pathway enrichement analysis with the DAVID Functional Annotation Tool, should be performed using transcript IDs.

In [49]:
# Inspect the file

TRANSCRIPTS_PATHWAYS_FILE_PATH = "./single_input_transcripts.xlsx"
PATHWAYS_SHEET_NAME = "pathways"
TRANSCRIPTS_SHEET_NAME = "transcript_metrics"


In [50]:
transcript_metrics_df = pd.read_excel(TRANSCRIPTS_PATHWAYS_FILE_PATH , sheet_name=TRANSCRIPTS_SHEET_NAME)
transcript_metrics_df.head()

Unnamed: 0,ID,gene_symbol,logFC
0,ENSMUST00000000001,Il6,0.448476
1,ENSMUST00000000002,Il6,2.070121
2,ENSMUST00000000003,Il6ra,2.558087
3,ENSMUST00000000004,Il6ra,1.532812
4,ENSMUST00000000005,Socs3,2.64526


In [51]:
transcript_metrics_df['gene_symbol'].value_counts().head()

gene_symbol
Itch      5
Cycs      4
Fas       4
Srebf1    3
Nfkb1     3
Name: count, dtype: int64

In [52]:
# Define parameters
input_file_path = "./single_input_transcripts.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "transcript_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
count_threshold = 2
pathway_pvalue_threshold = None
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [53]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps,
                                folder_extension=folder_extension)

# Run the pipeline
my_pipeline.transcript_expression(benjamini_threshold=benjamini_threshold,
                                  count_threshold=count_threshold,
                                  pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis: Transcript expression...

Will create folder : .\draw_KEGG_2025-12-03_transcripts
New directory '.\draw_KEGG_2025-12-03_transcripts' created  

Output folder is .\draw_KEGG_2025-12-03_transcripts

Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_transcripts


### Bulk RNAseq mapping

In [54]:
# This function takes RANseq data, as single input file argument, maps the genes on a provided list of target pathways (assuming they are also found in the target pathways), and colors them according to their log2FC values.

In [55]:
# Define parameters
input_file_path = "./single_input_bulk.xlsx"
genes_column = "gene_symbol"
log2fc_column = "logFC"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
save_to_eps = False
folder_extension = None

In [56]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column = genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps)
# Run the pipeline
my_pipeline.bulk_rnaseq_mapping()

Executing analysis : Bulk RNAseq mapping...

Will create folder : .\draw_KEGG_2025-12-03_bulk
New directory '.\draw_KEGG_2025-12-03_bulk' created  

Output folder is .\draw_KEGG_2025-12-03_bulk
Parsing input file...

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_bulk


### Multiple inputs

In [57]:
# This function takes a list of inputs file as an argument and only maps pathways that are found in all of the inputs files.
# For a common pathway, it will generate all possible states for a gene, from each individual input, to all possible combinations and assignes a unique color code to each combination.
#     The detected genes are mapped enriched KEGG reference pathway, based on the state they're in.

In [58]:
# Input files list
input_file_path = ["./single_input_genes.xlsx",
                   "./multiple_inputs_1.xlsx",
                   "./multiple_inputs_2.xlsx"]

# Define parameters
input_label = ["input1" , "input2" , "input3"]
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
count_threshold = 2
pathway_pvalue_threshold = None
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [59]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps)

# Run the pipeline
my_pipeline.multiple_inputs(input_label=input_label, # Label list for each input file
                            count_threshold=count_threshold,
                            benjamini_threshold=benjamini_threshold,
                            pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis : Multiple inputs...

Will create folder : .\draw_KEGG_2025-12-03_3_inputs
New directory '.\draw_KEGG_2025-12-03_3_inputs' created  

Output folder is .\draw_KEGG_2025-12-03_3_inputs

You want to map 3 inputs in total.
Parsing input file...

File Counter: 1, File: ./single_input_genes.xlsx, with name input1

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

File Counter: 2, File: ./multiple_inputs_1.xlsx, with name input2

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

File Counter: 3, File: ./multiple_inputs_2.xlsx, with name input3

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info & mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output 

### Differentially methylated genes

In [60]:
# This function takes a single input file and a methylation metadata file as arguments and maps the detected genes on the enriched KEGG reference pathway, and colors them according to their methylation state.
# The state is defined as a binary reprsentation, depending if DMPs corresponding to a given gene are detected, or not.

In [61]:
# Inspect the metadata file

DMPS_FILE_PATH = "./methylation.csv"

dmps_df = pd.read_csv(DMPS_FILE_PATH)
dmps_df.head()

Unnamed: 0,CG_ID,methylation_gene_symbol,methylation_pval,methylation_logFC
0,cg_1,Ndufs8,0.049801,1.915612
1,cg_2,Uqcrq,0.013704,-0.818593
2,cg_3,Ndufb4b,0.04004,2.648866
3,cg_4,Cox5a,0.035938,2.818396
4,cg_5,Nr1h3,0.030564,-0.230053


In [62]:
# Define parameters
input_file_path = "./single_input_genes.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
methylation_file_path = "./methylation.csv"
methylation_genes_column = "methylation_gene_symbol"
methylation_pvalue_column = "methylation_pval"
methylation_pvalue_threshold = 0.05
count_threshold = 2
pathway_pvalue_threshold = None
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [63]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps)

# Run the pipeline
my_pipeline.methylated_genes(methylation_file_path=methylation_file_path,
                              methylation_genes_column=methylation_genes_column,
                              methylation_pvalue_column=methylation_pvalue_column,
                              methylation_pvalue_threshold=methylation_pvalue_threshold,
                              count_threshold=count_threshold,
                              benjamini_threshold=benjamini_threshold,
                              pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis : Methylated genes...

Will create folder : .\draw_KEGG_2025-12-03_methylation
New directory '.\draw_KEGG_2025-12-03_methylation' created  

Output folder is .\draw_KEGG_2025-12-03_methylation

Opening CSV file: ./methylation.csv
Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_methylation


### DMPs per gene

In [64]:
# This function takes a single input file and a methylation metadata file as arguments and maps the detected genes on the enriched KEGG reference pathway.
# It generates bins to quantify the number of DMPs that correspond to a given gene, and colors a gege according to its DMP bin.
# The function also returns the quantification histogram plots, both in a grouped and an absolute count representation.

In [65]:
# Inspect the metadata file

DMPS_PER_GENE_FILE_PATH = "./methylation_for_quantification.csv"

dmps_per_gene_df = pd.read_csv(DMPS_PER_GENE_FILE_PATH)
dmps_per_gene_df.head()

Unnamed: 0,CG_ID,methylation_gene_symbol,methylation_pval,methylation_logFC
0,cg100_1,Ndufs8,0.049801,1.915612
1,cg100_2,Uqcrq,0.013704,-0.818593
2,cg100_3,Ndufb4b,0.04004,2.648866
3,cg100_4,Cox5a,0.035938,2.818396
4,cg100_5,Nr1h3,0.030564,-0.230053


In [66]:
dmps_per_gene_df['methylation_gene_symbol'].value_counts().head()

methylation_gene_symbol
Ppara      13
Ndufs1     10
Ndufb4b     9
Bax         9
Eif2s1      8
Name: count, dtype: int64

In [67]:
# Define parameters
input_file_path = "./single_input_genes.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
methylation_file_path = "./methylation_for_quantification.csv"
methylation_genes_column = "methylation_gene_symbol"
methylation_pvalue_column = "methylation_pval"
methylation_pvalue_threshold = 0.05
methylation_probe_column = "CG_ID"
probes_to_cgs=False
count_threshold = 2
pathway_pvalue_threshold = None
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [68]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps)

# Run the pipeline
my_pipeline.dmps_per_gene(methylation_file_path=methylation_file_path,
                          methylation_genes_column=methylation_genes_column,
                          methylation_pvalue_column=methylation_pvalue_column,
                          methylation_pvalue_threshold=methylation_pvalue_threshold,
                          methylation_probe_column=methylation_probe_column,
                          probes_to_cgs=probes_to_cgs,
                          count_threshold=count_threshold,
                          benjamini_threshold=benjamini_threshold,
                          pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis : DMPs per gene...

Will create folder : .\draw_KEGG_2025-12-03_methylation_quantification
The contents of '.\draw_KEGG_2025-12-03_methylation_quantification' have been removed, and the directory has been recreated.
Output folder is .\draw_KEGG_2025-12-03_methylation_quantification

Opening CSV file: ./methylation_for_quantification.csv
Will not perform probe correction...

Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_methylation_quantification


#### Probe correction

In [69]:
# NOTE: When using probes_to_cgs=True, the pipeline will split the CG probes by the underscore '_' character and keep the first part, essentially correcting for different probe chemistry that could occur in the same position.

# Example format is cg00000000_BC21 and cg00000000_TC21, which would be counted as two separate probes targeting the same gene.
# Using the argument probes_to_cgs with True, the probes become cg00000000 & cg00000000, and duplicated entries per gene are eliminated, essentially counting one probe for the target gene.

### miRNA target genes

In [70]:
# This function takes a single input file and a miRNA metadata file as arguments and maps the detected genes on the enriched KEGG reference pathway, and colors them according to their miRNA state.
# The state is defined as a binary reprsentation, depending if DEmiRs are targeting a given gene, or not.

In [71]:
DEMIRS_FILE_PATH = "./miRNA.tsv"

demirs_df = pd.read_csv(DEMIRS_FILE_PATH , sep='\t')
demirs_df.head()

Unnamed: 0,miRNA_ID,miRNA_gene_symbol,miRNA_pval,miRNA_logFC
0,mmu-miR-1-3p,dufb4,0.567302,1.234433
1,mmu-miR-2-3p,Atf4,0.756792,-0.002424
2,mmu-miR-3-3p,Prkag3,0.737903,2.747361
3,mmu-miR-4-3p,Xbp1,0.381659,0.802038
4,mmu-miR-5-3p,Prkag3,0.823298,2.281397


In [72]:
# Define parameters
input_file_path = "./single_input_genes.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
miRNA_file_path = "./miRNA.tsv"
miRNA_genes_column = "miRNA_gene_symbol"
miRNA_pvalue_column = "miRNA_pval"
miRNA_pvalue_threshold=0.05
pathway_pvalue_threshold = None
count_threshold = 2
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [73]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps)

# Run the pipeline
my_pipeline.mirna_target_genes(miRNA_file_path=miRNA_file_path,
                                miRNA_genes_column=miRNA_genes_column,
                                miRNA_pvalue_column=miRNA_pvalue_column,
                                miRNA_pvalue_threshold=miRNA_pvalue_threshold,
                                count_threshold=count_threshold,
                                benjamini_threshold=benjamini_threshold,
                                pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis : miRNA target genes...

Will create folder : .\draw_KEGG_2025-12-03_miRNA
The contents of '.\draw_KEGG_2025-12-03_miRNA' have been removed, and the directory has been recreated.
Output folder is .\draw_KEGG_2025-12-03_miRNA

Opening TSV file: ./miRNA.tsv
Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_miRNA


### DEmiRs per gene

In [None]:
# This function takes a single input file and a miRNA metadata file as arguments and maps the detected genes on the enriched KEGG reference pathway.
# It generates bins to quantify the number of DEmiRs that correspond to a given gene, and colors a gege according to its DEmiR bin.
# The function also returns the quantification histogram plots, both in a grouped and an absolute count representation.

In [79]:
# Inspect the metadata file

DEMIRS_PER_GENE_FILE_PATH = "./miRNA_for_quantification.tsv"

demirs_per_genes_df = pd.read_csv(DEMIRS_FILE_PATH , sep='\t')
demirs_per_genes_df.head()

Unnamed: 0,miRNA_ID,miRNA_gene_symbol,miRNA_pval,miRNA_logFC
0,mmu-miR-1-3p,dufb4,0.567302,1.234433
1,mmu-miR-2-3p,Atf4,0.756792,-0.002424
2,mmu-miR-3-3p,Prkag3,0.737903,2.747361
3,mmu-miR-4-3p,Xbp1,0.381659,0.802038
4,mmu-miR-5-3p,Prkag3,0.823298,2.281397


In [86]:
demirs_per_genes_df['miRNA_gene_symbol'].value_counts().head()

miRNA_gene_symbol
Prkaa1     2
Cox8c      2
Ndufb8     2
Il1a       2
Bcl2l11    2
Name: count, dtype: int64

In [74]:
# Define parameters

input_file_path = "./single_input_genes.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
miRNA_file_path = "./miRNA_for_quantification.tsv"
miRNA_genes_column = "miRNA_gene_symbol"
miRNA_pvalue_column = "miRNA_pval"
miRNA_pvalue_threshold = 0.05
miRNA_column = "miRNA_ID"
pathway_pvalue_threshold = None
count_threshold = 2
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [75]:
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                                sheet_name_paths=sheet_name_paths,
                                sheet_name_genes=sheet_name_genes,
                                genes_column=genes_column,
                                log2fc_column=log2fc_column,
                                save_to_eps=save_to_eps)

my_pipeline.demirs_per_gene(miRNA_file_path=miRNA_file_path,
                            miRNA_genes_column=miRNA_genes_column,
                            miRNA_pvalue_column=miRNA_pvalue_column,
                            miRNA_pvalue_threshold=miRNA_pvalue_threshold,
                            miRNA_ID_column=miRNA_column, 
                            count_threshold=count_threshold,
                            benjamini_threshold=benjamini_threshold,
                            pathway_pvalue_threshold=pathway_pvalue_threshold)


Executing analysis : DEmiRs per gene...

Will create folder : .\draw_KEGG_2025-12-03_miRNA_quantification
New directory '.\draw_KEGG_2025-12-03_miRNA_quantification' created  

Output folder is .\draw_KEGG_2025-12-03_miRNA_quantification

Opening TSV file: ./miRNA_for_quantification.tsv
Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_miRNA_quantification


### Methylated + miRNA target genes

In [76]:
# Define parameters
input_file_path = "./single_input_genes.xlsx"
sheet_name_paths = "pathways"
sheet_name_genes = "gene_metrics"
genes_column = "gene_symbol"
log2fc_column = "logFC"
methylation_file_path = "./methylation.csv"
methylation_genes_column = "methylation_gene_symbol"
methylation_pvalue_column = "methylation_pval"
methylation_pvalue_threshold = 0.05
miRNA_file_path = "./miRNA.tsv"
miRNA_genes_column = "miRNA_gene_symbol"
miRNA_pvalue_column = "miRNA_pval"
miRNA_pvalue_threshold = 0.05
count_threshold = 2
pathway_pvalue_threshold = None
benjamini_threshold = None
save_to_eps = False
folder_extension = None

In [77]:
# Intialize the object
my_pipeline = metaKEGG.Pipeline(input_file_path=input_file_path,
                        sheet_name_paths=sheet_name_paths,
                        sheet_name_genes=sheet_name_genes,
                        genes_column=genes_column,
                        log2fc_column=log2fc_column,
                        save_to_eps=save_to_eps)

# Run the pipeline
my_pipeline.methylated_and_mirna_target_genes(methylation_file_path=methylation_file_path,
                                                    methylation_genes_column=methylation_genes_column,
                                                    methylation_pvalue_column=methylation_pvalue_column,
                                                    methylation_pvalue_threshold=methylation_pvalue_threshold,
                                                    miRNA_file_path=miRNA_file_path,
                                                    miRNA_genes_column=miRNA_genes_column,
                                                    miRNA_pvalue_column=miRNA_pvalue_column,
                                                    miRNA_pvalue_threshold=miRNA_pvalue_threshold,
                                                    count_threshold=count_threshold,
                                                    benjamini_threshold=benjamini_threshold,
                                                    pathway_pvalue_threshold=pathway_pvalue_threshold)

Executing analysis : Methylated + miRNA target genes...

Will create folder : .\draw_KEGG_2025-12-03_methylation_and_miRNA
The contents of '.\draw_KEGG_2025-12-03_methylation_and_miRNA' have been removed, and the directory has been recreated.
Output folder is .\draw_KEGG_2025-12-03_methylation_and_miRNA

Opening CSV file: ./methylation.csv
Opening TSV file: ./miRNA.tsv
Parsing input file...

Will use thresholds:
Minimum number of genes in pathway : 2 (included)
pathway raw pvalue : None
pathway Benjamini-Hochberg : None

Finished parsing input file

Collecting pathway info...
Finished collecting pathway info

Mapping pathways...

[1/1] mmu04932 (Non-alcoholic fatty liver disease)
Done! 
Output files are located in .\draw_KEGG_2025-12-03_methylation_and_miRNA
