In [None]:
import pandas as pd
from pydeseq2.dds import DeseqDataSet
from pydeseq2.default_inference import DefaultInference
from pydeseq2.ds import DeseqStats

In [None]:
# ==== 1. Load input data ====
# Paths to the two input tables.
# The count matrix is laid out with genes as rows and samples as columns.
counts_file = "path/to/count_matrix.csv"
metadata_file = "path/to/metadata.csv"

In [None]:
# Read both tables; the first column of each file becomes the row index.
counts_df = pd.read_csv(filepath_or_buffer=counts_file, index_col=0)
metadata_df = pd.read_csv(filepath_or_buffer=metadata_file, index_col=0)

In [None]:
# Reorder the metadata rows so they follow the sample order of the count
# matrix columns. A sample missing from the metadata raises a KeyError here.
metadata_df = metadata_df.loc[counts_df.columns, :]

In [None]:
# ==== 2. Define DESeq2 dataset ====
# PyDESeq2 expects counts as samples x genes, whereas counts_df is loaded as
# genes x samples, so it is transposed here. After the transpose its row index
# lines up with metadata_df, which was reordered to follow counts_df.columns.
dds = DeseqDataSet(
    counts=counts_df.T,
    metadata=metadata_df,  # was misspelled `metadat_df`, which raised NameError
    design='~condition',  # model formula: a single `condition` factor
    refit_cooks=True,
    inference=DefaultInference(n_cpus=8),  # inference backend configured with 8 CPUs
)

In [None]:
# ==== 3. Fit DESeq2 model ====
# Runs the DESeq2 fitting pipeline on the dataset object.
dds.deseq2()

# ==== 4. Get results ====
# Contrast is given as [factor, tested level, reference level].
stat_res = DeseqStats(
    dds,
    contrast=["condition", "treatment", "control"],
)
# Computes the statistics and populates stat_res.results_df.
stat_res.summary()

In [None]:
# Keep only the genes whose adjusted p-value is below 0.05, ordered by raw
# p-value (ascending). Two earlier assignments to `p_values` were removed:
# each was overwritten before being used, so the result is unchanged.
# NOTE: despite its name, `p_values` holds full result rows (all columns of
# results_df), not just the p-value column.
p_values = (
    stat_res.results_df
    .loc[stat_res.results_df['padj'] < 0.05]
    .sort_values(by='pvalue')
)

display(p_values)

In [None]:
# Save results to file
stat_res.results_df.to_csv("deseq_results.csv")

# ==== 5. Get normalized counts ====
# PyDESeq2 stores the normalized counts as an array in the "normed_counts"
# layer of the dataset (there is no `norm_counts` attribute), so a labelled
# DataFrame is rebuilt from it. The layer is samples x genes; it is transposed
# so the saved file uses the same genes x samples layout as the input counts.
norm_counts = pd.DataFrame(
    dds.layers["normed_counts"],
    index=dds.obs_names,
    columns=dds.var_names,
).T
norm_counts.to_csv("normalized_counts.csv")

print("DESeq2 analysis complete. Results saved to 'deseq_results.csv'.")