# CoGAPS Analysis on Batch-Corrected scRNA-seq Data

This notebook demonstrates how to run CoGAPS, a Bayesian non-negative matrix factorization (NMF) algorithm, on batch-corrected single-cell RNA sequencing (scRNA-seq) data in Python. The analysis aims to identify underlying biological patterns while mitigating technical variation across batches.

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
from cogaps import CoGAPS
import matplotlib.pyplot as plt
import plotly.express as px

# --- Configuration ---------------------------------------------------------
# Replace INPUT_PATH with your own dataset. BATCH_KEY must name a column of
# adata.obs that identifies the batch each cell came from.
INPUT_PATH = 'data.h5ad'
OUTPUT_PATH = 'cogaps_results.h5ad'
BATCH_KEY = 'batch'
N_PATTERNS = 8
N_RUNS = 10
SEED = 42


def _store_factor(adata, key, matrix):
    """Attach a factor matrix to ``adata`` on the axis its rows line up with.

    AnnData requires ``obsm`` entries to have ``n_obs`` rows and ``varm``
    entries to have ``n_vars`` rows, so the matrix is routed by its row
    count: cell-indexed matrices go to ``obsm``, gene-indexed ones to
    ``varm``. When both counts are equal, ``obsm`` is used.

    Raises:
        ValueError: if the row count matches neither axis.
    """
    matrix = np.asarray(matrix)
    n_rows = matrix.shape[0]
    if n_rows == adata.n_obs:
        adata.obsm[key] = matrix
    elif n_rows == adata.n_vars:
        adata.varm[key] = matrix
    else:
        raise ValueError(
            f"Cannot store '{key}': it has {n_rows} rows, but the data has "
            f"{adata.n_obs} cells and {adata.n_vars} genes."
        )


# Load scRNA-seq data
adata = sc.read_h5ad(INPUT_PATH)

# Fail early with a clear message instead of deep inside ComBat.
if BATCH_KEY not in adata.obs:
    raise KeyError(
        f"Column '{BATCH_KEY}' not found in adata.obs; it is required for "
        "batch correction."
    )

# Preprocess data: library-size normalization followed by log1p.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Batch effect correction using ComBat (modifies adata.X in place).
sc.pp.combat(adata, key=BATCH_KEY)

# Extract expression matrix. adata.X may already be a dense array at this
# point (ComBat writes back a dense result), so only densify when the
# matrix actually offers .toarray().
corrected = adata.X
if hasattr(corrected, 'toarray'):
    corrected = corrected.toarray()

# ComBat's adjustment can push values below zero, but CoGAPS is a
# non-negative factorization, so negative entries are clipped to zero.
expression_matrix = np.clip(np.asarray(corrected), 0, None)

# Perform CoGAPS
cogaps_model = CoGAPS(n_patterns=N_PATTERNS, n_runs=N_RUNS, seed=SEED)
cogaps_results = cogaps_model.run(expression_matrix)

# Add CoGAPS results to the AnnData object. Each factor is stored on the
# axis (cells -> obsm, genes -> varm) that matches its number of rows.
_store_factor(adata, 'X_cogaps_amplitude', cogaps_results.amplitude)
_store_factor(adata, 'X_cogaps_pattern', cogaps_results.pattern)

# Visualize CoGAPS components using Plotly.
# NOTE(review): the row labels assume `pattern` is genes x patterns --
# confirm against the CoGAPS result object.
pattern_matrix = cogaps_results.pattern
pattern_labels = [f"Pattern {i+1}" for i in range(pattern_matrix.shape[1])]
fig = px.imshow(pattern_matrix,
                labels=dict(x="Components", y="Genes", color="Intensity"),
                x=pattern_labels,
                y=list(adata.var_names),
                # Many genes vs. few patterns: let cells stretch so the
                # heatmap is not rendered as a thin unreadable strip.
                aspect="auto")
fig.update_layout(title="CoGAPS Patterns Heatmap",
                  xaxis_title="Components",
                  yaxis_title="Genes",
                  coloraxis_colorbar=dict(title="Intensity"))
fig.show()

# Save the results
adata.write(OUTPUT_PATH)