In [None]:
import sys
from pathlib import Path

sys.path.append('../')

import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns

from src.grn.inference import GRNInferenceEngine
from src.grn.network_analysis import GRNAnalyzer
from src.grn.validation import GRNValidator


In [None]:
# Load the processed E-MTAB-8414 dataset from its .h5ad file.
processed_h5ad = '../data/processed/E-MTAB-8414_processed.h5ad'
adata = sc.read_h5ad(processed_h5ad)

# Report the dataset dimensions, then the number of cells per `cell_type` label.
loaded_cells, loaded_genes = adata.n_obs, adata.n_vars
print(f'Loaded: {loaded_cells} cells x {loaded_genes} genes')
cells_per_type = adata.obs['cell_type'].value_counts()
print(cells_per_type)


In [None]:
# Create the GRN inference engine from the AnnData object loaded above;
# later cells call its prepare / infer / filter / save methods.
engine = GRNInferenceEngine(adata)


In [None]:
# Build the expression matrix with no cell subset and the
# highly-variable-gene option switched on.
expr_matrix = engine.prepare_expression_matrix(
    cell_subset=None,
    use_highly_variable=True,
)

# Run network inference on that matrix with four workers.
grn_full = engine.infer_grn(expr_matrix, n_workers=4)

# Summarise the result: number of inferred rows and mean of the "importance" column.
n_interactions = len(grn_full)
mean_importance = grn_full["importance"].mean()
print(f'Inferred {n_interactions} TF-target interactions')
print(f'Mean importance: {mean_importance:.4f}')


In [None]:
# Keep only high-confidence edges from the full network.
# NOTE(review): presumably threshold=0.85 is a quantile on edge importance
# when method='percentile' -- confirm against filter_high_confidence_edges.
grn_filtered = engine.filter_high_confidence_edges(
    grn_full,
    method='percentile',
    threshold=0.85,
)

n_high_confidence = len(grn_filtered)
print(f'High-confidence edges: {n_high_confidence}')


In [None]:
# Validate the filtered network against a ChIP-seq reference:
# load the reference database, then compute overlap statistics against it.
validator = GRNValidator(grn_filtered)
chipseq_db = validator.load_chipseq_database()
overlap_stats = validator.compute_chipseq_overlap(chipseq_db)

# Show only the TF identifier with its precision and recall columns.
overlap_columns = ['TF', 'precision', 'recall']
print(overlap_stats[overlap_columns])


In [None]:
# Analyse the topology of the filtered network: compute centrality metrics,
# then derive the hub table from them.
analyzer = GRNAnalyzer(grn_filtered)
centrality = analyzer.compute_centrality_metrics()
# NOTE(review): presumably percentile=0.90 keeps the top 10% of genes -- confirm
# against identify_hubs.
hubs = analyzer.identify_hubs(centrality, percentile=0.90)

# Print the first ten rows of the hub table, restricted to three columns.
hub_columns = ['gene', 'pagerank', 'gene_type']
first_ten_hubs = hubs[hub_columns].head(10)
print('Top 10 hub genes:')
print(first_ten_hubs)


In [None]:
# Plot the 15 hubs with the highest PageRank as a horizontal bar chart and
# save the figure as a PDF.

# Rank explicitly by PageRank so the figure matches its title regardless of the
# row order returned by identify_hubs (not guaranteed from this notebook).
top_hubs = hubs.nlargest(15, 'pagerank')

# Explicit figure/axes instead of the implicit pyplot state, so this cell
# cannot draw onto (or save) a figure left over from another cell.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top_hubs, x='pagerank', y='gene', hue='gene_type', ax=ax)
ax.set_title('Top 15 Hub Genes by PageRank')
ax.set_xlabel('PageRank Score')
fig.tight_layout()

# savefig does not create missing directories; make sure the target exists so
# the cell survives a fresh checkout / Restart & Run All.
figure_path = Path('../results/figures/grn_hub_genes.pdf')
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path)


In [None]:
# Persist the filtered network through the engine's own writer.
# NOTE(review): the message below names a .csv file while the path passed here
# has no extension -- presumably save_network appends it; confirm.
network_output = '../results/grn_filtered'
engine.save_network(grn_filtered, network_output)
print('GRN saved to results/grn_filtered.csv')
