# Cell Type Proportions and Signature Analysis

This notebook analyzes cell type proportions across conditions and datasets.


In [None]:
# Proportions by cancer type and dataset
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
adata = sc.read_h5ad('../processed/integrated_annotated.h5ad')
# Use preserved cell_type
ct = adata.obs[['cancer_type','dataset_id','cell_type']].copy()

# Number of cells for every (cancer_type, cell_type) pair.
# observed=False is spelled out so the row set does not depend on the pandas
# default when these columns are categorical (unobserved pairs stay as
# zero-count rows).
counts = ct.groupby(['cancer_type', 'cell_type'], observed=False).size()

# Normalize within each cancer type. transform('sum') returns the per-cancer
# total aligned to the index of `counts`, so the division keeps the original
# two-level index. The previous groupby(...).apply(lambda s: s / s.sum())
# prepends the group key a second time on pandas >= 2.0, which makes
# reset_index() fail with "cannot insert cancer_type, already exists".
totals = counts.groupby(level='cancer_type', observed=False).transform('sum')
prop_by_cancer = (counts / totals).reset_index(name='prop')

# One row per cancer type, one column per cell type, so that all cancer types
# are drawn side by side in a single stacked bar chart.
composition = prop_by_cancer.pivot(
    index='cancer_type', columns='cell_type', values='prop'
)

# Draw on an explicit axes: DataFrame.plot() without ax= opens a new figure on
# every call, which left the sized figure empty and scattered the bars over
# one untitled figure per cancer type.
fig, ax = plt.subplots(figsize=(10, 5))
composition.plot(kind='bar', stacked=True, ax=ax)
ax.set_title('Cell type composition by cancer type')
ax.set_ylabel('Proportion of cells')
# Keep the legend outside the axes so it does not cover the bars.
ax.legend(title='cell_type', bbox_to_anchor=(1.02, 1), loc='upper left')
fig.tight_layout()
plt.show()

