In [None]:
import rp2.data
from rp2 import hagai_2018

In [None]:
# Columns of the per-cell metadata that together identify one experimental condition.
condition_columns = [
    "replicate",
    "treatment",
    "time_point",
]

# Time-point labels kept for the analysis; they are matched as strings
# against `obs.time_point` when the datasets are filtered below.
time_points = [str(t) for t in (0, 2, 4, 6)]

# Gene symbol lookup table for mouse, used later to resolve symbols to gene IDs.
gene_info_df = rp2.load_biomart_gene_symbols_df("mouse")

In [None]:
# Load the mouse UMI counts and keep only the cells from the selected time points.
mouse_umi_adata = hagai_2018.load_umi_counts("mouse")
umi_time_point_mask = mouse_umi_adata.obs.time_point.isin(time_points)
mouse_umi_adata = mouse_umi_adata[umi_time_point_mask].copy()

print("Full Hagai mouse dataset has:")
print(f"  {mouse_umi_adata.n_obs:,} cells")
print(f"  {mouse_umi_adata.n_vars:,} genes")

# Pin the expected dimensions so a change in the upstream data is noticed immediately.
assert mouse_umi_adata.n_obs == 53_086
assert mouse_umi_adata.n_vars == 22_048

# The UMI dataset is only needed for the summary above; release it.
del umi_time_point_mask
del mouse_umi_adata

In [None]:
# Load the median-scaled mouse counts and keep only the cells from the selected time points.
mouse_counts_adata = hagai_2018.load_counts("mouse", scaling="median")
counts_time_point_mask = mouse_counts_adata.obs.time_point.isin(time_points)
mouse_counts_adata = mouse_counts_adata[counts_time_point_mask].copy()
del counts_time_point_mask

print("Scaled Hagai mouse dataset has:")
print(f"  {mouse_counts_adata.n_vars:,} genes")

# Pin the expected dimensions so a change in the upstream data is noticed immediately.
assert mouse_counts_adata.n_obs == 53_086
assert mouse_counts_adata.n_vars == 16_798

In [None]:
# Gene IDs (index of the per-gene table) whose `lps_responsive` flag is set.
lps_responsive_gene_ids = mouse_counts_adata.var.index[
    mouse_counts_adata.var["lps_responsive"]
]
print(f"{len(lps_responsive_gene_ids):,} genes are LPS-responsive")

# Pin the expected number of LPS-responsive genes.
assert len(lps_responsive_gene_ids) == 2_336

In [None]:
# Genes to analyse in addition to the LPS-responsive set, given by symbol.
additional_gene_symbols = ["Tnf"]

# Resolve the symbols to gene IDs through the gene symbol lookup table.
additional_gene_ids = gene_info_df.index[
    gene_info_df["symbol"].isin(additional_gene_symbols)
]

# Sorted, de-duplicated union of the LPS-responsive and additional gene IDs.
analysis_gene_ids = sorted(set(lps_responsive_gene_ids) | set(additional_gene_ids))

print(f"{len(analysis_gene_ids):,} genes to be used in analysis")

# Pin the expected size of the analysis gene set.
assert len(analysis_gene_ids) == 2_337

In [None]:
# One row per distinct (replicate, treatment, time_point) combination present in the data.
condition_df = mouse_counts_adata.obs[condition_columns].drop_duplicates()

n_conditions_per_gene = len(condition_df)
n_conditions_overall = n_conditions_per_gene * len(analysis_gene_ids)

print(f"{n_conditions_per_gene} conditions per gene")
print(f"{n_conditions_overall} conditions overall")

# Number of distinct conditions contributed by each replicate.
display(condition_df.replicate.value_counts().sort_index())

# Check the *computed* values. The previous `assert((20 * 2_337) == 46_740)`
# compared two literals and therefore could never fail.
assert n_conditions_per_gene == 20
assert n_conditions_overall == 46_740

# Temporaries were only needed for this summary; release them.
del n_conditions_per_gene, n_conditions_overall
del condition_df