In [1]:
import numpy as np
import pandas as pd
import anndata
import matplotlib.pyplot as plt
import spatialdata as sd
import scanpy as sc

In [None]:
# Load the previously filtered AnnData object from disk and display its summary.
adata = sc.read_h5ad('filtered_data.h5ad')
adata

In [6]:
# Keep a handle on the gene identifiers (the index of adata.var).
# NOTE(review): `genes` is not referenced again in the visible cells - confirm it is still needed.
genes = adata.var.index

In [7]:
# Read the cell cycle gene list, one gene symbol per line.
# The context manager closes the file handle deterministically, and blank
# lines are skipped so they can neither shift the positional S / G2M split
# performed on this list nor be counted as "missed" genes.
with open('scanpy.txt') as gene_file:
    cell_cycle_genes = [line.strip() for line in gene_file if line.strip()]

In [8]:
# Positional split of the gene list: the first 43 entries are treated as the
# S-phase set, everything after them as the G2/M set.
s_genes, g2m_genes = cell_cycle_genes[:43], cell_cycle_genes[43:]

In [None]:
# Restrict the cell cycle gene lists to genes present in the dataset.
# The complete list is rebuilt from the unfiltered s_genes / g2m_genes so this
# cell is safe to re-run: cell_cycle_genes is overwritten below, and deriving
# missed_genes from it would report zero missed genes on a second execution.
all_cell_cycle_genes = s_genes + g2m_genes
missed_genes = [x for x in all_cell_cycle_genes if x not in adata.var_names]
cell_cycle_genes = [x for x in all_cell_cycle_genes if x in adata.var_names]
s_genes_true = [x for x in s_genes if x in adata.var_names]
g2m_genes_true = [x for x in g2m_genes if x in adata.var_names]

# Report how many genes of each set survived the intersection with the dataset.
print('cell cycle genes: ', len(cell_cycle_genes), '\nmissed genes: ', len(missed_genes))
print(f"G1/S genes: {len(s_genes)} - G2/M genes: {len(g2m_genes)}")
print(f"G1/S genes in our dataset: {len(s_genes_true)} - G2/M genes in our dataset: {len(g2m_genes_true)}")

In [None]:
# Show the cell cycle genes that are absent from adata.var_names.
missed_genes

In [11]:
# Work on a copy so the preprocessing below never modifies `adata` itself.
adata_cc = adata.copy()

# Apply the in-place preprocessing steps in order: total-count normalisation,
# log1p transform, then scaling.
for preprocess in (sc.pp.normalize_total, sc.pp.log1p, sc.pp.scale):
    preprocess(adata_cc)

# Score every cell against the S and G2/M gene sets found in the dataset; the
# resulting 'S_score', 'G2M_score' and 'phase' columns are read from
# adata_cc.obs afterwards.
sc.tl.score_genes_cell_cycle(adata_cc, s_genes=s_genes_true, g2m_genes=g2m_genes_true)


In [14]:
# Copy the cell cycle annotations computed on the preprocessed copy back onto
# the original object.
for column in ('phase', 'S_score', 'G2M_score'):
    adata.obs[column] = adata_cc.obs[column]

In [None]:
# Calculate cell cycle phase proportions for each sample (rows sum to 100 %)
phase_proportions = pd.crosstab(adata.obs['sample'], adata.obs['phase'], normalize='index') * 100

# Reorder samples from control to increasing Tg time and simplify sample names
sample_order = ['C3control', 'B4Tg15min', 'B5Tg30min', 'B6Tg1h', 'C4Tg2h', 'C5Tg4h']
simplified_names = ['Control', 'Tg15min', 'Tg30min', 'Tg1h', 'Tg2h', 'Tg4h']
phase_proportions = phase_proportions.reindex(sample_order)
phase_proportions.index = simplified_names

# Define colors for each cell cycle phase. Colors are looked up by phase name
# rather than by column position, so a phase keeps its color even if another
# phase is absent from the crosstab.
phase_colors = {'G1': '#1f77b4', 'G2M': '#ff7f0e', 'S': '#2ca02c'}
colors = [phase_colors[phase] for phase in phase_proportions.columns]

# Create stacked bar plot on an explicit Axes. Without `ax=`, DataFrame.plot
# opens its own figure, which left the figure created here empty and ignored
# the requested figsize.
fig, ax = plt.subplots(figsize=(10, 6))
phase_proportions.plot(
    kind='bar',
    stacked=True,
    color=colors,
    width=0.8,  # Increased bar width
    rot=45,     # Rotate labels 45 degrees
    ax=ax,
)
ax.set_title('Cell Cycle Phase Distribution by Sample', pad=20, fontsize=12)
ax.set_ylabel('Percentage (%)', fontsize=10)
ax.legend(title='Cell Cycle Phase', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

# Print the numerical values
print("\nCell cycle phase proportions (%) by sample:")
print(phase_proportions.round(2))

In [16]:
# Keep only the cells flagged in obs['pass_all_filters'] and only the cell
# cycle genes found in the dataset. The explicit .copy() turns the sliced view
# into an independent AnnData object, so later steps that write results into
# it do not have to convert a view implicitly.
adata_cc = adata_cc[adata_cc.obs['pass_all_filters'], cell_cycle_genes].copy()

In [None]:
# PCA on the cell cycle gene subset
sc.tl.pca(adata_cc)

# Draw on an explicit Axes: a bare plt.figure() would stay empty because the
# scanpy plotting call creates its own figure when no `ax` is supplied.
fig, ax = plt.subplots(figsize=(8, 6))
sc.pl.pca_scatter(
    adata_cc,
    color="phase",
    title="PCA by cell cycle genes",
    size=50,
    ax=ax,
    show=False,
)

# Print the color mapping for phases. This has to happen after the plotting
# call, because adata_cc.uns['phase_colors'] is only read here and nothing
# before the plot has written it. The colors are paired with the phase
# categories (sorted category order), not with the order in which phases
# happen to appear in the data.
print("Phase colors:")
phase_categories = adata_cc.obs['phase'].astype('category').cat.categories
for phase, color in zip(phase_categories, adata_cc.uns['phase_colors']):
    print(f"{phase}: {color}")

fig.tight_layout()
plt.show()

In [6]:
# Write the AnnData object to disk as an .h5ad file.
adata.write_h5ad('cellcyclescore.h5ad')