In [1]:
import pandas as pd
import scanpy as sc


In [2]:
# Attach the precomputed pseudotime values to the filtered dataset and save
# the annotated object for the cells below.
adata = sc.read_h5ad('/storage/lingyuan2/STATES_data/filtered_data.h5ad')
pseudotime_df = pd.read_csv('/storage/lingyuan2/STATES_data/pseudotime_values.csv')

# `.values` drops the CSV index, so the assignment is purely positional.
# NOTE(review): this assumes the CSV rows are in the same order as adata.obs —
# confirm against whatever produced pseudotime_values.csv.
adata.obs['Pseudotime'] = pseudotime_df['Pseudotime'].values

adata.write('/storage/lingyuan2/STATES_data/pseudotime.h5ad')


In [3]:
# Reload the AnnData object from the pseudotime.h5ad file that the previous
# cell writes, so the analysis below starts from the saved, annotated dataset.
adata = sc.read_h5ad('/storage/lingyuan2/STATES_data/pseudotime.h5ad')

In [None]:
import numpy as np
# Build normalized and log-transformed layers for total RNA, ribosome-bound
# RNA and TE. Only layers are written; adata.X itself is not reassigned here.

# Keep a copy of the current X matrix under its own layer name.
adata.layers['totalRNA'] = adata.X.copy()

# Row sums of the count matrix and their median, used as the normalization target.
total_counts = adata.layers['totalRNA'].sum(axis=1)
median_reads = np.median(total_counts)
print(f"Median number of reads per cell: {median_reads}")

# inplace=False returns a dict; its 'X' entry holds the normalized layer.
normalized = sc.pp.normalize_total(
    adata,
    target_sum=median_reads,
    layer='totalRNA',
    inplace=False,
)
adata.layers['totalRNA_norm'] = normalized['X']

# Replace NaN entries in TE with 0 before multiplying.
# NOTE(review): the 'TE' layer is not created in the visible cells; presumably
# it is already stored in the loaded h5ad — confirm.
adata.layers['TE'] = np.nan_to_num(adata.layers['TE'], nan=0.0)

# NOTE(review): `*` is element-wise only for dense arrays; if totalRNA_norm is
# a scipy sparse matrix this would be a matrix product — confirm layer types.
adata.layers['rbRNA_norm'] = adata.layers['TE'] * adata.layers['totalRNA_norm']

# log1p overwrites each layer in place, so re-running this cell would
# transform the 'TE' layer a second time.
for layer_name in ('rbRNA_norm', 'totalRNA_norm', 'TE'):
    sc.pp.log1p(adata, layer=layer_name)

In [13]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binned_statistic
from scipy.interpolate import make_interp_spline
# Plot binned TE and total-RNA profiles of one gene along pseudotime and save
# the figure as an SVG.

# Emit text in the SVG as text rather than as paths.
plt.rcParams['svg.fonttype'] = 'none'

# Gene to plot; the name is reused for the column lookup, title and file name.
gene_name = 'IL27RA'
gene_idx = adata.var_names.get_loc(gene_name)

gene_te = adata.layers['TE'][:, gene_idx]  # extracted but not used below
gene_x = adata.X[:, gene_idx]
# NOTE(review): the 'rbRNA' layer is not created in the visible cells;
# presumably it is already stored in the loaded h5ad — confirm.
gene_rbRNA = adata.layers['rbRNA'][:, gene_idx]
pseudotime = adata.obs['Pseudotime']

n_bins = 20
bins = np.linspace(pseudotime.min(), pseudotime.max(), n_bins + 1)

# Target sum used to rescale the gene's counts within each bin.
# NOTE(review): presumably the median reads per cell printed by the
# normalization cell — confirm and derive it from the data if so.
bin_target_sum = 2203

# Bins that are empty, or that carry no counts for this gene, keep the value 0.
te_means = np.zeros(n_bins)
x_means = np.zeros(n_bins)
rbRNA_means = np.zeros(n_bins)  # filled below but not plotted in this cell

for i in range(n_bins):
    at_or_above_lower = pseudotime >= bins[i]
    if i == n_bins - 1:
        # The last bin is closed on the right; otherwise cells sitting exactly
        # at the maximum pseudotime would fall into no bin at all.
        mask = at_or_above_lower & (pseudotime <= bins[i + 1])
    else:
        mask = at_or_above_lower & (pseudotime < bins[i + 1])
    bin_cells = np.where(mask)[0]

    if len(bin_cells) == 0:
        continue

    sum_totalRNA = np.sum(gene_x[bin_cells])
    sum_rbRNA = np.sum(gene_rbRNA[bin_cells])

    if sum_totalRNA <= 0:
        # No counts for this gene in the bin: dividing by the bin sum would
        # give NaN and make the spline fit below fail. Every quantity would be
        # log1p(0) = 0 anyway, so keep the zero initial values.
        continue

    # Pooled TE for the bin: summed ribosome-bound counts over summed total counts.
    original_te = sum_rbRNA / sum_totalRNA
    normalized_totalRNA = gene_x[bin_cells] / sum_totalRNA * bin_target_sum
    normalized_rbRNA = normalized_totalRNA * original_te

    log_totalRNA = np.log1p(normalized_totalRNA)
    log_rbRNA = np.log1p(normalized_rbRNA)
    log_te = np.log1p(original_te)

    te_means[i] = np.mean(log_te)
    x_means[i] = np.mean(log_totalRNA)
    rbRNA_means[i] = np.mean(log_rbRNA)

# Smooth the per-bin values with an interpolating spline.
# NOTE(review): the knots are the left bin edges, not the bin centres, so the
# curves stop one bin width short of the maximum pseudotime — confirm intended.
x_new = np.linspace(bins[:-1].min(), bins[:-1].max(), 300)
te_smooth = make_interp_spline(bins[:-1], te_means)(x_new)
x_smooth = make_interp_spline(bins[:-1], x_means)(x_new)

# Two y-axes sharing one x-axis: TE on the left (red), total RNA on the right (blue).
fig, ax1 = plt.subplots(figsize=(4, 4))
ax2 = ax1.twinx()

ax1.set_box_aspect(1)
ax2.set_box_aspect(1)

ax1.plot(x_new, te_smooth, color='red', label='TE', linewidth=0.5)
ax2.plot(x_new, x_smooth, color='blue', label='TotalRNA', linewidth=0.5)

ax1.set_xlabel('Pseudotime')
ax1.set_ylabel('TE Expression', color='red', fontsize=12)
ax2.set_ylabel('TotalRNA Expression', color='blue', fontsize=12)
plt.title(f'{gene_name} Expression in Pseudotime', fontsize=14)

ax1.tick_params(axis='y', labelcolor='red', labelsize=10)
ax2.tick_params(axis='y', labelcolor='blue', labelsize=10)
ax1.tick_params(axis='x', labelsize=10)

ax1.grid(False)

# Legend handles are collected but no legend is drawn; the colored axis labels
# identify the two curves.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

fig.tight_layout()
# The figure is written to disk instead of being shown inline.
#plt.show()
fig.savefig(f'/storage/lingyuan2/STATES_data/{gene_name}_expression_pseudotime.svg', format='svg')
plt.close(fig)

