# Expression pseudotime analysis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the posterior-prediction table (tab-separated) into a DataFrame.
# `data_dir` must already be defined before this cell runs (not set here).
# Later cells read the columns 'gene', 'cluster', 'species', 'pseudotime',
# 'pred_joint', 'pred_joint.low_hdi' and 'pred_joint.upp_hdi' from it.
predictions_path = f"{data_dir}/posterior_predictions_7_clst.tsv"
expr_data = pd.read_csv(predictions_path, sep='\t')

In [None]:
# Keep only the rows of `expr_data` whose 'gene' column equals `gene_name`
# and display them as the cell output.
# NOTE(review): `gene_name` is not assigned in this cell - it has to exist
# already (e.g. from another cell) when this cell is executed.
is_selected_gene = expr_data['gene'] == gene_name
gene_data = expr_data.loc[is_selected_gene]
gene_data

In [None]:
# X vs Y paralogs plots
#
# Draws one panel per gene in `gene_list`. Each panel shows, for every
# species, the 'pred_joint' value along 'pseudotime' as a line with markers,
# the band between 'pred_joint.low_hdi' and 'pred_joint.upp_hdi' as a shaded
# area, and the pseudotime span of each cluster as a labelled background
# stripe. The figure is written to
# 'XY_paralogs_gene_expression_pseudotime.pdf' and then shown.
#
# Requires `expr_data` (loaded in an earlier cell) with the columns 'gene',
# 'cluster', 'species', 'pseudotime', 'pred_joint', 'pred_joint.low_hdi' and
# 'pred_joint.upp_hdi'.

# Define cluster order (the order in which stages are shaded and labelled)
cluster_order = ['Undifferentiated spermatogonia', 'Differentiated spermatogonia', 
                 'Leptotene', 'Zygotene', 'Pachytene', 
                 'Secondary spermatocytes', 'Round spermatids', 'Elongated spermatids']

# Short labels drawn at the top of each stripe; same order as `cluster_order`
cluster_labels = ['Undiff\nSPG', 'Diff\nSPG', 'Lep\nSPC', 'Zyg\nSPC', 
                  'Pach\nSPC', 'Sec\nSPC', 'rSD', 'eSD']

# Species are drawn in this order, which also fixes the legend order
species_order = ['Bonobo', 'Chimpanzee', 'Human', 'Macaque']

# Colors for each species
species_colors = {'Bonobo': '#E74C3C', 
                  'Chimpanzee': '#3498DB', 
                  'Human': '#2ECC71', 
                  'Macaque': '#F39C12'}

# Background colors for cell stages; same order as `cluster_order`
colors_bg = ['#FFE5E5', '#E5F5F5', '#E5F0FF', '#FFF0E5', 
             '#E5F5E8', '#FFFAE5', '#F0E5FF', '#E8E5F0']

# Genes to plot (paralogs), one panel each. Panels are filled row by row, so
# with two columns consecutive list entries end up side by side.
gene_list = ['mc_ampl_VCX2' ,'mc_ampl_VCY1B', 'mc_ampl_HSFX3', 'mc_ampl_HSFY2', 'mc_ampl_RBMX', 'mc_ampl_RBMY1A1']

# Create figure with subplots
n_genes = len(gene_list)
n_cols = 2  # Number of columns
n_rows = int(np.ceil(n_genes / n_cols))

# squeeze=False makes `plt.subplots` always return a 2-D array of axes, so
# flattening works for every grid shape. The previous `[axes]` fallback for a
# single gene wrapped an array (n_cols is 2) instead of an Axes and broke.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 5*n_rows),
                         sharex=True, squeeze=False)
axes = axes.flatten()

legend_added = False

for gene_idx, gene_name in enumerate(gene_list):
    ax = axes[gene_idx]

    # Filter data for this gene
    gene_data = expr_data[expr_data['gene'] == gene_name]

    # Determine cluster boundaries: (min, max) pseudotime of every cluster
    # that has at least one row for this gene
    cluster_ranges = {}
    for cluster in cluster_order:
        cluster_data = gene_data[gene_data['cluster'] == cluster]
        if not cluster_data.empty:
            cluster_ranges[cluster] = (cluster_data['pseudotime'].min(),
                                       cluster_data['pseudotime'].max())

    # Add background shading for cell stages
    for cluster, stage_label, stage_color in zip(cluster_order, cluster_labels, colors_bg):
        if cluster not in cluster_ranges:
            continue
        min_p, max_p = cluster_ranges[cluster]
        ax.axvspan(min_p, max_p, alpha=0.3, color=stage_color, zorder=0)
        # Add cluster label at the top. The x-axis transform places x in data
        # coordinates and y in axes coordinates, so 0.98 is just below the
        # top edge regardless of the expression range.
        mid_p = (min_p + max_p) / 2
        ax.text(mid_p, 0.98, stage_label,
                transform=ax.get_xaxis_transform(),
                ha='center', va='top', fontsize=8, fontweight='bold')

    # Plot each species trajectory
    for species in species_order:
        # Sort by pseudotime so the line and the band are drawn left to right
        species_data = gene_data[gene_data['species'] == species].sort_values('pseudotime')
        if species_data.empty:
            continue

        # Plot line with points
        ax.plot(species_data['pseudotime'], species_data['pred_joint'],
                'o-', color=species_colors[species], linewidth=2.5,
                markersize=5, label=species, alpha=0.8)

        # Add the low/upp HDI interval as a shaded area
        ax.fill_between(species_data['pseudotime'],
                        species_data['pred_joint.low_hdi'],
                        species_data['pred_joint.upp_hdi'],
                        color=species_colors[species], alpha=0.2)

    # Set labels and title
    ax.set_ylabel('Expression', fontsize=11, fontweight='bold')
    ax.set_title(gene_name, fontsize=12, fontweight='bold', pad=20)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Add the legend once, to the first panel that actually has labelled
    # lines (panel 0 whenever the first gene has data); an empty panel would
    # otherwise produce an empty legend and a matplotlib warning.
    if not legend_added and ax.get_legend_handles_labels()[0]:
        ax.legend(loc='best', frameon=False, fontsize=10)
        legend_added = True

    # Add the x-label to the lowest used panel of each column. When the grid
    # has an unused (hidden) slot in the last row, the panel above it is the
    # lowest visible one in that column; sharex hid its tick labels, so they
    # are switched back on explicitly.
    if gene_idx + n_cols >= n_genes:
        ax.set_xlabel('Pseudotime', fontsize=11, fontweight='bold')
        ax.tick_params(axis='x', which='both', labelbottom=True)

# Hide unused subplots
for unused_ax in axes[n_genes:]:
    unused_ax.axis('off')

fig.tight_layout()
fig.savefig('XY_paralogs_gene_expression_pseudotime.pdf', dpi=300, bbox_inches='tight')
plt.show()