In [16]:
import pysam
import numpy as np
from scipy.stats import beta

ModuleNotFoundError: No module named 'pysam'

In [11]:
# Parameters for simulation
n_variants = 100  # Number of homoplasmic variant positions to simulate
coverage = 500  # Depth per variant position
mix_ratio = 0.99  # Proportion of A in the mixture

In [12]:
# Function to simulate VAFs for a sample
def simulate_sample_vafs(n_variants, coverage, alt_allele_prob=1.0):
    alt_reads = np.random.binomial(coverage, alt_allele_prob, n_variants)
    vafs = alt_reads / coverage
    return vafs

# Simulate VAFs for samples A and B (pure homoplasmic)
vafs_A = simulate_sample_vafs(n_variants, coverage, alt_allele_prob=1.0)
vafs_B = simulate_sample_vafs(n_variants, coverage, alt_allele_prob=0.0)  # All reference

# Simulate VAFs for sample AB (mixture of A and B)
vafs_AB = mix_ratio * vafs_A + (1 - mix_ratio) * vafs_B
vafs_AB_noisy = vafs_AB + np.random.normal(0, 0.01, n_variants)  # Add noise

In [13]:
vafs_AB_noisy

array([0.98325415, 0.98360769, 0.99960885, 0.9958874 , 0.9933061 ,
       0.99450083, 1.00208858, 1.00168307, 0.97334275, 0.99209108,
       1.00502583, 1.0033277 , 0.98687686, 0.97480986, 0.97738501,
       0.98198967, 0.98097313, 0.98760926, 0.98240545, 0.98350199,
       0.97873249, 0.99067487, 0.98119931, 0.99366028, 0.98015977,
       0.98692623, 0.99040568, 0.99688864, 1.00260857, 0.98121423,
       0.99121973, 0.98457402, 0.98478799, 0.99058355, 0.96570171,
       0.97868384, 0.99586662, 1.00731491, 0.99731056, 1.00196989,
       0.97899738, 0.97685492, 0.99001291, 0.98304841, 0.97833228,
       0.99006966, 0.98028366, 0.97460646, 0.98818735, 0.99255831,
       1.01230788, 0.97364552, 0.99365155, 0.99347318, 1.00872323,
       0.98871625, 0.98182148, 0.97717706, 1.00140505, 0.98214102,
       0.98623623, 0.99194588, 0.98099075, 0.97694527, 0.97468064,
       0.9821439 , 0.98322513, 0.99204854, 0.99178184, 1.00728053,
       0.98224348, 0.99546551, 0.99481302, 0.99805601, 0.98555

In [14]:
# Function to assess mixture hypothesis using the Bayesian approach
def assess_mixture(vafs_AB, vafs_A, vafs_B, mix_ratio, coverage):
    posterior_probs = []
    for vaf_ab, vaf_a, vaf_b in zip(vafs_AB, vafs_A, vafs_B):
        # Calculate likelihoods under H0 and H1 using Beta distribution
        lh0 = beta.pdf(vaf_ab, vaf_a * coverage + 1, (1 - vaf_a) * coverage + 1)
        lh1 = beta.pdf(vaf_ab, 
                       mix_ratio * vaf_a * coverage + (1 - mix_ratio) * vaf_b * coverage + 1,
                       mix_ratio * (1 - vaf_a) * coverage + (1 - mix_ratio) * (1 - vaf_b) * coverage + 1)
        
        # Prior for H1 (mixture)
        prior_h1 = 0.01
        posterior_h1 = (lh1 * prior_h1) / (lh0 * (1 - prior_h1) + lh1 * prior_h1)
        posterior_probs.append(posterior_h1)
    
    return np.array(posterior_probs)


In [15]:
# Assess mixture hypothesis
posterior_probs = assess_mixture(vafs_AB_noisy, vafs_A, vafs_B, mix_ratio, coverage)


NameError: name 'beta' is not defined

In [None]:

# Output: Report if mixture is detected based on posterior probabilities
threshold = 0.95  # Threshold for detecting a mixture
is_mixture = np.any(posterior_probs > threshold)



In [None]:


print(f"Is the simulated sample AB detected as a mixture? {'Yes' if is_mixture else 'No'}")




