# Basic BVAS demo using simulated data

In [25]:
from bvas import simulate_data, BVASSelector
from bvas.map import map_inference
import pandas as pd
import numpy as np

### Simulate data

In [2]:
# Settings for the synthetic dataset used in the rest of this notebook
# (the seed is held fixed at 0).
simulation_settings = dict(
    num_alleles=100,
    duration=26,
    num_variants=100,
    num_regions=10,
    N0=10 ** 4,
    k=0.1,
    seed=0,
    sampling_rate=10,
    strategy='global-median',
)
data = simulate_data(**simulation_settings)

In [3]:
# Report the shape of every entry returned by the simulation
for name, value in data.items():
    print(name, value.shape)

# The effective population size estimate is stored as a single-element array
nu_eff = data['estimated_nu_eff'].item()
print("\nEstimated effective population size: {:.1f}".format(nu_eff))

Y torch.Size([100])
Gamma torch.Size([100, 100])
estimated_nu_eff (1,)
true_betas torch.Size([100])

Estimated effective population size: 490.3


### Instantiate BVASSelector object

In [4]:
# Label the 100 alleles: the first 10 are the non-neutral ("Causal") alleles
# in the simulation, the remaining 90 are labelled "Spurious".
causal_names = ["Causal{}".format(idx) for idx in range(1, 11)]
spurious_names = ["Spurious{}".format(idx) for idx in range(11, 101)]
mutations = causal_names + spurious_names

selector = BVASSelector(
    data['Y'],
    data['Gamma'],
    mutations,
    S=5.0,
    tau=100.0,
)

### Run BVAS MCMC-based inference

In [5]:
# The progress bar counts T + T_burnin = 2500 iterations in total
selector.run(
    T=2000,
    T_burnin=500,
    seed=1,
)

  0%|          | 0/2500 [00:00<?, ?it/s]

### Inspect results

The results can be found in the `selector.summary` Pandas DataFrame.

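- We find that 8 of the 10 true causal alleles are ranked above every spurious allele: six are assigned PIPs close to 1, while Causal7 and Causal2 receive more modest PIPs (about 0.25 and 0.14)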
- We find that 2 of the 10 true causal alleles are missed 
    - Specifically we miss the weakest effects, namely Causal1 and Causal6
- We find that no spurious alleles are assigned large PIPs
- We see that the Beta estimates are regularized somewhat towards zero

In [6]:
# Show the 15 top rows of the summary, restricted to the columns of interest
summary_columns = ['PIP', 'Beta', 'BetaStd', 'Rank']
top_rows = selector.summary.iloc[:15]
print(top_rows[summary_columns])

                 PIP      Beta   BetaStd  Rank
Causal4     0.999999  0.051191  0.006197     1
Causal5     0.999999  0.065880  0.006351     2
Causal10    0.999999 -0.068456  0.009001     3
Causal9     0.999999 -0.066190  0.010773     4
Causal3     0.999917  0.038770  0.007014     5
Causal8     0.889151 -0.027527  0.012958     6
Causal7     0.254103 -0.005840  0.011050     7
Causal2     0.137435  0.004219  0.009328     8
Spurious89  0.022826 -0.000246  0.002102     9
Spurious24  0.017642  0.000132  0.001421    10
Spurious70  0.015575  0.000216  0.001960    11
Spurious80  0.013852  0.000077  0.001198    12
Spurious21  0.011893  0.000030  0.000733    13
Spurious16  0.009505 -0.000049  0.000814    14
Spurious44  0.009007 -0.000085  0.001199    15


In [9]:
# Print the ground-truth coefficient of each of the first 10 (causal) alleles
causal_labels = mutations[:10]
causal_betas = data['true_betas'][:10]
for label, coefficient in zip(causal_labels, causal_betas):
    print("[{}]\t{:.2f}".format(label, coefficient.item()))

[Causal1]	0.01
[Causal2]	0.02
[Causal3]	0.04
[Causal4]	0.06
[Causal5]	0.08
[Causal6]	-0.01
[Causal7]	-0.02
[Causal8]	-0.04
[Causal9]	-0.06
[Causal10]	-0.08


In [10]:
# Every coefficient after the first 10 must be exactly zero
neutral_betas = data['true_betas'][10:]
smallest = neutral_betas.min().item()
largest = neutral_betas.max().item()
assert smallest == largest == 0.0

# Compare to MAP inference

Let's compare to maximum a posteriori (MAP) inference, as in [Inferring effects of mutations on SARS-CoV-2 transmission from genomic surveillance data](https://www.medrxiv.org/content/10.1101/2021.12.31.21268591v2).

In [64]:
# Run MAP inference for a single tau value and extract the fitted coefficients
map_taus = [2048.0]
map_results = map_inference(data['Y'], data['Gamma'], taus=map_taus)
map_fit = map_results['map_2048.0']
inferred_beta = map_fit['beta']

In [65]:
# Assemble the MAP coefficients into a DataFrame ranked by absolute effect size
beta_frame = pd.DataFrame(inferred_beta, index=mutations, columns=['Beta'])
ranked = (
    beta_frame
    .assign(BetaAbs=np.fabs(beta_frame['Beta']))
    .sort_values(by='BetaAbs', ascending=False)
)
# Rank 1 is the allele with the largest absolute coefficient
ranked['Rank'] = np.arange(1, ranked.shape[0] + 1)
# Drop the helper column; keep only what is displayed downstream
inferred_beta = ranked[['Beta', 'Rank']]

In [67]:
# With MAP, only 6 of the 10 causal alleles occupy the top ranks
# before the first spurious allele appears
inferred_beta.head(15)

Unnamed: 0,Beta,Rank
Causal9,-0.053871,1
Causal5,0.049838,2
Causal10,-0.048263,3
Causal4,0.045866,4
Causal3,0.027333,5
Causal8,-0.021542,6
Spurious80,0.020984,7
Spurious44,-0.017381,8
Spurious68,-0.015019,9
Spurious61,0.014249,10
