In [26]:
#toy model bayesian signature inference

import torch
import pyro
import pyro.distributions as dist 
from torch.distributions import constraints
import numpy as np


# The model is defined in terms of:
#   M        : mutation count matrix with one row per branch, dims (num_branches, 96)
#   K_denovo : number of signatures we want to infer de novo
#   K_fixed  : number of signatures we already know; they are the rows of the fixed matrix beta_fixed
#   K        = K_denovo + K_fixed
#   theta    : vector encoding the total number of mutations of each branch (row sums of M)
#   beta_prior, alpha_prior : concentration parameters of the Dirichlet priors on beta_denovo and on alpha (the activity matrix);
#              beta_prior has dims (K_denovo, 96), alpha_prior has dims (num_branches, K)
# Outside the model we also provide an adjacency matrix A with binary entries, which specifies the
# correlation structure imposed by the phylogeny (it is used when transferring activities between branches).


def model_single_run(M,beta_fixed,K_denovo,beta_prior,alpha_prior):
    """Pyro model: Poisson-distributed counts M with rate diag(theta) @ alpha @ beta.

    Parameters
    ----------
    M : 2-D tensor of observed counts; rows are indexed by the "sample" plate,
        columns by the "context" plate.
    beta_fixed : tensor whose rows are the known signatures; it is stacked on
        top of the sampled de novo signatures along dim 0.
    K_denovo : not used inside the model (the number of de novo signatures is
        implied by the number of rows of beta_prior); kept so the signature
        matches the arguments passed by the inference routine.
    beta_prior : Dirichlet concentration for the de novo signatures (one row per signature).
    alpha_prior : Dirichlet concentration for the activities (one row per row of M).

    Sample sites
    ------------
    "activities", "extra_signatures" (latent) and "obs" (observed, equal to M).
    """
    num_samples = M.size()[0]
    num_contexts = M.size()[1]

    # theta[i] is the total number of mutations in row i of M; the activity
    # matrix is parametrized as theta * alpha, with alpha the signature fractions.
    theta = torch.sum(M, dim=1)

    # Each row of the concentration matrices is one Dirichlet draw. The leading
    # (row) dimension is declared as part of the event with to_event(1): the
    # joint log-probability is the same sum over rows, but the site no longer
    # carries a batch dimension that is not covered by any plate (which Pyro's
    # shape validation rejects).
    alpha = pyro.sample("activities", dist.Dirichlet(alpha_prior).to_event(1))

    beta_denovo = pyro.sample("extra_signatures", dist.Dirichlet(beta_prior).to_event(1))

    # Full signature matrix: known signatures first, then the de novo ones.
    beta = torch.cat((beta_fixed, beta_denovo), 0)

    # Scaling row i of alpha by theta[i] is the same as diag(theta) @ alpha
    # without building the dense diagonal matrix.
    rate = torch.matmul(theta.unsqueeze(-1) * alpha, beta)

    # Likelihood: the outer plate takes the rightmost dim (columns of M),
    # the inner plate the next one (rows of M).
    with pyro.plate("context", num_contexts):
        with pyro.plate("sample", num_samples):
            pyro.sample("obs", dist.Poisson(rate), obs=M)
        

In [27]:
from pyro.infer.autoguide import AutoDelta
from pyro.infer.autoguide.initialization import init_to_sample
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam  


# MAP estimation: an AutoDelta guide places a point estimate on every latent
# site of the model; the initial values are drawn with init_to_sample.

guide_single_run = AutoDelta(
    model_single_run,
    init_loc_fn=init_to_sample,
)

# SVI

def inference_single_run(M,beta_fixed,K_denovo,beta_prior,alpha_prior,lr=0.05,num_steps = 200):
    """Run SVI on model_single_run / guide_single_run for num_steps Adam steps.

    The Pyro parameter store is cleared first, and the fitted values are left
    in the store when the function finishes; nothing is returned.

    M, beta_fixed, K_denovo, beta_prior and alpha_prior are forwarded unchanged
    to the model and guide at every step; lr is the Adam learning rate.
    """
    # always start from an empty parameter store
    pyro.clear_param_store()

    svi = SVI(
        model_single_run,
        guide_single_run,
        Adam({"lr": lr}),
        loss=Trace_ELBO(),
    )

    # gradient steps
    model_args = (M, beta_fixed, K_denovo, beta_prior, alpha_prior)
    for _ in range(num_steps):
        svi.step(*model_args)

In [28]:
def calculate_transfer_coeff(parameters,M,beta_fixed,A,hyper_lambda):
    """Compute the matrix of transfer coefficients between the rows of M.

    For every pair (i, j) with A[i, j] == 1, the counts M[i] are compared, via
    cosine similarity, with the reconstruction theta[i] * alpha[j] @ beta that
    uses the activities of row j. The similarities are normalised so that each
    row sums to one, then the diagonal is weighted by (1 - hyper_lambda) and
    the off-diagonal entries by hyper_lambda.

    Parameters
    ----------
    parameters : dict with the entries "AutoDelta.extra_signatures" and
        "AutoDelta.activities".
    M : 2-D tensor of counts, one row per sample.
    beta_fixed : tensor of fixed signatures, stacked on top of the de novo ones.
    A : square matrix; only entries equal to 1 contribute a similarity.
    hyper_lambda : weight given to the off-diagonal entries.

    Returns
    -------
    Tensor of shape (num_samples, num_samples).

    Note: a row whose similarities sum to zero (for example a row of A with no
    entry equal to 1) is divided by zero and yields NaN/inf, as before.
    """
    beta_denovo = parameters["AutoDelta.extra_signatures"]
    alpha = parameters["AutoDelta.activities"]

    beta = torch.cat((beta_fixed, beta_denovo), 0)
    theta = torch.sum(M, dim=1)

    # The original assigned this to a misspelled name ("num_sumples"), so every
    # later use of num_samples raised NameError or read a stale global.
    num_samples = M.size()[0]

    # profiles[j] = alpha[j] @ beta does not depend on i, so compute it once.
    profiles = torch.matmul(alpha, beta)

    # M_r[i, j, :] = theta[i] * profiles[j]
    M_r = theta[:, None, None] * profiles[None, :, :]

    dots = (M[:, None, :] * M_r).sum(dim=-1)
    norms = torch.norm(M, dim=1)[:, None] * torch.norm(M_r, dim=-1)

    # Only pairs flagged in A receive a similarity; all other entries stay 0.
    linked = torch.as_tensor(A)[:num_samples, :num_samples] == 1
    cos = torch.zeros(num_samples, num_samples)
    cos[linked] = (dots / norms)[linked].to(cos.dtype)

    # keepdim=True divides each row by its own sum. Without it the (n,) vector
    # of row sums is broadcast along the columns, so column j was divided by
    # the sum of row j.
    w = cos / torch.sum(cos, dim=1, keepdim=True)

    # off-diagonal entries get hyper_lambda, the diagonal gets (1 - hyper_lambda)
    transfer_coeff = hyper_lambda * w
    diag = torch.arange(num_samples)
    transfer_coeff[diag, diag] = (1 - hyper_lambda) * w[diag, diag]

    return transfer_coeff

In [29]:
def _read_param_store():
    """Return a dict holding a detached copy of every parameter in the Pyro param store."""
    return {
        key: pyro.param(key).detach().clone()
        for key in pyro.get_param_store().get_all_param_names()
    }


def full_inference(M,A,beta_fixed,K_denovo,hyper_lambda,lr = 0.05,steps_per_iteration = 200,num_iterations = 10):
    """Iterated inference in which the activity prior is updated through transfer coefficients.

    Step 0 fits the model with uniform priors. Each of the following
    num_iterations steps takes the current estimates, computes the transfer
    coefficients from them, uses (transfer_coeff @ activities) as the new
    alpha prior and the current de novo signatures as the new beta prior,
    and fits the model again.

    Parameters
    ----------
    M : 2-D tensor of counts (rows: samples, columns: contexts).
    A : matrix forwarded to calculate_transfer_coeff.
    beta_fixed : tensor whose rows are the known signatures.
    K_denovo : number of signatures to infer de novo.
    hyper_lambda : weight forwarded to calculate_transfer_coeff.
    lr, steps_per_iteration : forwarded to inference_single_run.
    num_iterations : number of transfer iterations after the independent run.

    Returns
    -------
    dict with the entries "alpha", "beta", "lambda" and "K de novo"; "alpha"
    and "beta" are the estimates produced by the last fit that was run.
    """
    num_samples = M.size()[0]
    num_contexts = M.size()[1]

    K_fixed = beta_fixed.size()[0]
    K = K_denovo + K_fixed

    # step 0 : independent inference with uniform priors

    print("iteration ",0)

    alpha_prior = torch.ones(num_samples, K)/K

    beta_prior = torch.ones(K_denovo, num_contexts)/num_contexts

    inference_single_run(M,beta_fixed,K_denovo,beta_prior,alpha_prior,lr = lr,
                                            num_steps = steps_per_iteration)

    # Read the estimates right after every fit, so that `parameters` always
    # describes the most recent run (and is defined even when num_iterations is 0).
    parameters = _read_param_store()

    # do iterations transferring alpha's

    for i in range(num_iterations):

        print("iteration ", i+1)

        # alpha's and beta's inferred by the previous fit
        previous_activities = parameters["AutoDelta.activities"]

        beta_prior = parameters["AutoDelta.extra_signatures"]

        # calculate transfer coeff
        transfer_coeff = calculate_transfer_coeff(parameters,M,beta_fixed,A,hyper_lambda)

        # update alpha prior with transfer coeff
        alpha_prior = torch.matmul(transfer_coeff, previous_activities)

        # do inference with updated alpha_prior and beta_prior
        inference_single_run(M, beta_fixed, K_denovo, beta_prior, alpha_prior, lr = lr,
                                            num_steps = steps_per_iteration)

        # distance between the previous activities and the prior derived from them
        print("activities distance = " , torch.norm(previous_activities - alpha_prior))

        # pick up the result of the fit that just finished
        parameters = _read_param_store()

    # save final inference

    alpha_denovo = parameters["AutoDelta.activities"]

    beta_denovo = parameters["AutoDelta.extra_signatures"]

    SigPhylo = {"alpha" : alpha_denovo, "beta" : beta_denovo, "lambda" : hyper_lambda, "K de novo": K_denovo}

    return SigPhylo
            
            

In [30]:
#load data

import pandas as pd

# Count matrices of the two samples: the first CSV column is dropped and the
# remaining values are converted to float32 tensors.
X_1 = pd.read_csv("Documents/GitHub/SigPhylo/toy_model/sample_1.csv")
X_2 = pd.read_csv("Documents/GitHub/SigPhylo/toy_model/sample_2.csv")

c_1 = X_1.values[:, 1:]
c_2 = X_2.values[:, 1:]

M_1 = torch.tensor(np.array(c_1, dtype=float)).float()
M_2 = torch.tensor(np.array(c_2, dtype=float)).float()

# Fixed signatures, loaded the same way (first column dropped, float32 tensor).
clock_like_signatures = pd.read_csv("Documents/GitHub/SigPhylo/toy_model/beta_aging.csv")
beta_fixed = torch.tensor(
    np.array(clock_like_signatures.values[:, 1:], dtype=float)
).float()

# Adjacency matrix with every entry equal to 1.
A = torch.ones(3, 3)

In [34]:
# sample 1: full inference on M_1 with two de novo signatures

full_inference(
    M_1,
    A,
    beta_fixed,
    K_denovo=2,
    hyper_lambda=0.1,
    lr=0.05,
    steps_per_iteration=200,
    num_iterations=10,
)