In [None]:
# Standard library
import time
from collections import namedtuple

# Third-party
import numpy as np
import pyro
import pyro.distributions as dist
import scipy
import torch
from matplotlib import pyplot
# Pyro's distribution wrappers are used here rather than the torch.distributions
# originals (Binomial, Gamma, Uniform).
from pyro.distributions import (
    Bernoulli,
    Beta,
    BetaBinomial,
    Binomial,
    Categorical,
    Dirichlet,
    DirichletMultinomial,
    Gamma,
    Multinomial,
    Uniform,
)
from scipy.stats import binom as ScipyBinom
from skopt import gp_minimize
# `tensor` is a factory function exposed on the top-level torch package, not a
# submodule: `import torch.tensor as tensor` raises ModuleNotFoundError on PyTorch
# releases that do not ship a torch/tensor.py module, while this form binds the same
# name on every release.
from torch import tensor

# Seed value intended for the notebook's random number generators.
seed = 0

In [None]:
!pip install joblib

In [None]:
from joblib import Parallel, delayed

In [None]:
%load_ext autoreload
%autoreload 2


In [None]:
from mvl import genData, likelihoods

In [None]:
import copy
# Template for the per-run results record. It is deep-copied for every simulated data
# set so that runs never share the nested "bestRes" dict.
resSim = {
    "allRes": None,
    "nEpochs": None,
    "bestRes": {
        "pis": None,
        "alphas": None,
        "PDV_c1true": None,
        "PDV_c2true": None,
        "PDV_cBothTrue": None,
        "PDV_c1inferred": None,
        "PDV_c2inferred": None,
        "PDV_cBothInferred": None,
    },
}

# Apply the notebook-level seed so that re-running this cell reproduces the same
# simulated data sets and Dirichlet draws (the seed was declared but never used).
pyro.set_rng_seed(seed)

# One entry per (rrs, pis) combination: {"params": ..., "runs": [one dict per replicate]}
cached6LargeSimRes = []

# Grid of simulation settings: every row of rrsSim is combined with every row of pisSim.
rrsSim = tensor([[1.5, 1.5, 1.5], [2, 2, 1.5], [3, 3, 1.5], [2, 2, 2], [3, 3, 2], [3, 3, 3]])
pisSim = tensor([[.01, .01, .01], [.03, .03, .03], [.05, .05, .05], [.1, .1, .1], [.1, .1, .04]])

nCases = tensor([15e3, 15e3, 6e3])
nCtrls = tensor(5e5)

# Settings shared by every run (loop-invariant, so defined once up front).
afMeanRun = 1e-4
generatingFn = genData.v6  # can't use normal until we truncate distribution with lower rr values
nReplicatesRun = 10        # simulated data sets per parameter combination
runCostFnIdx = 16
nEpochsRun = 10
nDirichletSamples = 10_000  # draws averaged to estimate the inferred P(D|V) vectors

# In DSB:
#               No ID   ID
#   ASD+ADHD      684   217
#   ASD          3091   871
#   ADHD         3206   271
#   Control      5002     -
#
#   gnomAD      44779   (Non-Finnish Europeans in non-psychiatric exome subset)
#
#   Case total:     8340
#   Control total: 49781
# so we can use pDBoth = .1 * total_cases

# i counts parameter combinations; it is only used for progress messages.
i = 0
for rrsSimRun in rrsSim:
    for pisSimRun in pisSim:
        # The shape arguments need to be tensors, otherwise "gamma_cpu not implemented
        # for long", e.g. rrShape=50.0 doesn't work...
        paramsRun = genData.genParams(rrMeans=rrsSimRun, pis=pisSimRun, afMean=afMeanRun, rrShape=tensor(50.), afShape=tensor(50.), nCases=nCases, nCtrls=nCtrls)[0]

        pDsRun = paramsRun["pDs"]
        # Not used below; kept because notebook cells share one namespace.
        pisRun = paramsRun["diseaseFractions"]
        print("params are:", paramsRun)

        # Hold a direct reference to this combination's record instead of re-indexing
        # the list with the running counter.
        simRecord = {"params": paramsRun, "runs": []}
        cached6LargeSimRes.append(simRecord)

        for y in range(nReplicatesRun):
            start = time.time()
            r = generatingFn(**paramsRun)
            print("took", time.time() - start)

            # Everything the generator returned, plus a slot for the fit results
            # that is filled in at the end of this iteration.
            resPointer = {
                **r,
                "generatingFn": generatingFn,
                "results": None,
            }
            simRecord["runs"].append(resPointer)

            print(f"Run: {i}, {y}")

            xsRun = resPointer["altCounts"]
            afsRun = resPointer["afs"]
            affectedGenesRun = resPointer["affectedGenes"]
            # Not used below; kept because notebook cells share one namespace.
            unaffectedGenesRun = resPointer["unaffectedGenes"]

            print("nEpochsRun", nEpochsRun)

            res = likelihoods.fitFnBivariate(xsRun, pDsRun, nEpochs=nEpochsRun, minLLThresholdCount=20, debug=True, costFnIdx=runCostFnIdx)
            # The last entry of res["params"] is taken as the best fit.
            bestRes = res["params"][-1]

            inferredPis = tensor(bestRes[0:3])     # 3-vector
            inferredAlphas = tensor(bestRes[3:])   # 4-vector, idx0 is P(!D|V)

            #### Calculate actual ####
            # For each component: rows of afs selected by that component's gene index,
            # divided by the generating afMean and averaged over axis 0.
            component1Afs = afsRun[affectedGenesRun[0]]
            c1true = (component1Afs / afMeanRun).mean(0)

            component2Afs = afsRun[affectedGenesRun[1]]
            c2true = (component2Afs / afMeanRun).mean(0)

            componentBothAfs = afsRun[affectedGenesRun[2]]
            cBothTrue = (componentBothAfs / afMeanRun).mean(0)

            #### Calculate inferred values ####
            # Prepend 1 - sum(pDs) to the per-condition pDs.
            pds = tensor([1-pDsRun.sum(), *pDsRun])
            alphas = inferredAlphas.numpy()
            # NOTE(review): component 2 uses [a0, a0, a2, a2]; by symmetry component 1
            # might be expected to end in alphas[1] rather than alphas[2]. Left unchanged
            # here -- confirm against the model definition before altering it.
            c1inferred = Dirichlet(tensor([alphas[0], alphas[1], alphas[0], alphas[2]]) * pds).sample([nDirichletSamples]).mean(0)
            c2inferred = Dirichlet(tensor([alphas[0], alphas[0], alphas[2], alphas[2]]) * pds).sample([nDirichletSamples]).mean(0)
            cBothInferred = Dirichlet(tensor([alphas[0], (alphas[1] + alphas[3]), (alphas[2] + alphas[3]), (alphas[1] + alphas[2] + alphas[3])]) * pds).sample([nDirichletSamples]).mean(0)

            print(f"\n\nrun {i} results for rrs: {rrsSimRun}, pis: {pisSimRun}")
            print("Inferred pis:", inferredPis)
            print("\nP(D|V) true ans in component 1:", c1true)
            print("P(D|V) inferred in component 1:", c1inferred)
            # These two lines report component 2; they were previously labelled
            # "component 1" and "component both".
            print("\nP(D|V) true ans in component 2:", c2true)
            print("P(D|V) inferred in component 2:", c2inferred)
            print("\nP(D|V) true ans in component both:", cBothTrue)
            print("P(D|V) inferred in component both:", cBothInferred,"\n\n")

            # Fill a fresh copy of the template and attach it to this run's record.
            resToStore = copy.deepcopy(resSim)
            resToStore["allRes"] = res
            resToStore["nEpochs"] = nEpochsRun
            br = resToStore["bestRes"]
            br["pis"] = inferredPis
            br["alphas"] = inferredAlphas
            br["PDV_c1true"] = c1true
            br["PDV_c2true"] = c2true
            br["PDV_cBothTrue"] = cBothTrue
            br["PDV_c1inferred"] = c1inferred
            br["PDV_c2inferred"] = c2inferred
            br["PDV_cBothInferred"] = cBothInferred

            resPointer["results"] = resToStore

        i += 1

In [15]:
# Remove the generator-function reference from every cached run before the results
# are written to disk further down.
# pop(..., None) keeps this cell idempotent: `del` raised KeyError when the cell was
# executed a second time. The loop variables are named so they no longer overwrite
# the `res` variable that holds the most recent fit result.
for simEntry in cached6LargeSimRes:
    for runEntry in simEntry["runs"]:
        runEntry.pop("generatingFn", None)

In [97]:
# Spot check on the first cached data set: divide the `afs` rows of two gene groups by
# 1e-4 and average over axis 0.
# NOTE(review): `cachedData6` is not defined in any visible cell -- confirm where it is
# created, otherwise this cell fails on a fresh kernel.
firstCachedRun = cachedData6[0]
afsRun = firstCachedRun["afs"]
affectedGenesRun = firstCachedRun["affectedGenes"]

# presumably the same value as the afMean used to generate the data -- verify
afScale = 1e-4

# Third gene group (index 2), reported as "component Both"
componentBothAfs = afsRun[affectedGenesRun[2]]
a = componentBothAfs / afScale
a = a.mean(0)
print("P(D1|V) in component Both", a)

# First gene group (index 0), reported as "component 1"
component1Afs = afsRun[affectedGenesRun[0]]
b = component1Afs / afScale
b = b.mean(0)
print("P(D|V) in component 1", b)

# Fixed-weight blend of entry 1 from each vector
weightedAverage = .66 * a[1] + .33 * b[1]
print("their weighted average", weightedAverage)

P(D1|V) in component Both tensor([0.7331, 0.1189, 0.1189, 0.0437])
P(D|V) in component 1 tensor([0.8604, 0.0964, 0.0096, 0.0193])
their weighted average tensor(0.1103)


In [97]:
# NOTE(review): this cell repeats the preceding cell verbatim (same execution count and
# output); one of the two can probably be dropped.
# NOTE(review): `cachedData6` is not defined in any visible cell -- confirm its origin.
afsRun = cachedData6[0]["afs"]
affectedGenesRun = cachedData6[0]["affectedGenes"]


def _scaledColumnMean(componentAfs, scale=1e-4):
    """Divide componentAfs by scale and average the result over axis 0."""
    return (componentAfs / scale).mean(0)


# Gene group at index 2 ("component Both")
componentBothAfs = afsRun[affectedGenesRun[2]]
a = _scaledColumnMean(componentBothAfs)
print("P(D1|V) in component Both", a)

# Gene group at index 0 ("component 1")
component1Afs = afsRun[affectedGenesRun[0]]
b = _scaledColumnMean(component1Afs)
print("P(D|V) in component 1", b)

# Entry 1 of each vector, combined with fixed weights .66 / .33
print("their weighted average", .66 * a[1] + .33 * b[1])

P(D1|V) in component Both tensor([0.7331, 0.1189, 0.1189, 0.0437])
P(D|V) in component 1 tensor([0.8604, 0.0964, 0.0096, 0.0193])
their weighted average tensor(0.1103)


In [16]:
# Write every simulation record to disk (NumPy appends ".npy" to the file name).
# The list holds dicts, so NumPy stores it as a pickled object array; read it back
# with np.load("mvln-sim.npy", allow_pickle=True).
np.save(file="mvln-sim", arr=cached6LargeSimRes, allow_pickle=True)

