In [2]:
import pyro
import torch
import torch.tensor as tensor
import pyro.distributions as dist
# from torch.distributions import Binomial, Gamma, Uniform
from pyro.distributions import Binomial, Bernoulli, Categorical, Dirichlet, DirichletMultinomial, Beta, BetaBinomial, Uniform, Gamma, Multinomial

import numpy as np

import scipy
from skopt import gp_minimize 
from scipy.stats import binom as ScipyBinom
from matplotlib import pyplot

from collections import namedtuple
import time
seed = 0

In [None]:
!pip install joblib

In [None]:
from joblib import Parallel, delayed

In [None]:
%load_ext autoreload
%autoreload 2


In [None]:
from mvl import genData, likelihoods

In [None]:
import copy
resSim = {
        "allRes": None,
        "nEpochs": None,
        "bestRes": {
            "pis": None,
            "alphas": None,
            "PDV_c1true": None,
            "PDV_c2true": None,
            "PDV_cBothTrue": None,
            "PDV_c1inferred": None,
            "PDV_c2inferred": None,
            "PDV_cBothInferred": None,
        }
    }

cached6NormalSimRes = []

rrsSim = tensor([[2, 2, 1.5]])
pisSim = tensor([[.05, .05, .05]])

nCases = tensor([15e3, 15e3, 6e3])
nCtrls = tensor(5e5)
i = 0
for rrsSimRun in rrsSim:
    for pisSimRun in pisSim:
        afMeanRun = 1e-4
        generatingFn = genData.v6normal
        # In DSB:
        # 	No ID	ID	
        #         ASD+ADHD	684	217	
        #         ASD	3091	871	
        #         ADHD	3206	271	
        #         Control	5002	-	

        #         gnomAD	44779	(Non-Finnish Europeans in non-psychiatric exome subset)	

        #         Case total:	8340		
        #         Control total:	49781		
        # so we can use pDBoth = .1 * total_cases
        # needs tensor for shapes, otherwise "gamma_cpu not implemente for long", e.g rrShape=50.0 doesn't work...
        paramsRun = genData.genParams(rrMeans=rrsSimRun, pis=pisSimRun, afMean=afMeanRun, rrShape=tensor(50.), afShape=tensor(50.), nCases=nCases, nCtrls=nCtrls)[0]
        
        pDsRun = paramsRun["pDs"]
        pisRun = paramsRun["diseaseFractions"]
        print("params are:", paramsRun)
        
        cached6NormalSimRes.append({"params": paramsRun, "runs": []})
        for y in range(0, 10):
            start = time.time()
            r = generatingFn(**paramsRun)
            print("took", time.time() - start)
            
            resPointer = {
                **r,
                "generatingFn": generatingFn,
                "results": None,
            }

            cached6NormalSimRes[i]["runs"].append(resPointer)
            
            print(f"Run: {i}, {y}")
            
            xsRun = resPointer["altCounts"]
            afsRun = resPointer["afs"]
            affectedGenesRun = resPointer["affectedGenes"]
            unaffectedGenesRun = resPointer["unaffectedGenes"]

            runCostFnIdx = 16

            nEpochsRun = 10
            print("nEpochsRun", nEpochsRun)
            
            res = likelihoods.fitFnBivariate(xsRun, pDsRun, nEpochs=nEpochsRun, minLLThresholdCount=20, debug=True, costFnIdx=runCostFnIdx)
            bestRes = res["params"][-1]

            inferredPis = tensor(bestRes[0:3]) # 3-vector
            inferredAlphas = tensor(bestRes[3:]) # 4-vector, idx0 is P(!D|V)

            #### Calculate actual ###
            component1Afs = afsRun[affectedGenesRun[0]]
            c1true = (component1Afs / afMeanRun).mean(0)

            component2Afs = afsRun[affectedGenesRun[1]]
            c2true = (component2Afs / afMeanRun).mean(0)

            componentBothAfs = afsRun[affectedGenesRun[2]]
            cBothTrue = (componentBothAfs / afMeanRun).mean(0)

            ### calculate inferred values
            pds = tensor([1-pDsRun.sum(), *pDsRun])
            alphas = inferredAlphas.numpy()
            c1inferred = Dirichlet(tensor([alphas[0], alphas[1], alphas[0], alphas[2]]) * pds).sample([10_000]).mean(0)
            c2inferred = Dirichlet(tensor([alphas[0], alphas[0], alphas[2], alphas[2]]) * pds).sample([10_000]).mean(0)
            cBothInferred = Dirichlet(tensor([alphas[0], (alphas[1] + alphas[3]), (alphas[2] + alphas[3]), (alphas[1] + alphas[2] + alphas[3])]) * pds).sample([10_000]).mean(0)

            print(f"\n\nrun {i} results for rrs: {rrsSimRun}, pis: {pisSimRun}")
            print("Inferred pis:", inferredPis)
            print("\nP(D|V) true ans in component 1:", c1true)
            print("P(D|V) inferred in component 1:", c1inferred)
            print("\nP(D|V) true ans in component 1:", c2true)
            print("P(D|V) inferred in component both:", c2inferred)
            print("\nP(D|V) true ans in component both:", cBothTrue)
            print("P(D|V) inferred in component both:", cBothInferred,"\n\n")

            resToStore = copy.deepcopy(resSim)
            resToStore["allRes"] = res
            resToStore["nEpochs"] = nEpochsRun
            br = resToStore["bestRes"]
            br["pis"] = inferredPis
            br["alphas"] = inferredAlphas
            br["PDV_c1true"] = c1true
            br["PDV_c2true"] = c2true
            br["PDV_cBothTrue"] = cBothTrue
            br["PDV_c1inferred"] = c1inferred
            br["PDV_c2inferred"] = c2inferred
            br["PDV_cBothInferred"] = cBothInferred

            resPointer["results"] = resToStore
        
        i += 1

In [None]:
import json
# torch.save("./mvln-sim-mvn", tensor(cached6NormalSimRes))
# json.dumps("./mvln-sim-mvn.json", cached6NormalSimRes)
cached6NormalSimRes

In [5]:
res = np.load("./mvln-sim-mvln.npy", allow_pickle=True)


IN


In [6]:
resByParams = []
for runSet in res:
    params = (runSet["params"]["diseaseFractions"], runSet["params"]["rrMeans"], runSet["params"]["rrShape"])
#     print("param", params)
    res = []
#     if params not in resByParam:
#         resByParams[params] = []
    
    for run in runSet["runs"]:
        if run is None or "results" not in run or run["results"] is None:
            print(f"no results found for {params}")
            continue
        res.append(run["results"])
    resByParams.append([params, res])

np.save("mvln-sim-mvln-results", resByParams)




In [10]:
resByParams = np.load("mvln-sim-mvln-results.npy", allow_pickle=True)


1

In [9]:

# plt.subplot([1,len(resByParams)])
i = 0
with open("mvln-sim-mvln-res.tsv", "w") as file:
    file.write(f"Notes: 15000 samples1, 15000 samples2, 6000 samplesBoth; rrs generated from normal distribution with. 1 variance, .4 covariance, and individual effect rr summed with shared-effect rr in genes affecting both conditions\n")
    file.write(f"\tmean\tstd\n")
    for res in resByParams:
        i += 1

        paramsRun = res[0]
        resRun = res[1]

        pis = tensor([x["bestRes"]["pis"].numpy() for x in resRun])
        PDV_c1true = tensor([x["bestRes"]["PDV_c1true"].numpy() for x in resRun])
        PDV_c2true = tensor([x["bestRes"]["PDV_c2true"].numpy() for x in resRun])
        PDV_c3true = tensor([x["bestRes"]["PDV_cBothTrue"].numpy() for x in resRun])
        PDV_c1inferred = tensor([x["bestRes"]["PDV_c1inferred"].numpy() for x in resRun])
        PDV_c2inferred = tensor([x["bestRes"]["PDV_c2inferred"].numpy() for x in resRun])
        PDV_c3inferred = tensor([x["bestRes"]["PDV_cBothInferred"].numpy() for x in resRun])

        file.write(f"\n\ntrue params: \t{paramsRun} \n\n")

        file.write(f"pi\t {pis.mean(0).numpy()} \t  {pis.std(0).numpy()} \n")

        file.write(f"PDV_c1inferred \t {PDV_c1inferred.mean(0).numpy()}\t {PDV_c1inferred.std(0).numpy()}\n")
        file.write(f"PDV_c1true \t {PDV_c1true.mean(0).numpy()} \t {PDV_c1true.std(0).numpy()}\n")
        file.write(f"PDV_c2inferred \t {PDV_c2inferred.mean(0).numpy()} \t {PDV_c2inferred.std(0).numpy()}\n")
        
        file.write(f"PDV_c2true \t {PDV_c2true.mean(0).numpy()} \t {PDV_c2true.std(0).numpy()}\n")
        
        file.write(f"PDV_c3inferred \t {PDV_c3inferred.mean(0).numpy()} \t {PDV_c3inferred.std(0).numpy()}\n")
        
        file.write(f"PDV_c3true \t {PDV_c3true.mean(0).numpy()} \t {PDV_c3true.std(0).numpy()}\n")

    #     plt.figure(i)
    #     plt.plot(t, s1)
    #     plt.plot(t, 2*s1)
        # plt.subplot(222)
        # plt.plot(t, 2*s1)

In [16]:
with open('./mvln-sim-mvn.json', 'w') as outfile:
    json.dump(cached6NormalSimRes, outfile)

TypeError: Object of type Tensor is not JSON serializable