In [1]:
from numpy_sugar.linalg import economic_qs, economic_qs_linear
from numpy.random import RandomState, default_rng
from numpy import arange, concatenate, newaxis, median
from glimix_core.lmm import LMM
from scipy.stats import chi2
from numpy_sugar import epsilon
from numpy import asarray, clip, inf
import matplotlib.pyplot as plt

from struct_lmm2 import StructLMM2, create_variances, sample_phenotype, sample_phenotype_fixed_gxe

In [2]:
def compute_pvalues():
    """
    Store likelihood ratio test p-values in ``stats["pv20"]``.

    The null log marginal likelihood is read from ``stats["lml0"]``, the
    alternative ones from ``stats["lml2"]`` and the degrees of freedom from
    ``stats["dof20"]``; the three are handed to ``lrt_pvalues`` and the result
    is written back into the same mapping.

    NOTE(review): ``stats`` is a global that is not created anywhere in the
    visible part of this notebook, and ``lrt_pvalues`` is defined in a later
    cell. Both must exist before this function is called.
    """
    null_lml = stats["lml0"]
    alt_lmls = stats["lml2"]
    dof = stats["dof20"]
    stats["pv20"] = lrt_pvalues(null_lml, alt_lmls, dof)

In [3]:
def lrt_pvalues(null_lml, alt_lmls, dof=1):
    """
    Likelihood ratio test p-values.

    For every alternative model the statistic ``-2 * null_lml + 2 * alt_lml``
    is formed and its upper-tail probability under a chi-squared distribution
    with ``dof`` degrees of freedom is returned.

    Parameters
    ----------
    null_lml : float or array_like
        Log of the marginal likelihood under the null hypothesis. Either a
        single value shared by all alternatives, or one value per alternative
        (it is combined with ``alt_lmls`` element-wise).
    alt_lmls : array_like
        Log of the marginal likelihoods under the alternative hypotheses.
    dof : int
        Degrees of freedom of the chi-squared reference distribution.

    Returns
    -------
    pvalues : ndarray
        P-values, clipped to ``[epsilon.super_tiny, 1 - epsilon.tiny]``.
    """
    null_term = -2 * null_lml
    alt_term = 2 * asarray(alt_lmls, float)
    # A fit in which the alternative is not better than the null gives a
    # non-positive statistic; it is floored at a tiny positive value.
    statistics = clip(null_term + alt_term, epsilon.super_tiny, inf)
    tail_probabilities = chi2.sf(statistics, df=dof)
    # Keep the result strictly inside (0, 1).
    return clip(tail_probabilities, epsilon.super_tiny, 1 - epsilon.tiny)

In [26]:
# Seeded generator, so that every run simulates the same data set.
random = default_rng(10)

n_individuals = 100

# Lower and upper bound handed to the simulator as maf_min / maf_max
# (presumably minor allele frequencies -- confirm against struct_lmm2).
maf_min = 0.20
maf_max = 0.45

n_snps = 100
# n_snps = 500

# One value per individual: 1, 2, ..., n_individuals.
# Earlier runs used a single scalar instead (100, 10 or 2); the scalar
# assignment that used to precede this line was immediately overwritten and
# has been removed.
n_cells = arange(n_individuals) + 1

In [27]:
# Display the values that are passed as `n_cells=` to the simulator below.
n_cells

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [28]:
# Both values are forwarded unchanged to sample_phenotype_fixed_gxe below.
n_env_groups = 2
offset = 0.3

# indices of causal SNPs
g_causals = [5, 6]
gxe_causals = [10, 11]

# weight of genetic variance explained by GxE
r0 = 0.5
# r0 = 0
# r0 = 1

# total variance explained by genetics (G + GxE)
v0 = 0.5
# v0 = 0

# the remaining variance is split equally among the other terms:
# k = e = n = (1 - (g + gxe)) / 3


v = create_variances(r0, v0)

In [29]:
# Display the variance components returned by create_variances(r0, v0).
v

Variances(g=0.25, gxe=0.25, k=0.16666666666666666, e=0.16666666666666666, n=0.16666666666666666)

In [30]:
# Create the simulation object (fixed effects). Every setting comes from the
# configuration cells above; `random` is the seeded generator, so the
# simulated data are the same on every fresh run.
s = sample_phenotype_fixed_gxe(
    offset=offset,
    n_individuals=n_individuals,
    n_snps=n_snps,
    n_cells=n_cells,
    n_env_groups=n_env_groups,
    maf_min=maf_min,
    maf_max=maf_max,
    g_causals=g_causals,
    gxe_causals=gxe_causals,
    variances=v,
    random=random,
)

  return array(a, dtype, copy=False, order=order)


In [33]:
# kinship matrix (block diagonal)
# plot it for a few individuals to check that it looks right
# plt.matshow(s.K)

In [34]:
# Decomposition handed to the LMM of Test 1. It is computed from s.Lk instead
# of from the full matrix s.K (alternative kept below).
# NOTE(review): presumably s.K == s.Lk @ s.Lk.T -- confirm against struct_lmm2.
QS = economic_qs_linear(s.Lk)
# QS = economic_qs(s.K)

In [35]:
# Test 1:
#   H0: s.y_e + s.y_k + s.y_n
#   H1: s.y_g + s.y_e + s.y_k + s.y_n
# The GxE component is multiplied by zero, so this phenotype carries no GxE
# contribution.
y = s.offset + s.y_g + 0*s.y_gxe + s.y_e + s.y_k + s.y_n
# Covariates of the null model: the columns of s.M followed by those of s.E.
M = concatenate([s.M, s.E], axis=1)
# NOTE(review): restricted=False presumably selects maximum likelihood rather
# than REML -- confirm in the glimix-core documentation.
lmm = LMM(y, M, QS, restricted=False)
lmm.fit(verbose=False)

In [36]:
# Scan s.G against the null model fitted in the previous cell
# (presumably one SNP per column of s.G -- the Test 2 loop iterates s.G.T).
scanner = lmm.get_fast_scanner()
data1 = scanner.fast_scan(s.G)
# Asserting to make sure that DoF==1
assert data1["effsizes1"].ndim == 1

Scanning: 100%|██████████| 50/50 [00:00<00:00, 554.66it/s]


In [37]:
dof = 1
# A single null model is shared by all candidates of Test 1.
lml0 = scanner.null_lml()
lml1 = data1["lml"]
pv = test1_pvalues = lrt_pvalues(lml0, lml1, dof)
# show p-values
# expected: SNPs 5 and 6 significant (causal for G); SNPs 10 and 11 not
# significant (GxE is not tested here); median over the SNPs from index 12
# onward close to 0.5 (p-values uniformly distributed)
print(pv[5], pv[6], pv[10], pv[11], median(pv[12:]))

1.0181690710706422e-09 1.1929817639608787e-06 0.3339458105547522 0.7117697966232103 0.5376754211992842


In [24]:
###

In [38]:
# build K * E: product of s.K with s.E @ s.E.T using `*`
# (element-wise for ndarrays -- assumes neither operand is a numpy matrix)
K_E = s.K * (s.E @ s.E.T)
# NOTE: this rebinds QS. The Test 1 cells were run with the decomposition of
# s.Lk; re-running them after this cell would silently use K_E instead.
QS = economic_qs(K_E)
# QS = economic_qs(s.K)

In [39]:
# plt.matshow(K_E)

In [40]:
# Test 2:
#   H0: s.y_g + s.y_e + s.y_k + s.y_n
#   H1: s.y_gxe + s.y_g + s.y_e + s.y_k + s.y_n
y = s.offset + s.y_gxe + s.y_g + s.y_e + s.y_k + s.y_n
# Sanity check: the sum of the components must reproduce the simulated
# phenotype in EVERY entry, so the largest absolute deviation is bounded.
# (Bounding the smallest one would pass as soon as a single entry agreed.)
assert max(abs(y - s.y)) < 1e-10
# Per-SNP results: null and alternative log marginal likelihoods.
data2 = {"lml0": [], "lml1": []}
dof = 1
# Iterating over s.G.T visits one column of s.G per step.
for g in s.G.T:
    # Make it a column so that it can be stacked next to the covariates.
    g = g[:, newaxis]
    # The null model of this test contains the SNP itself, so a separate
    # model has to be fitted for every SNP.
    M = concatenate([s.M, g, s.E], axis=1)
    # QS is whatever the name is bound to at this point (the K_E cell above
    # rebinds it).
    lmm = LMM(y, M, QS, restricted=False)
    lmm.fit(verbose=False)
    scanner = lmm.get_fast_scanner()
    # Candidate under H1: s.E multiplied with the SNP column.
    d = scanner.fast_scan(s.E * g)
    # Asserting to make sure that DoF==1
    assert d["effsizes1"].ndim == 1
    lml0 = scanner.null_lml()
    lml1 = d["lml"]
    data2["lml0"].append(lml0)
    data2["lml1"].append(lml1)

Scanning: 100%|██████████| 1/1 [00:00<00:00, 513.06it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 847.85it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 649.88it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 643.40it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 546.63it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 500.69it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 650.78it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 599.44it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 339.37it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 460.26it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 834.19it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 794.53it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 827.12it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 630.63it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 605.06it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 755.46it/s]
Scanning: 100%|██████████| 1/1 [00:00<00:00, 758.60it/s]
Scanning: 100%|██████████| 1/1 

In [41]:
# One null log marginal likelihood per SNP (each SNP had its own null model).
lml0 = asarray(data2["lml0"])
# Each stored alternative is joined into one flat array; presumably every
# entry is a length-1 array (one candidate per scan) -- confirm.
lml1 = concatenate(data2["lml1"])
pv = test2_pvalues = lrt_pvalues(lml0, lml1, dof)
# show p-values
# expected: SNPs 5 and 6 not significant (G is not tested here); SNPs 10 and
# 11 significant (causal for GxE); median over the SNPs from index 12 onward
# close to 0.5 (p-values uniformly distributed)
print(pv[5], pv[6], pv[10], pv[11], median(pv[12:]))

0.736330519107421 0.16707612676294226 1.1684778820678078e-09 3.6126708880348605e-11 0.5557717336710954
