In [1]:
import numpy as np
import scipy as sp
import pandas as pd
from sklearn import linear_model


### meta-Poisson model
For meta cells $i=1,...,M$, let $S_i$ be the set of cells for meta cell $i$:
- Average cell state-specific ATAC: $\theta_i \sim N(0,0.25)$ (set min=-3 and max=3 for stability)
 
 For cells $c \in S_i$: 
 - True ATAC state: $\lambda_c \sim LogNormal(\theta_i, \sigma_\lambda^2)$ (to make sure $\lambda_c$ is positive; values of $\sigma_\lambda^2$ will be varied)
 - ATAC: $X_c \sim Poi(\lambda_c)$
 - RNA: $Y_c \sim Poi(\exp(\lambda_c b + N(0, 0.01)))$

Observation: 
- When $\sigma_\lambda^2$ is small, performance is good
- When $\sigma_\lambda^2$ is big, performance is bad

In [46]:
n_meta = 1000
n_cell_per_meta = 25
n_cell = n_meta * n_cell_per_meta
n_rep = 25

# for coef_b in [0, 1]:
for coef_b in [1]:
#     for sigma_lambda in [0.01, 0.1, 0.2]:
    for sigma_lambda in [0.001]:
        print('====================================================')
        print('coef_b=%0.1f, sigma_lambda = %0.2e' % (coef_b, sigma_lambda))
        print('====================================================')

        dic_data = {}
        for i_rep in range(n_rep):
            np.random.seed(i_rep)
            v_theta = (np.random.randn(n_meta) * 0.5).clip(min=-3, max=3)
            v_lambda = np.zeros(n_meta*n_cell_per_meta)
            for i in range(n_meta):
                temp_v = np.exp(np.random.randn(n_cell_per_meta) * sigma_lambda + v_theta[i])
                v_lambda[i*n_cell_per_meta : (i+1)*n_cell_per_meta] = temp_v

            v_x = np.random.poisson(v_lambda)
            v_y = np.random.poisson(np.exp(v_lambda*coef_b + np.random.randn(n_cell)*0.1))
            dic_data[i_rep] = {
                'v_lambda' : v_lambda,
                'v_x' : v_x,
                'v_y' : v_y
            }

        # method 1: Poisson regression of v_y against v_x
        v_beta_hat = []
        for i_rep in range(n_rep):
            v_lambda = dic_data[i_rep]['v_lambda']
            v_x = dic_data[i_rep]['v_x']
            v_y = dic_data[i_rep]['v_y']
            clf = linear_model.PoissonRegressor()
            res = clf.fit(v_x.reshape([n_cell, 1]), v_y)
            v_beta_hat.append(clf.coef_[0])
        print('method 1: Poisson regression of v_y against v_x')
        beta_mean = np.mean(v_beta_hat)
        beta_se = np.std(v_beta_hat) / np.sqrt(n_rep)
        print('beta_mean=%0.3f, beta_se=%0.3f, beta_z=%0.3f' % (beta_mean, beta_se, beta_mean/beta_se))
        print('')

        # method 2 (oracle): Poisson regression of v_y against v_lambda
        v_beta_hat = []
        for i_rep in range(n_rep):
            v_lambda = dic_data[i_rep]['v_lambda']
            v_x = dic_data[i_rep]['v_x']
            v_y = dic_data[i_rep]['v_y']
            clf = linear_model.PoissonRegressor()
            res = clf.fit(v_lambda.reshape([n_cell, 1]), v_y)
            v_beta_hat.append(clf.coef_[0])
        print('method 2 (oracle): Poisson regression of v_y against v_lambda')
        beta_mean = np.mean(v_beta_hat)
        beta_se = np.std(v_beta_hat) / np.sqrt(n_rep)
        print('beta_mean=%0.3f, beta_se=%0.3f, beta_z=%0.3f' % (beta_mean, beta_se, beta_mean/beta_se))
        print('')

        # method 3 (oracle): Poisson regression of v_y against v_lambda_bar (meta_cell_averaged v_lambda)
        v_beta_hat = []
        for i_rep in range(n_rep):
            v_lambda = dic_data[i_rep]['v_lambda']
            v_x = dic_data[i_rep]['v_x']
            v_y = dic_data[i_rep]['v_y']
            v_lambda_bar = np.zeros(n_cell)
            for i in range(n_meta):
                v_lambda_bar[i*n_cell_per_meta : (i+1)*n_cell_per_meta] = \
                    np.mean(v_lambda[i*n_cell_per_meta : (i+1)*n_cell_per_meta])
            clf = linear_model.PoissonRegressor()
            res = clf.fit(v_lambda_bar.reshape([n_cell, 1]), v_y)
            v_beta_hat.append(clf.coef_[0])
        print('method 3 (oracle): Poisson regression of v_y against v_lambda_bar'
              ' (meta_cell_averaged v_lambda)')
        beta_mean = np.mean(v_beta_hat)
        beta_se = np.std(v_beta_hat) / np.sqrt(n_rep)
        print('beta_mean=%0.3f, beta_se=%0.3f, beta_z=%0.3f' % (beta_mean, beta_se, beta_mean/beta_se))
        print('')

        # method 4: Poisson regression of v_y against v_x_bar (meta_cell_averaged v_x)
        v_beta_hat = []
        for i_rep in range(n_rep):
            v_lambda = dic_data[i_rep]['v_lambda']
            v_x = dic_data[i_rep]['v_x']
            v_y = dic_data[i_rep]['v_y']
            v_x_bar = np.zeros(n_cell)
            for i in range(n_meta):
                v_x_bar[i*n_cell_per_meta : (i+1)*n_cell_per_meta] = \
                    np.mean(v_x[i*n_cell_per_meta : (i+1)*n_cell_per_meta])
            clf = linear_model.PoissonRegressor()
            res = clf.fit(v_x_bar.reshape([n_cell, 1]), v_y)
            v_beta_hat.append(clf.coef_[0])
        print('method 4: Poisson regression of v_y against v_x_bar (meta_cell_averaged v_x)')
        beta_mean = np.mean(v_beta_hat)
        beta_se = np.std(v_beta_hat) / np.sqrt(n_rep)
        print('beta_mean=%0.3f, beta_se=%0.3f, beta_z=%0.3f' % (beta_mean, beta_se, beta_mean/beta_se))
        print('')

coef_b=1.0, sigma_lambda = 1.00e-03
method 1: Poisson regression of v_y against v_x
beta_mean=0.344, beta_se=0.021, beta_z=16.252

method 2 (oracle): Poisson regression of v_y against v_lambda
beta_mean=0.831, beta_se=0.019, beta_z=44.514

method 3 (oracle): Poisson regression of v_y against v_lambda_bar (meta_cell_averaged v_lambda)
beta_mean=0.831, beta_se=0.019, beta_z=44.514

method 4: Poisson regression of v_y against v_x_bar (meta_cell_averaged v_x)
beta_mean=0.785, beta_se=0.020, beta_z=38.995

