# Create params.csv

In [1]:
import pandas as pd
import numpy as np

In [2]:
import sys
sys.path.append('../scripts/')
from settings import DEFAULT_PARAMS

In [3]:
from itertools import product

In [4]:
# Display the baseline settings imported from ../scripts/settings.py; every grid below
# fills in columns it does not set explicitly from this dict.
DEFAULT_PARAMS

{'n_snps': 1,
 'n_individuals': 50,
 'n_genes': 150,
 'cells_per_individual': 'fixed',
 'n_cells': 50,
 'maf_min': 0.2,
 'maf_max': 0.45,
 'env': 'endo',
 'n_env': 20,
 'n_env_gxe': 20,
 'n_env_tested': 20,
 'offset': 2.5,
 'n_causal_g': 1,
 'n_causal_gxe': 1,
 'n_causal_shared': 1,
 'r0': 0.5,
 'v0': 0.03,
 'likelihood': 'gaussian',
 'nb_dispersion': 1.5,
 'p_dropout': 0.05,
 'normalize': True,
 'dirichlet_alpha': 2,
 'seed': 19350,
 'model': 'structlmm2'}

## Parameters for calibration assessment

In [5]:
# Factor levels for the calibration grid.
likelihood = ['gaussian', 'negbin']
model = ['structlmm', 'structlmm2']
cells_per_individual = ['fixed']

# simulate with and without persistent effect
v0 = [DEFAULT_PARAMS['v0'], 0]

# simulate with and without repeat structure
n_cells = [50, 1]

# One row per (n_cells, model, likelihood, v0) combination.
grid_columns = ['n_cells', 'model', 'likelihood', 'v0']
df = pd.DataFrame(list(product(n_cells, model, likelihood, v0)), columns=grid_columns)

# Settings shared by every calibration row; keyword order fixes the column order.
n_snps = 200
df = df.assign(
    n_causal_g=n_snps,
    n_causal_shared=0,
    n_causal_gxe=0,
    r0=0,
    n_snps=n_snps,
    n_genes=1,
    n_individuals=DEFAULT_PARAMS['n_individuals'],
)

# Rows with a single cell per individual get max(n_cells) times as many individuals.
single_cell_rows = df['n_cells'] == 1
df.loc[single_cell_rows, 'n_individuals'] = max(n_cells) * DEFAULT_PARAMS['n_individuals']

# Every setting not chosen above falls back to its default value.
missing_keys = [key for key in DEFAULT_PARAMS if key not in df.columns]
for key in missing_keys:
    df[key] = DEFAULT_PARAMS[key]

In [6]:
# Start the combined parameter table from the calibration grid, with a fresh 0..n-1 index.
params = df.reset_index(drop=True)

## Parameters for power simulations

In [7]:
# Factor levels crossed in every power-simulation grid. `create_alternative_params_df`
# reads `cells_per_individual`, `model` and `likelihood` as module-level globals, so these
# assignments must run before it is called.
cells_per_individual = ['fixed', 'variable']
likelihood = ['gaussian', 'negbin']
model = ['structlmm2', 'structlmm2_fixed']
# Seed provenance (kept for reference): presumably drawn once with the call below, giving
# the ten values listed; the active list is a six-element subset of those ten.
# seed = np.random.randint(1, 50000, 10)
# seed = [48104, 19350, 35955, 44900, 13387, 45461, 16116, 32306, 619, 25220]
seed = [19350, 44900, 45461, 32306, 619, 25220]

In [8]:
def create_alternative_params_df(n_env_gxe, n_env_tested, seed, r0):
    """Build the full-factorial parameter grid for one power-simulation scenario.

    The arguments are crossed with the module-level lists `cells_per_individual`,
    `model` and `likelihood`, which must be defined before this function is called.

    Parameters
    ----------
    n_env_gxe, n_env_tested, seed, r0 : iterable
        Levels to cross for the columns of the same name.

    Returns
    -------
    pandas.DataFrame
        One row per combination. The crossed columns come first, followed by every
        remaining key of `DEFAULT_PARAMS` filled with its default value.
    """
    # Insertion order defines both the product order and the column order.
    factors = {
        'cells_per_individual': cells_per_individual,
        'model': model,
        'likelihood': likelihood,
        'r0': r0,
        'n_env_gxe': n_env_gxe,
        'n_env_tested': n_env_tested,
        'seed': seed,
    }
    grid = pd.DataFrame(list(product(*factors.values())), columns=list(factors))

    # Settings that are not crossed keep their default value.
    for name, default in DEFAULT_PARAMS.items():
        if name in grid.columns:
            continue
        grid[name] = default
    return grid

In [9]:
# Scenario 1: vary r0, with n_env_gxe and n_env_tested both fixed at 20.
r0 = [0, 0.25, 0.5, 0.75, 1]
n_env_gxe = [20]
n_env_tested = [20]

# `axis` is passed by keyword: pandas >= 2.0 raises TypeError for positional
# arguments after `objs` (deprecated since pandas 1.3).
params = pd.concat([params, create_alternative_params_df(n_env_gxe, n_env_tested, seed, r0)], axis=0)

In [10]:
# Scenario 2: vary n_env_gxe, with r0 at its default and n_env_tested fixed at 20.
r0 = [DEFAULT_PARAMS['r0']]
n_env_gxe = [2, 5, 10, 15, 20]
n_env_tested = [20]

# `axis` is passed by keyword: pandas >= 2.0 raises TypeError for positional
# arguments after `objs` (deprecated since pandas 1.3).
params = pd.concat([params, create_alternative_params_df(n_env_gxe, n_env_tested, seed, r0)], axis=0)

In [11]:
# Scenario 3: vary n_env_tested, with r0 at its default and n_env_gxe fixed at 20.
r0 = [DEFAULT_PARAMS['r0']]
n_env_gxe = [20]
n_env_tested = [2, 5, 10, 15, 20]

# `axis` is passed by keyword: pandas >= 2.0 raises TypeError for positional
# arguments after `objs` (deprecated since pandas 1.3).
params = pd.concat([params, create_alternative_params_df(n_env_gxe, n_env_tested, seed, r0)], axis=0)

In [12]:
# Re-number rows 0..n-1. The concatenated frames each carried their own index, so labels
# repeat; the label-based selection and assignment that follow need unique labels.
params = params.reset_index(drop=True)

## Change fraction of genetic variance for negative binomial simulations

In [13]:
# Index labels of the negative-binomial rows that simulate a non-zero v0.
is_negbin = params['likelihood'] == 'negbin'
has_positive_v0 = params['v0'] > 0
ids = params.index[is_negbin & has_positive_v0]

In [14]:
# Set v0 to 0.15 on the rows selected in `ids`; rows with v0 == 0 were excluded from
# `ids` and stay at zero. Label-based, so it relies on `params` having a unique index.
params.loc[ids, 'v0'] = 0.15

## Summarize

In [15]:
params = params.reset_index(drop=True)

# Keep only the columns in which at least one row differs from the first row, i.e. drop
# settings that are constant across the whole table, then remove repeated rows.
differs_from_first = params.ne(params.iloc[0]).any()
params = params.loc[:, differs_from_first].drop_duplicates()
params

Unnamed: 0,n_cells,model,likelihood,v0,n_causal_g,n_causal_shared,n_causal_gxe,r0,n_snps,n_genes,n_individuals,cells_per_individual,n_env_gxe,n_env_tested,seed
0,50,structlmm,gaussian,0.03,200,0,0,0.0,200,1,50,fixed,20,20,19350
1,50,structlmm,gaussian,0.00,200,0,0,0.0,200,1,50,fixed,20,20,19350
2,50,structlmm,negbin,0.15,200,0,0,0.0,200,1,50,fixed,20,20,19350
3,50,structlmm,negbin,0.00,200,0,0,0.0,200,1,50,fixed,20,20,19350
4,50,structlmm2,gaussian,0.03,200,0,0,0.0,200,1,50,fixed,20,20,19350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,50,structlmm2_fixed,negbin,0.15,1,1,1,0.5,1,150,50,variable,20,15,44900
726,50,structlmm2_fixed,negbin,0.15,1,1,1,0.5,1,150,50,variable,20,15,45461
727,50,structlmm2_fixed,negbin,0.15,1,1,1,0.5,1,150,50,variable,20,15,32306
728,50,structlmm2_fixed,negbin,0.15,1,1,1,0.5,1,150,50,variable,20,15,619


## Save

In [16]:
# Write the table one directory above the notebook's working directory. index=False omits
# the row labels, which are no longer contiguous after drop_duplicates().
params.to_csv('../params.csv', index=False)