# Create params.csv

In [1]:
import pandas as pd
import numpy as np

In [2]:
import sys
# Add the sibling scripts directory to the import search path so that
# `settings` can be imported. The path is relative, so it depends on the
# working directory -- presumably the notebook's own folder; confirm if
# the notebook is run from elsewhere.
sys.path.append('../scripts/')
from settings import DEFAULT_PARAMS

In [3]:
from itertools import product

In [4]:
# Display the default parameter dict imported from settings; the grids
# below read the default 'v0' from it.
DEFAULT_PARAMS

{'n_snps': 25,
 'n_individuals': 100,
 'n_genes': 100,
 'cells_per_individual': 'fixed',
 'maf_min': 0.2,
 'maf_max': 0.45,
 'env': 'endo',
 'd_env': 10,
 'offset': 2.5,
 'n_causal_g': 1,
 'n_causal_gxe': 1,
 'n_causal_shared': 0,
 'r0': 0.2,
 'v0': 0.1,
 'likelihood': 'gaussian',
 'nb_dispersion': 1.5,
 'p_dropout': 0.05,
 'normalize': False,
 'dirichlet_alpha': 5,
 'seed': 124823}

## Parameters for null simulation

In [5]:
# Build the parameter grid for the null simulations: no GxE component
# (n_causal_gxe = 0, r0 = 0), a single gene, and n_causal_g set equal to
# n_snps. Every (cells_per_individual, env, likelihood) combination appears
# twice: once with the default v0 and once with v0 = 0.
env = ['endo', 'cluster_uniform', 'cluster_biased']
likelihood = ['gaussian', 'negbin', 'zinb']
cells_per_individual = ['fixed', 'variable']
n_snps = 100

# one row per combination of the three factors
df = pd.DataFrame(
    list(product(cells_per_individual, env, likelihood)),
    columns=['cells_per_individual', 'env', 'likelihood'])

# if not gaussian, apply quantile normalization
df['normalize'] = df['likelihood'] != 'gaussian'

# use 10 PCs for the Endoderm environment and 5 for all others
df['d_env'] = np.where(df['env'] == 'endo', 10, 5)

# duplicate the grid before v0 is assigned so both copies share all other columns
df_sub = df.copy()
# simulate with DEFAULT genetic effect strength
df['v0'] = DEFAULT_PARAMS['v0']
# simulate with no persistent effect
df_sub['v0'] = 0
# `axis` has to be passed by keyword: the positional form was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
df = pd.concat([df, df_sub], axis=0)

df['n_causal_g'] = n_snps
df['n_causal_gxe'] = 0
df['r0'] = 0
df['n_snps'] = n_snps
df['n_genes'] = 1
# the concatenation leaves duplicated index labels; renumber 0..N-1
df = df.reset_index(drop=True)

In [6]:
# reorder
params = df[[
    'n_genes', 
    'n_snps', 
    'n_causal_g', 
    'n_causal_gxe', 
    'r0', 
    'v0', 
    'likelihood', 
    'normalize', 
    'cells_per_individual', 
    'env', 
    'd_env'
]]

In [7]:
# Sanity check: 2 cells_per_individual x 3 env x 3 likelihood = 18 rows,
# doubled for the two v0 settings = 36 rows, with 11 columns.
params.shape

(36, 11)

## Parameters for power simulations

In [8]:
# Build the parameter grid for the power simulations: one causal GxE SNP,
# no causal persistent-effect SNP, default v0, and r0 varied over three values.
# All 'endo' rows are additionally repeated with d_env = 1.
env = ['endo', 'cluster_uniform', 'cluster_biased']
likelihood = ['gaussian', 'negbin', 'zinb']
n_causal_gxe = 1
cells_per_individual = ['fixed', 'variable']
r0 = [0.1, 0.2, 0.3]
# NOTE(review): this value is never read below -- the 'n_snps' column is
# hard-coded to 1. Confirm which of the two is intended.
n_snps = 25

# one row per combination of the four factors
df = pd.DataFrame(
    list(product(cells_per_individual, env, r0, likelihood)),
    columns=['cells_per_individual', 'env', 'r0', 'likelihood'])

# if not gaussian, apply quantile normalization
df['normalize'] = df['likelihood'] != 'gaussian'

# use 10 PCs for the Endoderm environment and 5 for all others
df['d_env'] = np.where(df['env'] == 'endo', 10, 5)

df['v0'] = DEFAULT_PARAMS['v0']
df['n_snps'] = 1
df['n_genes'] = 100
df['n_causal_g'] = 0
df['n_causal_gxe'] = n_causal_gxe

# extra copy of the 'endo' rows, overriding d_env with 1
df_sub = df.query('env == "endo"').copy()
df_sub['d_env'] = 1

# `axis` has to be passed by keyword: the positional form was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 onwards.
df = pd.concat([df, df_sub], axis=0)
df = df.reset_index(drop=True)

In [9]:
# Sanity check: 2 cells_per_individual x 3 env x 3 r0 x 3 likelihood = 54 rows,
# plus the 18 'endo' rows repeated with d_env = 1 = 72 rows, with 11 columns.
df.shape

(72, 11)

## Combine

In [10]:
# Stack the null-simulation rows (params) on top of the power-simulation rows
# (df) and renumber the index. concat aligns columns by name, so the result
# keeps the column order of `params`.
# `axis` is passed by keyword: the positional form was deprecated in pandas 1.3
# and raises TypeError from pandas 2.0 onwards.
# NOTE: this cell reassigns `params`, so running it a second time without
# re-running the cells above appends `df` again.
params = pd.concat([params, df], axis=0).reset_index(drop=True)

In [11]:
# Raise the row display limit to 150 so the table shown below is not truncated.
pd.options.display.max_rows = 150

In [12]:
# Display the combined table: null-simulation rows first, then power-simulation rows.
params

Unnamed: 0,n_genes,n_snps,n_causal_g,n_causal_gxe,r0,v0,likelihood,normalize,cells_per_individual,env,d_env
0,1,100,100,0,0.0,0.1,gaussian,False,fixed,endo,10
1,1,100,100,0,0.0,0.1,negbin,True,fixed,endo,10
2,1,100,100,0,0.0,0.1,zinb,True,fixed,endo,10
3,1,100,100,0,0.0,0.1,gaussian,False,fixed,cluster_uniform,5
4,1,100,100,0,0.0,0.1,negbin,True,fixed,cluster_uniform,5
5,1,100,100,0,0.0,0.1,zinb,True,fixed,cluster_uniform,5
6,1,100,100,0,0.0,0.1,gaussian,False,fixed,cluster_biased,5
7,1,100,100,0,0.0,0.1,negbin,True,fixed,cluster_biased,5
8,1,100,100,0,0.0,0.1,zinb,True,fixed,cluster_biased,5
9,1,100,100,0,0.0,0.1,gaussian,False,variable,endo,10


## Save

In [13]:
# Write the combined parameter table one directory above the notebook,
# leaving out the row index so the file holds only the parameter columns.
output_path = '../params.csv'
params.to_csv(output_path, index=False)