# Prepare input data for runtime evaluation

For the runtime evaluation, we combine simulated individuals and genotypes with the observed single-cell expression profiles.

In [1]:
import sys

sys.path.append('..')
import utils.settings as settings

import pandas as pd
import numpy as np

import scanpy as sc

In [2]:
# Benchmark grid, taken from the shared settings module.
# N_CELLS and N_DONORS are iterated over below (one output file per value);
# N_SNPS and N_GENES are used as sizes for the simulated / subsampled data.
N_CELLS, N_DONORS = settings.RUNTIME_N_CELLS, settings.RUNTIME_N_DONORS
N_SNPS, N_GENES = settings.RUNTIME_N_SNPS, settings.RUNTIME_N_GENES

In [5]:
# Single seeded generator shared by every random draw in this notebook, so
# that re-running all cells in order reproduces the same simulated data.
rng = np.random.default_rng(seed=123)

## Create data 

In [6]:
# Synthetic identifiers: one donor label per donor in the largest donor
# setting, and one label per simulated SNP.
DONORS = [f'donor_{idx}' for idx in range(max(N_DONORS))]
SNPS = [f'snp_{idx}' for idx in range(N_SNPS)]

In [8]:
# Identity matrix labelled by donor on both axes; it is written out further
# down as the kinship matrix for the runtime benchmark.
K = pd.DataFrame(
    data=np.identity(max(N_DONORS)),
    index=DONORS,
    columns=DONORS,
)

In [9]:
# Load the filtered single-cell expression data.
adata = sc.read(settings.DATA_DIR + '/filtered/adata.h5ad')
# Keep a random subset of N_GENES genes (sampled without replacement).
# Subsetting an AnnData object yields a view, so take an explicit copy:
# the assignment to `.var` below would otherwise force an implicit copy of
# the view and emit a warning.
adata = adata[
    :, rng.choice(adata.shape[1], N_GENES, replace=False)
].copy()
# Every gene is annotated with the same comma-separated list of all
# simulated SNP identifiers.
adata.var['snpID'] = ','.join(SNPS)

  adata.var['snpID'] = ','.join(SNPS)


In [10]:
# Simulated genotype matrix (donors x SNPs): each entry is drawn
# independently and uniformly from {0, 1, 2}.
G = pd.DataFrame(
    rng.choice([0, 1, 2], size=[max(N_DONORS), N_SNPS]),
    index=DONORS,
    columns=SNPS,
)

In [11]:
# Vary the number of cells while keeping the number of donors fixed.
# NOTE(review): assumes DONORS holds at least 100 entries, i.e.
# max(N_DONORS) >= 100 -- confirm against the settings module.
n_donors = 100
for n_cells in N_CELLS:
    # Random subset of cells (without replacement), materialised as a copy
    # so that adding an obs column does not touch the full dataset.
    cell_idx = rng.choice(adata.shape[0], n_cells, replace=False)
    adata_sub = adata[cell_idx, :].copy()

    # Give every donor the same number of cells in shuffled order, then
    # hand the leftover cells to randomly chosen donors.
    donor_pool = DONORS[:n_donors]
    cells_per_donor, n_leftover = divmod(adata_sub.shape[0], n_donors)
    donors = rng.permutation(np.tile(donor_pool, cells_per_donor)).tolist()
    donors.extend(rng.choice(donor_pool, n_leftover).tolist())
    adata_sub.obs['donor_long_id'] = donors

    out_path = settings.DATA_DIR + f'/filtered/adata_runtime_ncells~{n_cells}_ndonors~{n_donors}.h5ad'
    sc.write(out_path, adata_sub)

In [12]:
# Vary the number of donors while keeping the number of cells fixed.
# Sampling is without replacement, so the loaded dataset must contain at
# least `n_cells` cells.
n_cells = 10000
for n_donors in N_DONORS:
    # Fresh random subset of cells for every donor setting.
    cell_idx = rng.choice(adata.shape[0], n_cells, replace=False)
    adata_sub = adata[cell_idx, :].copy()

    # Give every donor the same number of cells in shuffled order, then
    # hand the leftover cells to randomly chosen donors.
    donor_pool = DONORS[:n_donors]
    cells_per_donor, n_leftover = divmod(adata_sub.shape[0], n_donors)
    donors = rng.permutation(np.tile(donor_pool, cells_per_donor)).tolist()
    donors.extend(rng.choice(donor_pool, n_leftover).tolist())
    adata_sub.obs['donor_long_id'] = donors

    out_path = settings.DATA_DIR + f'/filtered/adata_runtime_ncells~{n_cells}_ndonors~{n_donors}.h5ad'
    sc.write(out_path, adata_sub)

In [13]:
# Persist the simulated kinship and genotype tables next to the expression
# files written above.
kinship_path = settings.DATA_DIR + '/filtered/kinship_runtime.csv'
K.to_csv(kinship_path)

genotype_path = settings.DATA_DIR + '/filtered/genotypes_runtime.csv'
G.to_csv(genotype_path)