In [16]:
import pandas as pd
import numpy as np
import tensorqtl
from tensorqtl import genotypeio, cis, trans
import matplotlib.pyplot as plt

# define paths to data
plink_prefix_path = 'swath-ms.01'
expression_bed = 'swath-ms.expression.bed.gz'
covariates_file = 'swath-ms.covariates.txt'
prefix = 'swath-ms'

# load phenotypes and covariates
phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(expression_bed)
covariates_df = pd.read_csv(covariates_file, sep='\t', index_col=0).T

# PLINK reader for genotypes
pr = genotypeio.PlinkReader(plink_prefix_path)
genotype_df = pr.load_genotypes()
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]


Mapping files:   0%|          | 0/3 [00:00<?, ?it/s][A
Mapping files:  33%|███▎      | 1/3 [11:15<22:31, 675.69s/it][A
Mapping files:  67%|██████▋   | 2/3 [11:16<07:53, 473.14s/it][A
Mapping files: 100%|██████████| 3/3 [11:19<00:00, 226.49s/it][A


### *cis*-QTL: nominal p-values for all variant-phenotype pairs

In [17]:
# map all cis-associations (results for each chromosome are written to file)

# all  genes
cis.map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df, prefix)

ValueError: Lengths must match to compare

### *cis*-QTL: empirical p-values for phenotypes

In [None]:
# all genes
cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df)

In [None]:
cis_df.head()

### *trans*-QTL mapping

In [None]:
# run mapping
# to limit output size, only associations with p-value <= 1e-5 are returned
trans_df = trans.map_trans(genotype_df, phenotype_df, covariates_df, batch_size=20000,
                           return_sparse=True, pval_threshold=1e-5, maf_threshold=0.05)

In [None]:
# remove cis-associations
trans_df = trans.filter_cis(trans_df, phenotype_pos_df.T.to_dict(), variant_df, window=5000000)

In [None]:
trans_df.head()