In [None]:
import pandas as pd
import numpy as np
import tensorqtl
from tensorqtl import genotypeio, cis, trans
import matplotlib.pyplot as plt

# define paths to data
plink_prefix_path = 'swath-ms.01'
expression_bed = 'swath-ms.expression.bed.gz'
covariates_file = 'swath-ms.covariates.txt'
prefix = 'swath-ms'

# load phenotypes and covariates
phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(expression_bed)
covariates_df = pd.read_csv(covariates_file, sep='\t', index_col=0).T

# PLINK reader for genotypes
pr = genotypeio.PlinkReader(plink_prefix_path)
genotype_df = pr.load_genotypes()
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]

Mapping files:  33%|███▎      | 1/3 [05:20<10:41, 320.92s/it]

In [None]:
print(phenotype_df)

### *cis*-QTL: nominal p-values for all variant-phenotype pairs

In [None]:
# map all cis-associations (results for each chromosome are written to file)

# all  genes
cis.map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df, prefix)

### *cis*-QTL: empirical p-values for phenotypes

In [5]:
# all genes
cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df)

ValueError: Lengths must match to compare

In [None]:
cis_df.head()

### *trans*-QTL mapping

In [None]:
# run mapping
# to limit output size, only associations with p-value <= 1e-5 are returned
trans_df = trans.map_trans(genotype_df, phenotype_df, covariates_df, batch_size=20000,
                           return_sparse=True, pval_threshold=1e-5, maf_threshold=0.05)

In [None]:
# remove cis-associations
trans_df = trans.filter_cis(trans_df, phenotype_pos_df.T.to_dict(), variant_df, window=5000000)

In [None]:
trans_df.head()