In [6]:
import pandas as pd
import numpy as np
import tensorqtl
from tensorqtl import genotypeio, cis, trans
import matplotlib.pyplot as plt

# define paths to data
plink_prefix_path = 'swath-ms.01'
expression_bed = 'swath-ms.expression.bed.gz'
covariates_file = 'swath-ms.covariates.txt'
prefix = 'swath-ms'

# load phenotypes and covariates
phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(expression_bed)
covariates_df = pd.read_csv(covariates_file, sep='\t', index_col=0).T

# PLINK reader for genotypes
pr = genotypeio.PlinkReader(plink_prefix_path)
genotype_df = pr.load_genotypes()
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]

Mapping files: 100%|██████████| 3/3 [07:40<00:00, 153.35s/it]


In [7]:
print(phenotype_df)

                 1044209248  1044209320  1044209547  1044209603  1044209611  \
gene_id                                                                       
ENSG00000078369   14.360250   14.382605   13.637856   13.905451   13.637856   
ENSG00000116288   15.244771   15.193870   15.193870   15.021602   14.799795   
ENSG00000074800   12.952659   15.289824   14.757962   15.561188   16.013389   
ENSG00000142657   15.039181   15.118557   14.541188   14.754732   14.707662   
ENSG00000009724    9.663436   10.561325    9.663436    9.257668   10.635872   
...                     ...         ...         ...         ...         ...   
ENSG00000182492    8.296330   10.664333    9.543829   10.664333   10.704496   
ENSG00000172534   14.995723   15.228235   14.636681   14.995723   15.228235   
ENSG00000196924   15.424342   15.424342   15.860518   14.671412   15.434810   
ENSG00000071553   13.824646   12.663850   12.892856   12.783846   12.663850   
ENSG00000160211   16.369175   16.169222   16.141661 

### *cis*-QTL: nominal p-values for all variant-phenotype pairs

In [8]:
# map all cis-associations (results for each chromosome are written to file)

# all  genes
cis.map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df, prefix)

ValueError: Lengths must match to compare

### *cis*-QTL: empirical p-values for phenotypes

In [9]:
# all genes
cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df)

ValueError: Lengths must match to compare

In [None]:
cis_df.head()

### *trans*-QTL mapping

In [10]:
# run mapping
# to limit output size, only associations with p-value <= 1e-5 are returned
trans_df = trans.map_trans(genotype_df, phenotype_df, covariates_df, batch_size=20000,
                           return_sparse=True, pval_threshold=1e-5, maf_threshold=0.05)

ValueError: Lengths must match to compare

In [11]:
# remove cis-associations
trans_df = trans.filter_cis(trans_df, phenotype_pos_df.T.to_dict(), variant_df, window=5000000)

NameError: name 'trans_df' is not defined

In [None]:
trans_df.head()