In [1]:
import os 
import pandas as pd
pd.set_option('display.max_columns', None)
import subprocess
import glob
import pybedtools as pbt 
pbt.set_tempdir('/mnt/hpcscratch/jreyna/')
pbt.set_bedtools_path('/mnt/BioApps/bedtools/bin/')
import numpy as np

os.chdir('/mnt/BioHome/jreyna/jreyna/projects/dchallenge/')

genome_sizes = 'results/refs/hg19/hg19.chrom.sizes'

# make the directory to save our data
outdir = 'results/notebooks/sgls/pieqtls_with_gwas/'
os.makedirs(outdir, exist_ok=True)

In [2]:
gs_fn = 'results/refs/hg19/hg19.chrom.nochr.sizes'
gencode_fn = 'results/refs/gencode/v30/gencode.v30.annotation.bed'
gencode = pd.read_table(gencode_fn, header=None)
gencode = gencode.drop_duplicates(5)
gencode_dict = {k:v for k,v in gencode[[5,6]].values.tolist()} 
gencode_dict.update({v:k for k,v in gencode[[5,6]].values.tolist()})

## Load Significant GWAS

In [3]:
major_gwas = ['T1D_32005708', 'T1D_34594039_GCST90018925', 'T1D_34012112_Gaulton']

In [4]:
data = []
gwas = 'results/main/coloc/Data/T1D_GWAS/*/GRCh37/GWAS_input_colocalization_pval_lt_5eMinus8.txt'
for fn in glob.glob(gwas):
    path_info = fn.split('/')
    df = pd.read_table(fn)
    
    if path_info[5] not in major_gwas:
        continue
        
    df['gwas_source'] = path_info[5]
    data.append(df)    
gwas_df = pd.concat(data)
gwas_df['sid'] = gwas_df['CHR'].str.replace('chr', '') + ':' + gwas_df['POS'].astype(str)

# loading finemap data into bedtools
gwas_bed = gwas_df.iloc[:, [0,1,1]]
gwas_bed.columns = ['chrom', 'start', 'end']
gwas_bed['start'] = gwas_bed['start'] - 1
gwas_pbt = pbt.BedTool.from_dataframe(gwas_bed)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gwas_bed['start'] = gwas_bed['start'] - 1


In [5]:
individual_gwas_studies = gwas_df.groupby('gwas_source').sid.nunique()
individual_gwas_studies = individual_gwas_studies.to_frame()
individual_gwas_studies.columns = ['Number of Sig. GWAS']

In [6]:
individual_gwas_studies

Unnamed: 0_level_0,Number of Sig. GWAS
gwas_source,Unnamed: 1_level_1
T1D_32005708,21097
T1D_34012112_Gaulton,45798
T1D_34594039_GCST90018925,20419


## Load pieQTLs

In [7]:
pieqtls = glob.glob('results/main/pieqtls/2021_chandra_et_al/*/proximal.pieqtls.tsv')

pieqtl_data = []
for pieqtl in pieqtls:
    print(pieqtl)
    
    cline = pieqtl.split('/')[-2]
    df = pd.read_table(pieqtl)    
    df['cline'] = cline
    pieqtl_data.append(df) 
    
pieqtl_df = pd.concat(pieqtl_data)
pieqtl_df['Chromosome'] = pieqtl_df['Chromosome']
pieqtl_df.loc[:, 'sid'] = pieqtl_df['Chromosome'].str.replace('chr', '') + ':' + \
                          pieqtl_df['pieQTL.Position'].astype(str)

results/main/pieqtls/2021_chandra_et_al/B-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/NK-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/monocyte_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/CD4_T-cell_naive/proximal.pieqtls.tsv
results/main/pieqtls/2021_chandra_et_al/CD8_T-cell_naive/proximal.pieqtls.tsv


In [8]:
individual_cells = pieqtl_df.groupby('cline').Target_geneID.nunique()
individual_cells = individual_cells.to_frame()
individual_cells.columns = ['Number of pieQTL Genes']
individual_cells

Unnamed: 0_level_0,Number of pieQTL Genes
cline,Unnamed: 1_level_1
B-cell_naive,1233
CD4_T-cell_naive,1168
CD8_T-cell_naive,1081
NK-cell_naive,872
monocyte_naive,1065


In [9]:
individual_cells = pieqtl_df.groupby('cline').sid.nunique()
individual_cells = individual_cells.to_frame()
individual_cells.columns = ['Number of pieQTL SNPs']
individual_cells

Unnamed: 0_level_0,Number of pieQTL SNPs
cline,Unnamed: 1_level_1
B-cell_naive,8110
CD4_T-cell_naive,6800
CD8_T-cell_naive,5539
NK-cell_naive,5332
monocyte_naive,6006


In [10]:
pieqtl_bed = pieqtl_df.iloc[:, [1,2,2,-1]]
pieqtl_bed.columns = ['chrom', 'start', 'end', 'cline']
pieqtl_bed.loc[:, 'start'] = pieqtl_bed['start'] - 1
pieqtl_pbt = pbt.BedTool.from_dataframe(pieqtl_bed)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pieqtl_bed.loc[:, 'start'] = pieqtl_bed['start'] - 1


In [11]:
pieqtl_bed.shape

(35898, 4)

## Intersect pieQTLs and Sig GWAS

In [12]:
major_cols = ['ge_source',
              'rsid',
             'chrom',
             'pos',
             'geneid',
             'genename',
             'allele1',
             'allele2',
             'maf',
             'beta_x',
             'se',
             'z',
             'prob',
             'log10bf',
             'mean',
             'sd',
             'mean_incl',
             'sd_incl',
             'pval',
             'gwas_source',
             'pieQTL.ID',
             'Chromosome',
             'pieQTL.Position',
             'Target_geneName',
             'TSS',
             'pvalue',
             'FDR (DICE)',
             'beta_y',
             'ref',
             'alt',
             'Mean.TPM.Homozygous.Reference',
             'Mean.TPM.Heterozygous',
             'Mean.TPM.Homozygous.Alternative',
             'Interaction_type',
             'GWAS.Trait',
             'cline',
             'regionID',
             'GWASLoci',
             'index']


### Intersection

In [13]:
intersect_df = pd.merge(gwas_df, pieqtl_df,
                        left_on=['CHR', 'POS'],
                        right_on=['Chromosome', 'pieQTL.Position'],
                        how='inner')

In [14]:
indiv_cell_intersect = intersect_df.groupby('cline').Target_geneID.nunique().to_frame()
indiv_cell_intersect.columns = ['Number of Unique Genes from pieQTL & Sig.GWAS Intersection']
indiv_cell_intersect

Unnamed: 0_level_0,Number of Unique Genes from pieQTL & Sig.GWAS Intersection
cline,Unnamed: 1_level_1
B-cell_naive,39
CD4_T-cell_naive,33
CD8_T-cell_naive,35
NK-cell_naive,45
monocyte_naive,25


In [15]:
indiv_cell_intersect = intersect_df.groupby('cline').sid_x.nunique().to_frame()
indiv_cell_intersect.columns = ['Number of Unique SNPs from pieQTL & Sig.GWAS Intersection']
indiv_cell_intersect

Unnamed: 0_level_0,Number of Unique SNPs from pieQTL & Sig.GWAS Intersection
cline,Unnamed: 1_level_1
B-cell_naive,348
CD4_T-cell_naive,322
CD8_T-cell_naive,269
NK-cell_naive,319
monocyte_naive,116


In [16]:
indiv_cell_intersect = intersect_df.groupby(['cline', 'gwas_source']).Target_geneID.nunique().to_frame()
indiv_cell_intersect.columns = ['Number of Unique Genes from pieQTL & Sig.GWAS Intersection']
indiv_cell_intersect

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of Unique Genes from pieQTL & Sig.GWAS Intersection
cline,gwas_source,Unnamed: 2_level_1
B-cell_naive,T1D_32005708,28
B-cell_naive,T1D_34012112_Gaulton,34
B-cell_naive,T1D_34594039_GCST90018925,21
CD4_T-cell_naive,T1D_32005708,15
CD4_T-cell_naive,T1D_34012112_Gaulton,29
CD4_T-cell_naive,T1D_34594039_GCST90018925,14
CD8_T-cell_naive,T1D_32005708,18
CD8_T-cell_naive,T1D_34012112_Gaulton,30
CD8_T-cell_naive,T1D_34594039_GCST90018925,15
NK-cell_naive,T1D_32005708,34


In [17]:
indiv_cell_intersect = intersect_df.groupby(['cline', 'gwas_source']).sid_x.nunique().to_frame()
indiv_cell_intersect.columns = ['Number of Unique SNPs from pieQTL & Sig.GWAS Intersection']
indiv_cell_intersect

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of Unique SNPs from pieQTL & Sig.GWAS Intersection
cline,gwas_source,Unnamed: 2_level_1
B-cell_naive,T1D_32005708,186
B-cell_naive,T1D_34012112_Gaulton,241
B-cell_naive,T1D_34594039_GCST90018925,116
CD4_T-cell_naive,T1D_32005708,142
CD4_T-cell_naive,T1D_34012112_Gaulton,198
CD4_T-cell_naive,T1D_34594039_GCST90018925,53
CD8_T-cell_naive,T1D_32005708,136
CD8_T-cell_naive,T1D_34012112_Gaulton,155
CD8_T-cell_naive,T1D_34594039_GCST90018925,73
NK-cell_naive,T1D_32005708,171


In [18]:
def get_genename(x):
    if x in gencode_dict:
        return(gencode_dict[x])
    else:
        return(x)

In [19]:
intersect_df.loc[:, 'geneid'] = intersect_df['Target_geneID'].str.replace('\.[0-9]*', '', regex=True)
intersect_df.loc[:, 'genename'] = intersect_df.loc[:, 'geneid'].apply(get_genename)

In [20]:
intersect_df.loc[:, 'geneid'].nunique()

91

In [21]:
for x in intersect_df.genename.unique():
    print(x)

C1orf216
SF3A3
FHL3
PTPN22
RGS1
NPM1P33
KRT18P39
CD28
SLC22A5
BTN3A1
BTN2A2
ZSCAN26
ZNF165
AL645939.1
IFITM4P
ZDHHC20P1
ZFP57
AL645929.1
HCG4B
AL671277.2
RNF39
FLOT1
DHX16
PPP1R18
ZNRD1ASP
TRIM26
VARS2
MRPS18B
TUBB
C6orf136
RF00019
PRRC2A
BAG6
CCHCR1
CSNK2B
C6orf47
TCF19
HCG27
AL662844.3
AL662844.4
MICB
MICA
AL645933.2
LST1
NCR3
LY6G5B
LY6G5C
C6orf48
AIF1
MSH5
PPT2
SKIV2L
BTNL2
ENSG00000228962
TSBP1
HCG24
TAP2
WDR46
PFDN6
RPL32P1
BACH2
AP003774.4
M6PR
LINC02390
SUOX
ATXN2
TMEM116
HECTD4
MAPKAPK5
CTSH
AC009121.1
RMI2
DND1P1
KANSL1-AS1
AP005482.1
ZGLP1
PRKD2
TMPRSS3
ZSCAN9
TRIM27
ENSG00000261353
BTN2A3P
ZKSCAN4
BTN2A1
BTN3A2
HMGN4
HCG18
GNL1
ZSCAN23
ZNF192P1


## Summarize

In [22]:
# find the unique SNPs
uniq_snps_by_cells = intersect_df.groupby('cline').sid_x.nunique()
uniq_snps_by_cells = uniq_snps_by_cells.to_frame()

# find the unique genes 
uniq_genes_by_cells = intersect_df.groupby('cline').geneid.nunique()
uniq_genes_by_cells = uniq_genes_by_cells.to_frame()

# merge snps and genes
uniq_counts_by_cells = pd.merge(uniq_snps_by_cells, uniq_genes_by_cells, left_index=True, right_index=True)
uniq_counts_by_cells.columns = ['Number of Unique SNPs', 'Number of Unique Genes']
uniq_counts_by_cells.index.name = 'Cell Line'

In [23]:
# save the file
excel_analysis = os.path.join(outdir, 'Unique_Counts_By_Cell_Line.xlsx')
uniq_counts_by_cells.to_excel(excel_analysis, sheet_name='pieqtls')

In [24]:
uniq_genes_by_cells

Unnamed: 0_level_0,geneid
cline,Unnamed: 1_level_1
B-cell_naive,39
CD4_T-cell_naive,33
CD8_T-cell_naive,35
NK-cell_naive,45
monocyte_naive,25


In [25]:
excel_analysis

'results/notebooks/Intersect_PieQTLs_with_T1D_Significant_GWAS_SNPs/Unique_Counts_By_Cell_Line.xlsx'

#### Write the gene list as well

In [26]:
gh_list = sorted(intersect_df.geneid.unique().tolist())
gh_fn = os.path.join(outdir, 'gene_list.txt')
with open(gh_fn, 'w') as fw:
    for x in gh_list:
        fw.write('{}\n'.format(x))

In [27]:
gh_list = sorted(intersect_df.geneid.unique().tolist())

In [28]:
len(gh_list)

91

In [29]:
intersect_df[['cline', 'geneid']]

Unnamed: 0,cline,geneid
0,monocyte_naive,ENSG00000142686
1,NK-cell_naive,ENSG00000183431
2,CD4_T-cell_naive,ENSG00000183386
3,CD8_T-cell_naive,ENSG00000183386
4,NK-cell_naive,ENSG00000183431
...,...,...
2137,CD4_T-cell_naive,ENSG00000271821
2138,CD4_T-cell_naive,ENSG00000206344
2139,CD4_T-cell_naive,ENSG00000271821
2140,NK-cell_naive,ENSG00000206344


#### Get the unique genes per cell type 

In [30]:
genes_by_cell = intersect_df[['cline', 'geneid']].drop_duplicates()
genes_by_cell.sort_values(['cline', 'geneid'], inplace=True)
genes_by_cell['source'] = 'pieqtls_with_sig_gwas'
genes_by_cell.columns = ['cline', 'geneid', 'source']
fn = os.path.join(outdir, 'genes_by_cell.xlsx')
genes_by_cell.to_excel(fn, index=False)

## Check Out SNPs

In [31]:
intersect_df.head(4)

Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename
0,chr1,36107510,-0.129921,0.023253,2.31e-08,520580.0,T1D_34012112_Gaulton,1:36107510,rs676614,chr1,36107510,ENSG00000142686.7,C1orf216,36185073,6.31e-07,0.001185,0.75,C,A,10.26,11.74,14.23,Indirect_pieQTL,Lupus erythematosus systemic,monocyte_naive,1:36107510,ENSG00000142686,C1orf216
1,chr1,38397341,-0.083213,0.015137,3.85e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183431.7,SF3A3,38456593,2.39e-09,1.8e-05,-0.8,G,A,105.05,94.51,87.2,Indirect_pieQTL,Platelet function tests,NK-cell_naive,1:38397341,ENSG00000183431,SF3A3
2,chr1,38397341,-0.083213,0.015137,3.85e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183386.5,FHL3,38471278,1.55e-08,6.4e-05,-0.78,G,A,4.75,3.91,2.04,Direct_pieQTL,Platelet function tests,CD4_T-cell_naive,1:38397341,ENSG00000183386,FHL3
3,chr1,38397341,-0.083213,0.015137,3.85e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183386.5,FHL3,38471278,1.35e-08,5.5e-05,-0.81,G,A,8.44,7.1,5.06,Direct_pieQTL,Platelet function tests,CD8_T-cell_naive,1:38397341,ENSG00000183386,FHL3


In [32]:
intersect_df.loc[intersect_df.genename == 'RPS62']

Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename


In [33]:
intersect_df.loc[intersect_df.geneid == 'ENSG00000170485']

Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename


In [34]:
high = ['SF3A3',
 'PTPN22',
 'RGS1',
 'CTLA4',
 'SLC22A5',
 'BTN3A2',
 'BTN2A2',
 'BTN3A1',
 'BTN2A1',
 'ZKSCAN4',
 'TRIM27',
 'ZFP57',
 'TRIM26',
 'MRPS18B',
 'FLOT1',
 'VARS2',
 'MICA',
 'NCR3',
 'BAG6',
 'TSBP1',
 'BTNL2',
 'TAP2',
 'WDR46',
 'BACH2',
 'IL2RA',
 'RBM17',
 'M6PR',
 'SUOX',
 'RPS26',
 'CUX2',
 'ATXN2',
 'NAA25',
 'CTSH',
 'RMI2',
 'ZPBP2',
 'GSDMB',
 'ORMDL3',
 'VASP',
 'PRKD2',
 'IL2RB',
 'RAC2',
 'MFNG']

In [35]:
high_df = intersect_df.loc[(intersect_df.genename.isin(high)) & (intersect_df.Interaction_type == 'Direct_pieQTL')]

In [36]:
for rs in sorted(set(high_df['pieQTL.ID'].tolist())):
    print(rs)

rs1004062
rs10801129
rs10806423
rs10842537
rs10842660
rs10842662
rs10858022
rs12149160
rs1217418
rs1217419
rs1217420
rs12369009
rs12592898
rs13214027
rs1323298
rs1345229
rs1610625
rs1805722
rs1805723
rs184093
rs1977
rs1978
rs1979
rs2071790
rs2072806
rs2080116
rs2080117
rs209122
rs209137
rs209138
rs209142
rs2394164
rs2516708
rs2516714
rs2516715
rs2518030
rs261947
rs28551159
rs3047288
rs3130843
rs3130889
rs3131059
rs3135303
rs3135315
rs34260811
rs35260072
rs369136
rs371337998
rs3742003
rs3757138
rs3803170
rs413158
rs45553631
rs4634439
rs4713242
rs4713244
rs58521088
rs60254670
rs62408211
rs6454805
rs6661817
rs66823108
rs68072215
rs6899623
rs6920256
rs7189239
rs72841536
rs7309325
rs769178
rs79166578
rs905671
rs918738
rs9348716
rs9366654
rs9366655
rs9379871
rs9393715
rs943689
rs969577
ss1388091585
ss1388091598


In [37]:
intersect_df

Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename
0,chr1,36107510,-0.129921,0.023253,2.310000e-08,520580.0,T1D_34012112_Gaulton,1:36107510,rs676614,chr1,36107510,ENSG00000142686.7,C1orf216,36185073,6.310000e-07,0.001185,0.75,C,A,10.26,11.74,14.23,Indirect_pieQTL,Lupus erythematosus systemic,monocyte_naive,1:36107510,ENSG00000142686,C1orf216
1,chr1,38397341,-0.083213,0.015137,3.850000e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183431.7,SF3A3,38456593,2.390000e-09,0.000018,-0.80,G,A,105.05,94.51,87.20,Indirect_pieQTL,Platelet function tests,NK-cell_naive,1:38397341,ENSG00000183431,SF3A3
2,chr1,38397341,-0.083213,0.015137,3.850000e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183386.5,FHL3,38471278,1.550000e-08,0.000064,-0.78,G,A,4.75,3.91,2.04,Direct_pieQTL,Platelet function tests,CD4_T-cell_naive,1:38397341,ENSG00000183386,FHL3
3,chr1,38397341,-0.083213,0.015137,3.850000e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183386.5,FHL3,38471278,1.350000e-08,0.000055,-0.81,G,A,8.44,7.10,5.06,Direct_pieQTL,Platelet function tests,CD8_T-cell_naive,1:38397341,ENSG00000183386,FHL3
4,chr1,38397369,-0.082736,0.015137,4.610000e-08,520580.0,T1D_34012112_Gaulton,1:38397369,rs35267671,chr1,38397369,ENSG00000183431.7,SF3A3,38456593,2.390000e-09,0.000018,-0.80,C,T,105.05,94.51,87.20,Indirect_pieQTL,Platelet function tests,NK-cell_naive,1:38397369,ENSG00000183431,SF3A3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2137,chr6,31229274,0.187900,0.033500,2.085000e-08,,T1D_34594039_GCST90018925,6:31229274,rs9264392,chr6,31229274,ENSG00000271821.1,XXbac-BPG299F13.14,31169695,3.220000e-05,0.012179,0.67,T,C,2.90,4.78,6.35,Direct_pieQTL,Beta-2 microglobulin|Hepatitis B chronic,CD4_T-cell_naive,6:31229274,ENSG00000271821,AL662844.3
2138,chr6,31229282,0.188800,0.033800,2.369000e-08,,T1D_34594039_GCST90018925,6:31229282,rs9264394,chr6,31229282,ENSG00000206344.6,HCG27,31165537,4.000000e-06,0.002104,0.75,T,A,10.11,14.01,19.81,Direct_pieQTL,Beta-2 microglobulin|Hepatitis B chronic,CD4_T-cell_naive,6:31229282,ENSG00000206344,HCG27
2139,chr6,31229282,0.188800,0.033800,2.369000e-08,,T1D_34594039_GCST90018925,6:31229282,rs9264394,chr6,31229282,ENSG00000271821.1,XXbac-BPG299F13.14,31169695,2.150000e-05,0.008637,0.69,T,A,2.90,4.65,6.35,Direct_pieQTL,Beta-2 microglobulin|Hepatitis B chronic,CD4_T-cell_naive,6:31229282,ENSG00000271821,AL662844.3
2140,chr6,31266117,-0.164800,0.028800,1.012000e-08,,T1D_34594039_GCST90018925,6:31266117,rs2524095,chr6,31266117,ENSG00000206344.6,HCG27,31165537,2.380000e-05,0.012988,0.57,A,C,3.11,3.92,5.47,Direct_pieQTL,Breast neoplasms|C-reactive protein|Psoriasis|...,NK-cell_naive,6:31266117,ENSG00000206344,HCG27


## Intersect PC-HiC with HiChIP Loops

In [38]:
# create locus A columns which require startA and endA
def pos_to_bin(pos, res):
    start = int(np.floor(pos / res) * res)
    end = start + res 
    return([start, end])

lociA = intersect_df['pieQTL.Position'].apply(pos_to_bin, res=5000)
lociA = pd.DataFrame(lociA.values.tolist())
lociA.columns = ['startA', 'endA']

# create locus B columns which require startB and endB
def tss_to_bin(tss, res, slop=0):
    start = int(np.floor(tss / res) * res)
    end = start + res 
    return([start, end])

lociB = intersect_df['TSS'].apply(tss_to_bin, res=5000)
lociB = pd.DataFrame(lociB.values.tolist())
lociB.columns = ['startB', 'endB']

# create locus columns to main intersect df 
intersect_df = pd.concat([intersect_df, lociA, lociB], axis=1)

In [39]:
intersect_df

Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename,startA,endA,startB,endB
0,chr1,36107510,-0.129921,0.023253,2.310000e-08,520580.0,T1D_34012112_Gaulton,1:36107510,rs676614,chr1,36107510,ENSG00000142686.7,C1orf216,36185073,6.310000e-07,0.001185,0.75,C,A,10.26,11.74,14.23,Indirect_pieQTL,Lupus erythematosus systemic,monocyte_naive,1:36107510,ENSG00000142686,C1orf216,36105000,36110000,36185000,36190000
1,chr1,38397341,-0.083213,0.015137,3.850000e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183431.7,SF3A3,38456593,2.390000e-09,0.000018,-0.80,G,A,105.05,94.51,87.20,Indirect_pieQTL,Platelet function tests,NK-cell_naive,1:38397341,ENSG00000183431,SF3A3,38395000,38400000,38455000,38460000
2,chr1,38397341,-0.083213,0.015137,3.850000e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183386.5,FHL3,38471278,1.550000e-08,0.000064,-0.78,G,A,4.75,3.91,2.04,Direct_pieQTL,Platelet function tests,CD4_T-cell_naive,1:38397341,ENSG00000183386,FHL3,38395000,38400000,38470000,38475000
3,chr1,38397341,-0.083213,0.015137,3.850000e-08,520580.0,T1D_34012112_Gaulton,1:38397341,rs34655914,chr1,38397341,ENSG00000183386.5,FHL3,38471278,1.350000e-08,0.000055,-0.81,G,A,8.44,7.10,5.06,Direct_pieQTL,Platelet function tests,CD8_T-cell_naive,1:38397341,ENSG00000183386,FHL3,38395000,38400000,38470000,38475000
4,chr1,38397369,-0.082736,0.015137,4.610000e-08,520580.0,T1D_34012112_Gaulton,1:38397369,rs35267671,chr1,38397369,ENSG00000183431.7,SF3A3,38456593,2.390000e-09,0.000018,-0.80,C,T,105.05,94.51,87.20,Indirect_pieQTL,Platelet function tests,NK-cell_naive,1:38397369,ENSG00000183431,SF3A3,38395000,38400000,38455000,38460000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2137,chr6,31229274,0.187900,0.033500,2.085000e-08,,T1D_34594039_GCST90018925,6:31229274,rs9264392,chr6,31229274,ENSG00000271821.1,XXbac-BPG299F13.14,31169695,3.220000e-05,0.012179,0.67,T,C,2.90,4.78,6.35,Direct_pieQTL,Beta-2 microglobulin|Hepatitis B chronic,CD4_T-cell_naive,6:31229274,ENSG00000271821,AL662844.3,31225000,31230000,31165000,31170000
2138,chr6,31229282,0.188800,0.033800,2.369000e-08,,T1D_34594039_GCST90018925,6:31229282,rs9264394,chr6,31229282,ENSG00000206344.6,HCG27,31165537,4.000000e-06,0.002104,0.75,T,A,10.11,14.01,19.81,Direct_pieQTL,Beta-2 microglobulin|Hepatitis B chronic,CD4_T-cell_naive,6:31229282,ENSG00000206344,HCG27,31225000,31230000,31165000,31170000
2139,chr6,31229282,0.188800,0.033800,2.369000e-08,,T1D_34594039_GCST90018925,6:31229282,rs9264394,chr6,31229282,ENSG00000271821.1,XXbac-BPG299F13.14,31169695,2.150000e-05,0.008637,0.69,T,A,2.90,4.65,6.35,Direct_pieQTL,Beta-2 microglobulin|Hepatitis B chronic,CD4_T-cell_naive,6:31229282,ENSG00000271821,AL662844.3,31225000,31230000,31165000,31170000
2140,chr6,31266117,-0.164800,0.028800,1.012000e-08,,T1D_34594039_GCST90018925,6:31266117,rs2524095,chr6,31266117,ENSG00000206344.6,HCG27,31165537,2.380000e-05,0.012988,0.57,A,C,3.11,3.92,5.47,Direct_pieQTL,Breast neoplasms|C-reactive protein|Psoriasis|...,NK-cell_naive,6:31266117,ENSG00000206344,HCG27,31265000,31270000,31165000,31170000


In [40]:
# creating a dictionary which matches 
# hichip and pc-hic data
hichip_pchic_matches = {'monocytes': 'monocyte_naive',
                        'naive-b': 'B-cell_naive',
                        'total-b': 'B-cell_naive',
                        'gms_merged': 'B-cell_naive', 
                        'naive-cd4': 'CD4_T-cell_naive', 
                        'total-cd4': 'CD4_T-cell_naive', 
                        'non-activated-total-cd4': 'CD4_T-cell_naive',
                        'naive-cd8': 'CD8_T-cell_naive',
                        'total-cd8': 'CD8_T-cell_naive'} 

In [50]:
# getting a list of pc_hic files 
pc_hics = glob.glob('results/main/pc_hic/2016_javierre/processing/*.bedpe')
pc_hics += glob.glob('results/main/pc_hic/2019_jung/processing/*.bedpe')

# adding a merge id for post fixing
intersect_df['mid'] = range(intersect_df.shape[0])

# initializing the merge list and column names
pchic_merge_data = []
pchic_cols = ['chrA', 'startA', 'endA', 'chrB', 'startB', 'endB', 'score', 'pchic_cline']
intersect_bedpe_cols = ['CHR', 'startA', 'endA', 'CHR', 'startB', 'endB', 'mid']

for fn in pc_hics:
    
    pchic_cline = os.path.basename(fn).split('.')[0]
    if pchic_cline in hichip_pchic_matches: 
        
        # getting pieqtl data for the current cell line
        pieqtl_cline = hichip_pchic_matches[pchic_cline]
        pieqtl_cline_df = intersect_df.loc[intersect_df.cline == pieqtl_cline, intersect_bedpe_cols]
        pieqtl_cline_df.iloc[:, 0] = pieqtl_cline_df.iloc[:, 0].str.replace('chr', '')
        pieqtl_cline_df.iloc[:, 3] = pieqtl_cline_df.iloc[:, 0].str.replace('chr', '')
        
        # loading pc-hic data for the current cell line
        pchic_cline_df = pd.read_table(fn, names=pchic_cols)
        pchic_cline_df.chrA = pchic_cline_df.chrA.str.replace('chr', '')
        pchic_cline_df.chrB = pchic_cline_df.chrB.str.replace('chr', '')
        
        # intersecting pieqtl and pc-hic bedpes
        pieqtl_cline_pbt = pbt.BedTool.from_dataframe(pieqtl_cline_df)
        pchic_cline_pbt = pbt.BedTool.from_dataframe(pchic_cline_df)
        both_loops = pieqtl_cline_pbt.pairtopair(pchic_cline_pbt)        
        both_loops = both_loops.to_dataframe(disable_auto_names=True, header=None).iloc[:, 0:15]
        
        if both_loops.shape[0] > 0: 
            pchic_merge_data.append(both_loops)
            print('overlap found: {}'.format(fn))
        else:
            print('overlap not found: {}'.format(fn))
        

overlap found: results/main/pc_hic/2016_javierre/processing/non-activated-total-cd4.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/monocytes.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/naive-b.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/total-cd8.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/total-cd4.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/total-b.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/naive-cd4.bedpe
overlap found: results/main/pc_hic/2016_javierre/processing/naive-cd8.bedpe
overlap not found: results/main/pc_hic/2019_jung/processing/gms_merged.bedpe


In [49]:
pchic_cline_df

Unnamed: 0,chrA,startA,endA,chrB,startB,endB,score,pchic_cline
0,1,915520,932176,1,1158519,1183837,0.000000,gms_merged
1,1,915520,932176,1,1309300,1311643,0.000000,gms_merged
2,1,915520,932176,1,1435680,1451318,0.634289,gms_merged
3,1,915520,932176,1,1558274,1582294,0.839169,gms_merged
4,1,915520,932176,1,943049,965801,0.542704,gms_merged
...,...,...,...,...,...,...,...,...
6074044,X,154894664,154901659,X,154857004,154860508,0.000000,gms_merged
6074045,X,154894664,154901659,X,154865272,154869950,0.000000,gms_merged
6074046,X,154894664,154901659,X,154869951,154876426,0.625975,gms_merged
6074047,X,154894664,154901659,X,154879572,154883728,0.028323,gms_merged


In [48]:
pieqtl_cline_df

Unnamed: 0,CHR,startA,endA,CHR.1,startB,endB,mid
10,chr1,114400000,114405000,chr1,114410000,114415000,10
11,chr1,114400000,114405000,chr1,114410000,114415000,11
12,chr1,114400000,114405000,chr1,114410000,114415000,12
13,chr1,114400000,114405000,chr1,114410000,114415000,13
14,chr1,114400000,114405000,chr1,114410000,114415000,14
...,...,...,...,...,...,...,...
2099,chr6,29905000,29910000,chr6,29855000,29860000,2099
2123,chr6,29940000,29945000,chr6,30040000,30045000,2123
2133,chr6,31105000,31110000,chr6,31125000,31130000,2133
2134,chr6,31145000,31150000,chr6,31125000,31130000,2134


In [51]:
pchic_merge_df = pd.concat(pchic_merge_data, axis=0)
pchic_merge_df.columns = ['hichip_chrA', 'hichip_startA', 'hichip_endA',
                    'hichip_chrB', 'hichip_startB', 'hichip_endB', 'hichip_mid', 
                    'pchic_chrA', 'pchic_startA', 'pchic_endA',
                    'pchic_chrB', 'pchic_startB', 'pchic_endB', 'pchic_score', 'pchic_cline']
pchic_merge_df = pchic_merge_df.sort_values('pchic_score', ascending=False)
pchic_merge_df = pchic_merge_df.drop_duplicates(subset=['hichip_mid'], keep='first')

In [52]:
# creating a table of HiChIP ID versus PC-HiC cell versus score (entries) 
pchic_merge_clean = pchic_merge_df[['hichip_mid', 'pchic_score', 'pchic_cline']]
pchic_merge_clean = pchic_merge_clean.pivot(index='hichip_mid', columns='pchic_cline', values='pchic_score')
pchic_merge_clean.columns = 'pchic.' + pchic_merge_clean.columns

In [53]:
# merging and cleaning the merged dataset 
intersect_df = intersect_df.merge(pchic_merge_clean, left_on='mid', right_on='hichip_mid')
intersect_df.drop('mid', axis=1, inplace=True)

In [54]:
intersect_df

Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename,startA,endA,startB,endB,pchic.monocytes,pchic.naive-b,pchic.naive-cd4,pchic.naive-cd8,pchic.non-activated-total-cd4,pchic.total-b,pchic.total-cd4,pchic.total-cd8
0,chr1,36107510,-0.129921,0.023253,2.310000e-08,520580.0,T1D_34012112_Gaulton,1:36107510,rs676614,chr1,36107510,ENSG00000142686.7,C1orf216,36185073,6.310000e-07,0.001185,0.75,C,A,10.26,11.74,14.23,Indirect_pieQTL,Lupus erythematosus systemic,monocyte_naive,1:36107510,ENSG00000142686,C1orf216,36105000,36110000,36185000,36190000,3.345364,,,,,,,
1,chr2,204614508,-0.091066,0.016533,3.630000e-08,520580.0,T1D_34012112_Gaulton,2:204614508,rs7588874,chr2,204614508,ENSG00000178562.13,CD28,204571198,2.800000e-06,0.003369,0.70,G,A,1113.96,1207.70,1362.35,Direct_pieQTL,Rheumatoid arthritis|Sclerosing cholangitis,CD4_T-cell_naive,2:204614508,ENSG00000178562,CD28,204610000,204615000,204570000,204575000,,,,,4.315157,,,
2,chr5,131630852,0.078192,0.014121,3.070000e-08,520580.0,T1D_34012112_Gaulton,5:131630852,rs35260072,chr5,131630852,ENSG00000197375.8,SLC22A5,131705444,2.040000e-10,0.000001,-0.91,A,C,2.82,2.02,1.41,Direct_pieQTL,Ankylosing spondylitis|Autoimmune diseases|Bod...,monocyte_naive,5:131630852,ENSG00000197375,SLC22A5,131630000,131635000,131705000,131710000,10.510099,,,,,,,
3,chr5,131630852,0.078192,0.014121,3.070000e-08,520580.0,T1D_34012112_Gaulton,5:131630852,rs35260072,chr5,131630852,ENSG00000197375.8,SLC22A5,131705444,6.230000e-08,0.000153,-0.84,A,C,7.24,5.90,5.03,Indirect_pieQTL,Ankylosing spondylitis|Autoimmune diseases|Bod...,CD4_T-cell_naive,5:131630852,ENSG00000197375,SLC22A5,131630000,131635000,131705000,131710000,,,,,,,2.686006,
4,chr5,131630852,0.078192,0.014121,3.070000e-08,520580.0,T1D_34012112_Gaulton,5:131630852,rs35260072,chr5,131630852,ENSG00000197375.8,SLC22A5,131705444,4.220000e-07,0.000961,-0.78,A,C,6.19,5.08,4.31,Indirect_pieQTL,Ankylosing spondylitis|Autoimmune diseases|Bod...,CD8_T-cell_naive,5:131630852,ENSG00000197375,SLC22A5,131630000,131635000,131705000,131710000,,,,4.292315,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,chr6,28911802,0.156500,0.026000,1.696000e-09,24250.0,T1D_32005708,6:28911802,rs2071790,chr6,28911802,ENSG00000204713.6,TRIM27,28891766,2.580000e-05,0.009739,0.69,G,A,8.34,10.23,11.44,Direct_pieQTL,Body height,monocyte_naive,6:28911802,ENSG00000204713,TRIM27,28910000,28915000,28890000,28895000,26.097622,,,,,,,
234,chr6,28092307,0.201500,0.026800,5.149000e-14,24250.0,T1D_32005708,6:28092307,rs17711801,chr6,28092307,ENSG00000187626.7,ZKSCAN4,28227011,8.980000e-05,0.026584,0.72,C,G,10.80,12.91,14.41,Indirect_pieQTL,,CD8_T-cell_naive,6:28092307,ENSG00000187626,ZKSCAN4,28090000,28095000,28225000,28230000,,,,,,,,6.09541
235,chr6,30042955,0.160800,0.018600,4.554000e-18,,T1D_34594039_GCST90018925,6:30042955,rs1150736,chr6,30042955,ENSG00000204623.4,ZNRD1-AS1,30029417,5.740000e-05,0.019704,0.68,C,T,4.37,5.41,6.07,Direct_pieQTL,,CD4_T-cell_naive,6:30042955,ENSG00000204623,ZNRD1ASP,30040000,30045000,30025000,30030000,,,6.14492,,,,,
236,chr6,31149520,0.198200,0.031400,2.866000e-10,,T1D_34594039_GCST90018925,6:31149520,rs3130508,chr6,31149520,ENSG00000204536.9,CCHCR1,31126015,4.610000e-06,0.001997,0.87,G,A,6.95,8.86,11.35,Indirect_pieQTL,Glomerulonephritis membranous|Psoriasis|Psori...,B-cell_naive,6:31149520,ENSG00000204536,CCHCR1,31145000,31150000,31125000,31130000,,,,,,4.623636,,


In [57]:
pchic_support = []
for i, sr in intersect_df.iterrows():
    
    pchic_supp = 0 
    if sr.cline == 'CD4_T-cell_naive':
        if sr['pchic.naive-cd4'] >= 5 or sr['pchic.total-cd4'] >= 5 or sr['pchic.non-activated-total-cd4'] > 5:
            pchic_supp = 1 
    elif sr.cline == 'CD8_T-cell_naive':
        if sr['pchic.naive-cd8'] >= 5 or sr['pchic.total-cd8'] >= 5:
            pchic_supp = 1 
            
    elif sr.cline == 'monocyte_naive':
        if sr['pchic.monocytes'] >= 5:
            pchic_supp = 1             
            
    elif sr.cline == 'B-cell_naive':
        if sr['pchic.naive-b'] >= 5 or sr['pchic.total-b'] >= 5: #or sr['pchic.gms_merged'] >= 5:
            pchic_supp = 1        
            
    pchic_support.append(pchic_supp)
        
intersect_df['pchic_support'] = pchic_support

In [59]:
for grp, grp_df in intersect_df.groupby('cline'):
    print(grp)
    
    display(grp_df.loc[grp_df.pchic_support == 1])

B-cell_naive


Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename,startA,endA,startB,endB,pchic.monocytes,pchic.naive-b,pchic.naive-cd4,pchic.naive-cd8,pchic.non-activated-total-cd4,pchic.total-b,pchic.total-cd4,pchic.total-cd8,pchic_support
5,chr6,29609063,0.122563,0.016948,4.77e-13,520580.0,T1D_34012112_Gaulton,6:29609063,rs1233373,chr6,29609063,ENSG00000225864.1,HCG4P11,29691748,9.92e-08,6.5e-05,0.85,C,T,1.69,3.22,4.48,Direct_pieQTL,,B-cell_naive,6:29609063,ENSG00000225864,AL645939.1,29605000,29610000,29690000,29695000,,,,,,5.457031,,,1
46,chr6,31542308,0.219042,0.017353,1.58e-36,520580.0,T1D_34012112_Gaulton,6:31542308,rs1799964,chr6,31542308,ENSG00000204482.6,LST1,31553901,7e-05,0.020969,0.62,T,C,8.99,12.06,14.81,Direct_pieQTL,Crohn's disease,B-cell_naive,6:31542308,ENSG00000204482,LST1,31540000,31545000,31550000,31555000,,7.055537,,,,,,,1
47,chr6,31542308,0.2361,0.0315,6.798e-14,24250.0,T1D_32005708,6:31542308,rs1799964,chr6,31542308,ENSG00000204482.6,LST1,31553901,7e-05,0.020969,0.62,T,C,8.99,12.06,14.81,Direct_pieQTL,Crohn's disease,B-cell_naive,6:31542308,ENSG00000204482,LST1,31540000,31545000,31550000,31555000,,7.055537,,,,,,,1
119,chr6,32221934,0.258888,0.015102,7.17e-66,520580.0,T1D_34012112_Gaulton,6:32221934,rs9267996,chr6,32221934,ENSG00000204290.6,BTNL2,32374905,1.47e-05,0.005604,0.66,A,G,2.39,3.65,5.46,Indirect_pieQTL,Breast neoplasms|Kidney diseases,B-cell_naive,6:32221934,ENSG00000204290,BTNL2,32220000,32225000,32370000,32375000,,8.585047,,,,,,,1
120,chr6,32221934,0.1934,0.0287,1.563e-11,24250.0,T1D_32005708,6:32221934,rs9267996,chr6,32221934,ENSG00000204290.6,BTNL2,32374905,1.47e-05,0.005604,0.66,A,G,2.39,3.65,5.46,Indirect_pieQTL,Breast neoplasms|Kidney diseases,B-cell_naive,6:32221934,ENSG00000204290,BTNL2,32220000,32225000,32370000,32375000,,8.585047,,,,,,,1
121,chr6,32221934,0.1214,0.0186,7.53e-11,,T1D_34594039_GCST90018925,6:32221934,rs9267996,chr6,32221934,ENSG00000204290.6,BTNL2,32374905,1.47e-05,0.005604,0.66,A,G,2.39,3.65,5.46,Indirect_pieQTL,Breast neoplasms|Kidney diseases,B-cell_naive,6:32221934,ENSG00000204290,BTNL2,32220000,32225000,32370000,32375000,,8.585047,,,,,,,1
122,chr6,32223191,-0.533243,0.051957,1.03e-24,520580.0,T1D_34012112_Gaulton,6:32223191,rs9469094,chr6,32223191,ENSG00000204290.6,BTNL2,32374905,2.02e-05,0.007359,-1.21,G,C,3.8,1.5,,Indirect_pieQTL,Breast neoplasms|Chemical and drug induced liv...,B-cell_naive,6:32223191,ENSG00000204290,BTNL2,32220000,32225000,32370000,32375000,,8.585047,,,,,,,1
123,chr6,32223191,-0.533243,0.051957,1.03e-24,520580.0,T1D_34012112_Gaulton,6:32223191,rs9469094,chr6,32223191,ENSG00000228962.1,HCG23,32358287,4.01e-05,0.013137,-1.16,G,C,6.02,2.5,,Indirect_pieQTL,Breast neoplasms|Chemical and drug induced liv...,B-cell_naive,6:32223191,ENSG00000228962,ENSG00000228962,32220000,32225000,32355000,32360000,,8.082728,,,,,,,1
124,chr6,32223531,0.526488,0.015504,9.34e-253,520580.0,T1D_34012112_Gaulton,6:32223531,rs9268000,chr6,32223531,ENSG00000204290.6,BTNL2,32374905,2.01e-05,0.007336,0.68,A,C,2.48,3.76,5.63,Indirect_pieQTL,Breast neoplasms,B-cell_naive,6:32223531,ENSG00000204290,BTNL2,32220000,32225000,32370000,32375000,,8.585047,,,,,,,1
125,chr6,32223531,0.3825,0.0287,2.202e-40,24250.0,T1D_32005708,6:32223531,rs9268000,chr6,32223531,ENSG00000204290.6,BTNL2,32374905,2.01e-05,0.007336,0.68,A,C,2.48,3.76,5.63,Indirect_pieQTL,Breast neoplasms,B-cell_naive,6:32223531,ENSG00000204290,BTNL2,32220000,32225000,32370000,32375000,,8.585047,,,,,,,1


CD4_T-cell_naive


Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename,startA,endA,startB,endB,pchic.monocytes,pchic.naive-b,pchic.naive-cd4,pchic.naive-cd8,pchic.non-activated-total-cd4,pchic.total-b,pchic.total-cd4,pchic.total-cd8,pchic_support
6,chr6,30028800,0.148615,0.016107,2.79e-20,520580.0,T1D_34012112_Gaulton,6:30028800,rs9295829,chr6,30028800,ENSG00000225864.1,HCG4P11,29691748,4.98e-05,0.01741155,0.72,A,G,0.74,1.41,1.28,Direct_pieQTL,Chemical and drug induced liver injury|Vitilig...,CD4_T-cell_naive,6:30028800,ENSG00000225864,AL645939.1,30025000,30030000,29690000,29695000,,,,,18.74768,,,,1
69,chr6,31671879,0.410726,0.026431,1.87e-54,520580.0,T1D_34012112_Gaulton,6:31671879,rs9267545,chr6,31671879,ENSG00000204428.8,LY6G5C,31651817,1.78e-05,0.00733996,-0.98,G,A,2.63,1.64,,Indirect_pieQTL,Glomerulonephritis membranous|Lupus erythemat...,CD4_T-cell_naive,6:31671879,ENSG00000204428,LY6G5C,31670000,31675000,31650000,31655000,,,6.977242,,,,,,1
70,chr6,31671879,0.4394,0.0454,3.59e-22,24250.0,T1D_32005708,6:31671879,rs9267545,chr6,31671879,ENSG00000204428.8,LY6G5C,31651817,1.78e-05,0.00733996,-0.98,G,A,2.63,1.64,,Indirect_pieQTL,Glomerulonephritis membranous|Lupus erythemat...,CD4_T-cell_naive,6:31671879,ENSG00000204428,LY6G5C,31670000,31675000,31650000,31655000,,,6.977242,,,,,,1
113,chr6,31801233,0.174763,0.016155,2.83e-27,520580.0,T1D_34012112_Gaulton,6:31801233,rs2736430,chr6,31801233,ENSG00000204472.8,AIF1,31582961,3.19e-05,0.01210044,0.6,C,T,54.78,67.56,73.32,Direct_pieQTL,Rheumatoid arthritis,CD4_T-cell_naive,6:31801233,ENSG00000204472,AIF1,31800000,31805000,31580000,31585000,,,6.593487,,,,,,1
142,chr6,90976768,0.199873,0.018481,2.93e-27,520580.0,T1D_34012112_Gaulton,6:90976768,rs72928038,chr6,90976768,ENSG00000112182.10,BACH2,91006627,2.71e-06,0.001509464,-1.15,G,A,187.78,160.92,150.62,Indirect_pieQTL,Thyroid microsomal antibodies,CD4_T-cell_naive,6:90976768,ENSG00000112182,BACH2,90975000,90980000,91005000,91010000,,,9.281679,,,,,,1
163,chr16,11350991,-0.097107,0.014118,6.06e-12,520580.0,T1D_34012112_Gaulton,16:11350991,rs243330,chr16,11350991,ENSG00000262703.1,RP11-485G7.6,11443178,7.42e-09,3.02e-05,0.77,C,T,0.55,1.32,1.94,Direct_pieQTL,Crohn's disease|Psoriasis,CD4_T-cell_naive,16:11350991,ENSG00000262703,AC009121.1,11350000,11355000,11440000,11445000,,,6.042755,,,,,,1
169,chr17,44205690,-0.096013,0.017304,2.88e-08,520580.0,T1D_34012112_Gaulton,17:44205690,rs4471723,chr17,44205690,ENSG00000214401.4,KANSL1-AS1,44270942,1.74e-14,2.46e-11,1.14,C,T,7.14,16.79,29.08,Direct_pieQTL,Bone density|Brain|Forced expiratory volume|Lu...,CD4_T-cell_naive,17:44205690,ENSG00000214401,KANSL1-AS1,44205000,44210000,44270000,44275000,,,7.39089,,,,,,1
186,chr6,26501768,0.288,0.0326,9.842999999999998e-19,24250.0,T1D_32005708,6:26501768,rs2295593,chr6,26501768,ENSG00000186470.9,BTN3A2,26365387,8.51e-05,0.02736032,-0.95,C,T,178.54,118.35,140.99,Indirect_pieQTL,Lupus erythematosus systemic|Platelet functio...,CD4_T-cell_naive,6:26501768,ENSG00000186470,BTN3A2,26500000,26505000,26365000,26370000,,,,,11.277539,,,,1
235,chr6,30042955,0.1608,0.0186,4.554e-18,,T1D_34594039_GCST90018925,6:30042955,rs1150736,chr6,30042955,ENSG00000204623.4,ZNRD1-AS1,30029417,5.74e-05,0.01970421,0.68,C,T,4.37,5.41,6.07,Direct_pieQTL,,CD4_T-cell_naive,6:30042955,ENSG00000204623,ZNRD1ASP,30040000,30045000,30025000,30030000,,,6.14492,,,,,,1


CD8_T-cell_naive


Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename,startA,endA,startB,endB,pchic.monocytes,pchic.naive-b,pchic.naive-cd4,pchic.naive-cd8,pchic.non-activated-total-cd4,pchic.total-b,pchic.total-cd4,pchic.total-cd8,pchic_support
7,chr6,30028800,0.148615,0.016107,2.790000e-20,520580.0,T1D_34012112_Gaulton,6:30028800,rs9295829,chr6,30028800,ENSG00000225864.1,HCG4P11,29691748,0.000007,0.003302,0.80,A,G,1.27,2.36,3.25,Direct_pieQTL,Chemical and drug induced liver injury|Vitilig...,CD8_T-cell_naive,6:30028800,ENSG00000225864,AL645939.1,30025000,30030000,29690000,29695000,,,,22.576318,,,,,1
21,chr6,31509284,0.293320,0.015682,4.560000e-78,520580.0,T1D_34012112_Gaulton,6:31509284,rs3130059,chr6,31509284,ENSG00000204482.6,LST1,31553901,0.000006,0.002784,0.71,G,C,8.86,10.14,13.47,Indirect_pieQTL,Myasthenia gravis,CD8_T-cell_naive,6:31509284,ENSG00000204482,LST1,31505000,31510000,31550000,31555000,,,,,,,,15.927181,1
22,chr6,31509284,0.321500,0.028000,1.812000e-30,24250.0,T1D_32005708,6:31509284,rs3130059,chr6,31509284,ENSG00000204482.6,LST1,31553901,0.000006,0.002784,0.71,G,C,8.86,10.14,13.47,Indirect_pieQTL,Myasthenia gravis,CD8_T-cell_naive,6:31509284,ENSG00000204482,LST1,31505000,31510000,31550000,31555000,,,,,,,,15.927181,1
23,chr6,31509284,0.208000,0.018200,2.268000e-30,,T1D_34594039_GCST90018925,6:31509284,rs3130059,chr6,31509284,ENSG00000204482.6,LST1,31553901,0.000006,0.002784,0.71,G,C,8.86,10.14,13.47,Indirect_pieQTL,Myasthenia gravis,CD8_T-cell_naive,6:31509284,ENSG00000204482,LST1,31505000,31510000,31550000,31555000,,,,,,,,15.927181,1
24,chr6,31509779,0.287890,0.015681,2.810000e-75,520580.0,T1D_34012112_Gaulton,6:31509779,rs2239527,chr6,31509779,ENSG00000204482.6,LST1,31553901,0.000006,0.002784,0.71,C,G,8.86,10.14,13.47,Indirect_pieQTL,Myasthenia gravis,CD8_T-cell_naive,6:31509779,ENSG00000204482,LST1,31505000,31510000,31550000,31555000,,,,,,,,15.927181,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,chr6,28094366,0.202000,0.026800,4.365000e-14,24250.0,T1D_32005708,6:28094366,rs6922063,chr6,28094366,ENSG00000187626.7,ZKSCAN4,28227011,0.000090,0.026584,0.72,G,A,10.80,12.91,14.41,Indirect_pieQTL,,CD8_T-cell_naive,6:28094366,ENSG00000187626,ZKSCAN4,28090000,28095000,28225000,28230000,,,,,,,,6.095410,1
221,chr6,26537801,0.280000,0.032800,1.485000e-17,24250.0,T1D_32005708,6:26537801,rs6920256,chr6,26537801,ENSG00000186470.9,BTN3A2,26365387,0.000005,0.002568,-1.12,G,A,174.99,108.99,127.31,Direct_pieQTL,Lupus erythematosus systemic|Platelet functio...,CD8_T-cell_naive,6:26537801,ENSG00000186470,BTN3A2,26535000,26540000,26365000,26370000,,,,,,,,11.119493,1
228,chr6,28104634,0.200500,0.026700,6.530000e-14,24250.0,T1D_32005708,6:28104634,rs9380057,chr6,28104634,ENSG00000187626.7,ZKSCAN4,28227011,0.000090,0.026584,0.72,G,T,10.80,12.91,14.41,Indirect_pieQTL,,CD8_T-cell_naive,6:28104634,ENSG00000187626,ZKSCAN4,28100000,28105000,28225000,28230000,,,,7.619703,,,,,1
231,chr6,28103691,0.200600,0.026700,6.424000e-14,24250.0,T1D_32005708,6:28103691,rs1340004,chr6,28103691,ENSG00000187626.7,ZKSCAN4,28227011,0.000090,0.026584,0.72,C,T,10.80,12.91,14.41,Indirect_pieQTL,,CD8_T-cell_naive,6:28103691,ENSG00000187626,ZKSCAN4,28100000,28105000,28225000,28230000,,,,7.619703,,,,,1


monocyte_naive


Unnamed: 0,CHR,POS,BETA,SE,P,N,gwas_source,sid_x,pieQTL.ID,Chromosome,pieQTL.Position,Target_geneID,Target_geneName,TSS,pvalue,FDR (DICE),beta,ref,alt,Mean.TPM.Homozygous.Reference,Mean.TPM.Heterozygous,Mean.TPM.Homozygous.Alternative,Interaction_type,GWAS.Trait,cline,sid_y,geneid,genename,startA,endA,startB,endB,pchic.monocytes,pchic.naive-b,pchic.naive-cd4,pchic.naive-cd8,pchic.non-activated-total-cd4,pchic.total-b,pchic.total-cd4,pchic.total-cd8,pchic_support
2,chr5,131630852,0.078192,0.014121,3.07e-08,520580.0,T1D_34012112_Gaulton,5:131630852,rs35260072,chr5,131630852,ENSG00000197375.8,SLC22A5,131705444,2.04e-10,1e-06,-0.91,A,C,2.82,2.02,1.41,Direct_pieQTL,Ankylosing spondylitis|Autoimmune diseases|Bod...,monocyte_naive,5:131630852,ENSG00000197375,SLC22A5,131630000,131635000,131705000,131710000,10.510099,,,,,,,,1
10,chr6,30763004,0.257858,0.015499,3.7800000000000003e-62,520580.0,T1D_34012112_Gaulton,6:30763004,rs3131059,chr6,30763004,ENSG00000137312.10,FLOT1,30710510,6.96e-05,0.022577,0.62,T,C,69.87,79.9,78.21,Direct_pieQTL,,monocyte_naive,6:30763004,ENSG00000137312,FLOT1,30760000,30765000,30710000,30715000,8.978399,,,,,,,,1
11,chr6,30763004,0.4663,0.0281,5.885e-62,24250.0,T1D_32005708,6:30763004,rs3131059,chr6,30763004,ENSG00000137312.10,FLOT1,30710510,6.96e-05,0.022577,0.62,T,C,69.87,79.9,78.21,Direct_pieQTL,,monocyte_naive,6:30763004,ENSG00000137312,FLOT1,30760000,30765000,30710000,30715000,8.978399,,,,,,,,1
71,chr6,31678028,0.30264,0.015967,4.11e-80,520580.0,T1D_34012112_Gaulton,6:31678028,rs805288,chr6,31678028,ENSG00000204428.8,LY6G5C,31651817,1.02e-05,0.004389,-0.63,C,T,2.51,2.1,1.8,Indirect_pieQTL,Blood pressure|Diabetes mellitus type 1|Hyper...,monocyte_naive,6:31678028,ENSG00000204428,LY6G5C,31675000,31680000,31650000,31655000,8.813424,,,,,,,,1
72,chr6,31678028,0.3793,0.03,1.229e-36,24250.0,T1D_32005708,6:31678028,rs805288,chr6,31678028,ENSG00000204428.8,LY6G5C,31651817,1.02e-05,0.004389,-0.63,C,T,2.51,2.1,1.8,Indirect_pieQTL,Blood pressure|Diabetes mellitus type 1|Hyper...,monocyte_naive,6:31678028,ENSG00000204428,LY6G5C,31675000,31680000,31650000,31655000,8.813424,,,,,,,,1
73,chr6,31678730,0.297653,0.015801,3.69e-79,520580.0,T1D_34012112_Gaulton,6:31678730,rs805287,chr6,31678730,ENSG00000204428.8,LY6G5C,31651817,5.57e-05,0.018655,-0.58,A,G,2.43,2.22,1.76,Indirect_pieQTL,,monocyte_naive,6:31678730,ENSG00000204428,LY6G5C,31675000,31680000,31650000,31655000,8.813424,,,,,,,,1
74,chr6,31678730,0.3232,0.03,4.378e-27,24250.0,T1D_32005708,6:31678730,rs805287,chr6,31678730,ENSG00000204428.8,LY6G5C,31651817,5.57e-05,0.018655,-0.58,A,G,2.43,2.22,1.76,Indirect_pieQTL,,monocyte_naive,6:31678730,ENSG00000204428,LY6G5C,31675000,31680000,31650000,31655000,8.813424,,,,,,,,1
170,chr19,10491005,-0.142175,0.026005,4.57e-08,520580.0,T1D_34012112_Gaulton,19:10491005,rs2304257,chr19,10491005,ENSG00000220201.3,ZGLP1,10420556,8.07e-06,0.012444,1.31,C,G,2.4,3.52,,Indirect_pieQTL,Crohn's disease|Inflammatory bowel disease|Pso...,monocyte_naive,19:10491005,ENSG00000220201,ZGLP1,10490000,10495000,10420000,10425000,10.505228,,,,,,,,1
173,chr6,28863264,0.1607,0.026,6.046e-10,24250.0,T1D_32005708,6:28863264,rs3135315,chr6,28863264,ENSG00000204713.6,TRIM27,28891766,2.78e-05,0.01037,0.68,C,G,8.34,10.22,11.41,Direct_pieQTL,Body height,monocyte_naive,6:28863264,ENSG00000204713,TRIM27,28860000,28865000,28890000,28895000,39.977165,,,,,,,,1
175,chr6,28865713,0.1615,0.026,5.099e-10,24250.0,T1D_32005708,6:28865713,rs209137,chr6,28865713,ENSG00000204713.6,TRIM27,28891766,2.78e-05,0.01037,0.68,C,T,8.34,10.22,11.41,Direct_pieQTL,Body height,monocyte_naive,6:28865713,ENSG00000204713,TRIM27,28865000,28870000,28890000,28895000,39.977165,,,,,,,,1


In [60]:
pieqtls_sgls_fn = os.path.join(outdir, 'pieqtls_sgls_with_pchic_support.xlsx')
intersect_df.to_excel(pieqtls_sgls_fn, index=False)