# Immune disease associations of Neanderthal-introgressed SNPs

This code investigates if Neanderthal-introgressed SNPs have been associated with any immune-related diseases, including infectious diseases, allergic diseases, autoimmune diseases and autoinflammatory diseases, using data from the NHGRI-EBI GWAS Catalog.

Neanderthal-introgressed SNPs from:
1. Dannemann M, Prüfer K & Kelso J. Functional implications of Neandertal introgression in modern humans. *Genome Biol* 2017 **18**:61.
2. Simonti CN *et al.* The phenotypic legacy of admixture between modern humans and Neandertals. *Science* 2016 **351**:737-41.  

GWAS summary statistics from:
* [GWAS Catalog](https://www.ebi.ac.uk/gwas/docs/file-downloads)

In [1]:
# Import modules
import pandas as pd

## Get Neanderthal SNPs present in GWAS Catalog

In [2]:
# Neanderthal-introgressed SNPs: only the coordinate, source and rsID columns are read
neanderthal_columns = ['Chromosome', 'Position', 'Source', 'ID']
neanderthal = pd.read_csv('../neanderthal/allpop_fixed.csv', usecols=neanderthal_columns)

In [3]:
# GWAS Catalog associations, restricted to the columns used in this notebook
catalog_columns = ['DISEASE/TRAIT', 'CHR_ID', 'CHR_POS', 'REPORTED GENE(S)', 'MAPPED_GENE',
                   'STRONGEST SNP-RISK ALLELE', 'SNPS', 'RISK ALLELE FREQUENCY', 'P-VALUE', 'OR or BETA',
                   '95% CI (TEXT)', 'MAPPED_TRAIT', 'STUDY ACCESSION']
catalog = (
    pd.read_csv('GWAS_Catalog.tsv', sep="\t", header=0, usecols=catalog_columns, low_memory=False)
    # Discard associations located on the sex chromosomes
    .loc[lambda frame: ~frame['CHR_ID'].isin(['X', 'Y'])]
    # Use the same column names as the Neanderthal SNP table
    .rename(columns={'CHR_ID': 'Chromosome', 'CHR_POS': 'Position', 'SNPS': 'ID'})
)

In [4]:
# Neanderthal SNPs that also have an entry in the GWAS catalog.
# Coordinates are taken from the Neanderthal table, so the catalog's own coordinate
# columns are dropped before the inner join on rsID. Every matching catalog row is
# kept, so a SNP reported by several studies appears on several rows.
catalog_by_id = catalog.drop(columns=['Chromosome', 'Position'])
nean_catalog = pd.merge(neanderthal, catalog_by_id, how='inner', on='ID')
nean_catalog

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
0,1,3431555,both,rs947350,Lung function (FEV1/FVC),,MEGF6,rs947350-?,NR,1E-34,,,FEV/FEC ratio,GCST007080
1,1,3651409,both,rs10910018,Visceral fat,"TP73, CCDC27, KIAA0495, LRRC47",TP73,rs10910018-A,0.04,2E-6,,,visceral adipose tissue measurement,GCST001525
2,1,3651409,both,rs10910018,Visceral fat,"TP73, CCDC27, KIAA0495, LRRC47",TP73,rs10910018-A,0.04,2E-6,,,visceral adipose tissue measurement,GCST001525
3,1,4449204,both,rs72632736,Systemic juvenile idiopathic arthritis,"LOC284661, AJAP1",EEF1DP6 - LINC01777,rs72632736-?,,3E-9,2.400000,[1.80-3.30],systemic juvenile idiopathic arthritis,GCST004025
4,1,9472981,both,rs118100927,Height,"SPSB1, LOC100506022",SPSB1 - BX323043.1,rs118100927-?,NR,9E-12,0.034456,[0.025-0.044] unit increase,body height,GCST008839
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1306,22,32783904,dannemann_only,rs12530,IgG glycosylation,NR,RTCB,rs12530-C,0.18432396929239,3E-6,0.182800,[0.11-0.26] unit increase,serum IgG glycosylation measurement,GCST001848
1307,22,50357625,dannemann_only,rs117325373,Response to paliperidone in schizophrenia (pos...,,PIM3,rs117325373-G,NR,2E-6,6.346900,[NR] unit decrease,"schizophrenia, response to paliperidone, schiz...",GCST004040
1308,22,50722408,dannemann_only,rs79966207,Blond vs. brown/black hair color,PLXNB2,PLXNB2,rs79966207-C,0.1776,5E-10,1.063000,NR,hair color,GCST006988
1309,22,50722408,dannemann_only,rs79966207,Body mass index,NR,PLXNB2,rs79966207-?,NR,6E-9,,,body mass index,GCST009871


## Immune-related diseases associated with Neanderthal SNPs

### Infections

In [5]:
# Associations whose reported trait contains "influenza" (case-sensitive substring match).
# na=False treats a missing trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['DISEASE/TRAIT'].str.contains('influenza', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
700,3,191695113,dannemann_only,rs4261353,Severe influenza A (H1N1) infection,FGF12,AC026320.2 - AC026320.1,rs4261353-?,0.001866,1e-08,27.44,[4.948-152.1],influenza A (H1N1),GCST003123


In [6]:
# NOTE(review): this cell repeats the preceding "influenza" query verbatim — confirm whether a
# different search term was intended here, or remove the duplicate cell.
# na=False treats a missing trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['DISEASE/TRAIT'].str.contains('influenza', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
700,3,191695113,dannemann_only,rs4261353,Severe influenza A (H1N1) infection,FGF12,AC026320.2 - AC026320.1,rs4261353-?,0.001866,1e-08,27.44,[4.948-152.1],influenza A (H1N1),GCST003123


In [7]:
# Associations whose reported trait contains "HIV" (case-sensitive substring match).
# na=False treats a missing trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['DISEASE/TRAIT'].str.contains('HIV', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
377,4,161506897,both,rs17291045,HIV-1 control,intergenic,LINC02477,rs17291045-?,NR,5e-08,,,HIV-1 infection,GCST000549
483,9,83282271,simonti_only,rs72749479,Time-dependent creatinine clearance change res...,NR,MTND2P9 - RPS19P6,rs72749479-?,,7e-07,,,"response to tenofovir, HIV infection, creatini...",GCST006069
484,9,83285174,simonti_only,rs72749483,Time-dependent creatinine clearance change res...,NR,MTND2P9 - RPS19P6,rs72749483-?,,7e-07,,,"response to tenofovir, HIV infection, creatini...",GCST006069
827,6,117753834,dannemann_only,rs36111427,Time-dependent creatinine clearance change res...,NR,AL132671.2,rs36111427-?,,9e-06,,,"response to tenofovir, HIV infection, creatini...",GCST006069
976,10,44524675,dannemann_only,rs17154929,HIV-associated dementia,intergenic,LINC00841 - AL137026.2,rs17154929-T,NR,1e-07,,,AIDS dementia,GCST001542


In [8]:
# Associations whose reported trait contains "Malaria".
# NOTE(review): the match is case-sensitive, so a trait spelled with a lowercase "malaria"
# is not returned — confirm this is intended.
# na=False treats a missing trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['DISEASE/TRAIT'].str.contains('Malaria', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
70,11,4790575,simonti_only,rs12788102,Malaria,HBB,"OR51F1, MMP26",rs12788102-A,NR,2e-16,2.15,[1.79-2.59],malaria,GCST002033


### Allergic diseases

In [9]:
# Associations whose mapped trait contains "aller" (matches e.g. "allergy" and "allergic").
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('aller', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
143,12,48196982,simonti_only,rs55726902,"Allergic disease (asthma, hay fever or eczema)",HDAC7,HDAC7,rs55726902-G,0.8,3e-16,1.051,[1.04-1.06],allergy,GCST005038
341,3,196373582,simonti_only,rs4916533,Asthma or allergic disease (pleiotropy),NRROS,"NRROS, PIGX",rs4916533-?,NR,2e-08,1.075269,,"allergy, asthma",GCST007564
342,3,196373582,simonti_only,rs4916533,Hay fever and/or eczema,NR,"NRROS, PIGX",rs4916533-C,0.91931,6e-10,1.070778,[1.05-1.09],"Eczema, allergic rhinitis",GCST009717
344,4,38811051,simonti_only,rs11727978,Allergy,"TLR1, TLR6",TLR1,rs11727978-?,NR,1e-15,1.083,[1.062-1.104],allergy,GCST003990
345,4,38819403,simonti_only,rs66819621,Allergic rhinitis,TLR1,TLR1,rs66819621-A,0.8428,1.9999999999999998e-25,1.179941,[1.15-1.22],allergic rhinitis,GCST009719
701,3,196343817,dannemann_only,rs113048054,"Allergic disease (asthma, hay fever or eczema)",NR,AC092933.2 - LINC01063,rs113048054-A,0.93413,4e-12,1.080497,[1.06-1.1],"Eczema, allergic rhinitis, asthma",GCST009716
729,4,123141070,dannemann_only,rs45613035,Hay fever and/or eczema,NR,KIAA1109,rs45613035-C,0.09838,6e-20,1.094,[1.074-1.115],"Eczema, allergic rhinitis",GCST009717


In [10]:
# Associations whose mapped trait contains "asthma" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('asthma', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
67,10,119647256,simonti_only,rs17642749,Asthma (sex interaction),RAB11FIP2,AL513324.1 - RAB11FIP2,rs17642749-G,0.086,4e-07,2.97,[1.90-4.63],"sex interaction measurement, asthma",GCST002445
291,21,36453837,both,rs2242900,Asthma,RUNX1,RUNX1,rs2242900-G,0.14,2e-18,,,asthma,GCST010043
341,3,196373582,simonti_only,rs4916533,Asthma or allergic disease (pleiotropy),NRROS,"NRROS, PIGX",rs4916533-?,NR,2e-08,1.075269,,"allergy, asthma",GCST007564
369,4,123359569,both,rs62322662,Asthma,"ADAD1, IL2",ADAD1 - IL2,rs62322662-G,NR,2e-18,1.119,[1.09-1.14],asthma,GCST007798
370,4,123359569,both,rs62322662,Asthma (adult onset),"ADAD1, IL2",ADAD1 - IL2,rs62322662-?,NR,8e-10,1.084,[1.06-1.11],adult onset asthma,GCST007799
701,3,196343817,dannemann_only,rs113048054,"Allergic disease (asthma, hay fever or eczema)",NR,AC092933.2 - LINC01063,rs113048054-A,0.93413,4e-12,1.080497,[1.06-1.1],"Eczema, allergic rhinitis, asthma",GCST009716
702,3,196343817,dannemann_only,rs113048054,Asthma,"NNROS, LRRC33",AC092933.2 - LINC01063,rs113048054-G,0.06587,3e-11,,,asthma,GCST009720
703,3,196349004,dannemann_only,rs112336433,Asthma,NRROS,AC092933.2 - LINC01063,rs112336433-C,0.93272,3e-08,1.069407,[1.05-1.09],asthma,GCST008916
728,4,123141070,dannemann_only,rs45613035,Asthma (childhood onset),NR,KIAA1109,rs45613035-?,NR,9e-12,,,childhood onset asthma,GCST009841
730,4,123141070,dannemann_only,rs45613035,Asthma,"ADAD1, IL2, IL21, IL21-AS1, KIAA1109",KIAA1109,rs45613035-C,0.098644,1e-12,1.077323,[1.06-1.1],asthma,GCST008916


In [11]:
# Associations whose mapped trait contains "Eczema" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('Eczema', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
342,3,196373582,simonti_only,rs4916533,Hay fever and/or eczema,NR,"NRROS, PIGX",rs4916533-C,0.91931,6e-10,1.070778,[1.05-1.09],"Eczema, allergic rhinitis",GCST009717
701,3,196343817,dannemann_only,rs113048054,"Allergic disease (asthma, hay fever or eczema)",NR,AC092933.2 - LINC01063,rs113048054-A,0.93413,4e-12,1.080497,[1.06-1.1],"Eczema, allergic rhinitis, asthma",GCST009716
704,3,196368501,dannemann_only,rs12152276,Eczema,,"PIGX, NRROS",rs12152276-?,NR,7e-10,,,Eczema,GCST007075
729,4,123141070,dannemann_only,rs45613035,Hay fever and/or eczema,NR,KIAA1109,rs45613035-C,0.09838,6e-20,1.094,[1.074-1.115],"Eczema, allergic rhinitis",GCST009717
734,4,123141070,dannemann_only,rs45613035,Eczema,,KIAA1109,rs45613035-?,NR,1.9999999999999998e-26,,,Eczema,GCST007075
1296,21,36457506,dannemann_only,rs73203093,Eczema,,RUNX1,rs73203093-?,NR,7e-09,,,Eczema,GCST007075


### Autoimmune/autoinflammatory diseases

In [12]:
# Associations whose mapped trait contains "lupus" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('lupus', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
447,7,128695983,both,rs13239597,Systemic lupus erythematosus and Systemic scle...,"TNPO3, IRF5",TPI1P2,rs13239597-?,NR,1.0000000000000001e-29,1.664,[NR],"systemic scleroderma, systemic lupus erythemat...",GCST002069
578,1,198594769,dannemann_only,rs34889541,Systemic lupus erythematosus,"PTPRC, CD45",AL157402.1 - PTPRC,rs34889541-G,0.86,3e-10,1.282051,[1.19-1.39],systemic lupus erythematosus,GCST003622
579,1,198594769,dannemann_only,rs34889541,Systemic lupus erythematosus,"PTPRC, CD45",AL157402.1 - PTPRC,rs34889541-G,0.93,2e-12,1.234568,[1.16-1.32],systemic lupus erythematosus,GCST003622
857,7,128585616,dannemann_only,rs35000415,Systemic lupus erythematosus,IRF5,IRF5,rs35000415-?,0.108,2.0000000000000003e-45,1.8,[NR],systemic lupus erythematosus,GCST003622
858,7,128585616,dannemann_only,rs35000415,Systemic lupus erythematosus,IRF5,IRF5,rs35000415-T,,1.0000000000000001e-60,1.83,[NR],systemic lupus erythematosus,GCST003156
859,7,128585616,dannemann_only,rs35000415,Systemic lupus erythematosus,"IRF5, TNPO3",IRF5,rs35000415-T,NR,1e-99,1.82,[1.69-1.96],systemic lupus erythematosus,GCST005752
861,7,128594183,dannemann_only,rs10488631,Systemic lupus erythematosus,IRF5,AC025594.2 - TNPO3,rs10488631-C,0.11,7e-18,1.92,[1.66-2.22],systemic lupus erythematosus,GCST000996
862,7,128594183,dannemann_only,rs10488631,Systemic lupus erythematosus,"TNPO3, IRF5",AC025594.2 - TNPO3,rs10488631-C,0.12,2e-11,,,systemic lupus erythematosus,GCST000144
866,7,128594183,dannemann_only,rs10488631,Systemic lupus erythematosus,TNPO3,AC025594.2 - TNPO3,rs10488631-C,0.11,2e-13,1.829,[1.684-1.99],systemic lupus erythematosus,GCST002463
873,7,128594183,dannemann_only,rs10488631,Systemic lupus erythematosus,"IRF5, TNPO3",AC025594.2 - TNPO3,rs10488631-C,,9e-110,1.92,[1.81–2.03],systemic lupus erythematosus,GCST003155


In [13]:
# Associations whose mapped trait contains "rheumatoid" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('rheumatoid', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
452,7,129962414,simonti_only,rs2306848,Rheumatoid arthritis,CPA4,CPA4,rs2306848-G,0.948,6e-12,,,rheumatoid arthritis,GCST007843
871,7,128594183,dannemann_only,rs10488631,Rheumatoid arthritis,IRF5,AC025594.2 - TNPO3,rs10488631-C,0.11,4e-11,1.19,[NR],rheumatoid arthritis,GCST000679
1148,14,61940675,dannemann_only,rs3783782,Rheumatoid arthritis,PRKCH,PRKCH,rs3783782-A,0.09,2e-09,1.14,[1.09-1.18],rheumatoid arthritis,GCST002318
1149,14,61940675,dannemann_only,rs3783782,Rheumatoid arthritis,PRKCH,PRKCH,rs3783782-A,0.22,4e-09,1.14,[1.09-1.19],rheumatoid arthritis,GCST002318
1150,14,61940675,dannemann_only,rs3783782,Rheumatoid arthritis,PRKCH,PRKCH,rs3783782-A,NR,1e-07,0.151127,[0.095-0.207] unit increase,rheumatoid arthritis,GCST006959
1151,14,61940675,dannemann_only,rs3783782,Rheumatoid arthritis,PRKCH,PRKCH,rs3783782-A,0.23,1e-07,0.16062,[0.1-0.22] unit increase,rheumatoid arthritis,GCST006959


In [14]:
# Associations whose mapped trait contains "scleroderma" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('scleroderma', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
446,7,128684316,simonti_only,rs62478615,Systemic sclerosis,IRF5,TNPO3,rs62478615-?,,1e-10,1.53,,systemic scleroderma,GCST005534
447,7,128695983,both,rs13239597,Systemic lupus erythematosus and Systemic scle...,"TNPO3, IRF5",TPI1P2,rs13239597-?,NR,1.0000000000000001e-29,1.664,[NR],"systemic scleroderma, systemic lupus erythemat...",GCST002069
448,7,128695983,both,rs13239597,Limited cutaneous systemic scleroderma,TNPO3,TPI1P2,rs13239597-?,NR,9e-07,,,limited scleroderma,GCST005555
449,7,128708797,simonti_only,rs17340351,Systemic sclerosis,TNPO3,AC018639.1 - AC011005.2,rs17340351-?,NR,2e-10,,,systemic scleroderma,GCST005554
450,7,128708797,simonti_only,rs17340351,Diffuse cutaneous systemic sclerosis,TNPO3,AC018639.1 - AC011005.2,rs17340351-?,NR,7e-07,,,diffuse scleroderma,GCST005553
451,7,128708797,simonti_only,rs17340351,Systemic sclerosis (anti-topoisomerase-positive),TNPO3,AC018639.1 - AC011005.2,rs17340351-?,NR,1e-08,,,anti-topoisomerase-I-antibody-positive systemi...,GCST005551
863,7,128594183,dannemann_only,rs10488631,Systemic sclerosis,"TNPO, IRF5",AC025594.2 - TNPO3,rs10488631-C,NR,2e-13,1.5,[1.35-1.67],systemic scleroderma,GCST000650
865,7,128594183,dannemann_only,rs10488631,Systemic sclerosis,"TNPO3, IRF5",AC025594.2 - TNPO3,rs10488631-C,0.09,4e-07,1.35,[1.20-1.51],systemic scleroderma,GCST001146
867,7,128594183,dannemann_only,rs10488631,Systemic sclerosis,IRF5,AC025594.2 - TNPO3,rs10488631-?,NR,2e-10,1.5,[1.32-1.69],systemic scleroderma,GCST001160
868,7,128594183,dannemann_only,rs10488631,Systemic sclerosis,IRF5,AC025594.2 - TNPO3,rs10488631-?,NR,8e-07,1.63,[1.34-1.98],systemic scleroderma,GCST001156


In [15]:
# Associations whose mapped trait contains "Sjogren" (case-sensitive, unaccented spelling).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('Sjogren', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
445,7,128681062,both,rs17339836,Sjögren's syndrome,"IRF5, TNPO3",TNPO3,rs17339836-T,0.12,2e-16,1.58,[1.36–1.84],Sjogren syndrome,GCST004878


In [16]:
# Associations whose mapped trait contains "Grave" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('Grave', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
548,1,160464911,dannemann_only,rs1265883,Graves' disease,SLAMF6,SLAMF6,rs1265883-C,0.1,2e-18,1.34,[1.25-1.43],Graves disease,GCST001984


In [17]:
# Associations whose mapped trait contains "glomerulonephritis" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('glomerulonephritis', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
255,2,160878364,simonti_only,rs17830558,Membranous nephropathy,PLA2R1,PLA2R1,rs17830558-T,0.52,4e-10,1.87,[1.54–2.28],membranous glomerulonephritis,GCST003402
258,2,160914156,simonti_only,rs17831251,Membranous nephropathy,PLA2R1,PLA2R1,rs17831251-C,NR,5e-103,2.25,[2.09-2.42],membranous glomerulonephritis,GCST010004
259,2,160914156,simonti_only,rs17831251,Membranous nephropathy,PLA2R1,PLA2R1,rs17831251-C,0.61,5e-48,1.98,[1.81-2.17],membranous glomerulonephritis,GCST010005
260,2,160914156,simonti_only,rs17831251,Membranous nephropathy,PLA2R1,PLA2R1,rs17831251-C,0.7,4e-61,2.81,[2.48-3.17],membranous glomerulonephritis,GCST010006
261,2,160917497,simonti_only,rs4664308,Idiopathic membranous nephropathy,"LY75, ITGB6, PLA2R1, RBMS1",PLA2R1,rs4664308-A,0.43,9.000000000000001e-29,2.28,[1.96-2.64],membranous glomerulonephritis,GCST000984


In [18]:
# Associations whose mapped trait contains "colitis" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('colitis', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
139,12,40528432,both,rs12422544,Ulcerative colitis,NR,SLC2A13 - LINC02555,rs12422544-G,0.01915,3e-06,1.215345,[1.13-1.3],ulcerative colitis,GCST003045
1162,14,75741751,dannemann_only,rs1569328,Chronic inflammatory diseases (ankylosing spon...,FOS,AF111167.1 - FOS,rs1569328-?,NR,2e-09,,,"ankylosing spondylitis, psoriasis, ulcerative ...",GCST005537
1223,18,77220616,dannemann_only,rs7236492,Chronic inflammatory diseases (ankylosing spon...,NFATC1,"AC018445.5, NFATC1",rs7236492-?,NR,1e-07,,,"ankylosing spondylitis, psoriasis, ulcerative ...",GCST005537


In [19]:
# Associations whose mapped trait contains "Crohn" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('Crohn', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
140,12,40528432,both,rs12422544,Crohn's disease,NR,SLC2A13 - LINC02555,rs12422544-G,0.01915,3.9999999999999997e-25,1.455191,[1.38-1.53],Crohn's disease,GCST003044
978,10,64415184,dannemann_only,rs7076156,Crohn's disease,"ADO, ZNF365, ERG2","AC067752.1, AC024598.1",rs7076156-G,0.751,7e-09,1.19,[1.10-1.30],Crohn's disease,GCST001438
1099,12,40601940,dannemann_only,rs11175593,Crohn's disease,"MUC19, LRRK2","LRRK2-DT, LRRK2, LINC02471",rs11175593-T,0.02,3e-10,1.54,[NR],Crohn's disease,GCST000207
1128,13,40833012,dannemann_only,rs17061048,Crohn's disease,NR,LINC00598,rs17061048-A,0.95,1e-07,1.161814,[1.11-1.22],Crohn's disease,GCST003044
1160,14,75741751,dannemann_only,rs1569328,Crohn's disease,NR,AF111167.1 - FOS,rs1569328-G,0.83,6e-11,1.115402,[1.08-1.15],Crohn's disease,GCST003044
1162,14,75741751,dannemann_only,rs1569328,Chronic inflammatory diseases (ankylosing spon...,FOS,AF111167.1 - FOS,rs1569328-?,NR,2e-09,,,"ankylosing spondylitis, psoriasis, ulcerative ...",GCST005537
1163,14,75741751,dannemann_only,rs1569328,Crohn's disease,"FOS, RP11-293M10.1",AF111167.1 - FOS,rs1569328-?,,1e-07,,,Crohn's disease,GCST004132
1221,18,77220616,dannemann_only,rs7236492,Crohn's disease,"NFATC1, TST","AC018445.5, NFATC1",rs7236492-G,0.85,9e-09,1.104796,[1.07-1.14],Crohn's disease,GCST003044
1223,18,77220616,dannemann_only,rs7236492,Chronic inflammatory diseases (ankylosing spon...,NFATC1,"AC018445.5, NFATC1",rs7236492-?,NR,1e-07,,,"ankylosing spondylitis, psoriasis, ulcerative ...",GCST005537


In [20]:
# Associations whose mapped trait contains "bowel" (e.g. inflammatory bowel disease).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('bowel', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
301,3,2076967,simonti_only,rs4437130,Thiopurine-induced pancreatitis in inflammator...,CNTN4,AC018814.1 - RN7SKP144,rs4437130-?,NR,2e-07,6.8,[3.3-14.1],"pancreatitis, response to thiopurine, inflamma...",GCST006208
1129,13,40833012,dannemann_only,rs17061048,Inflammatory bowel disease,NR,LINC00598,rs17061048-A,0.95,5e-09,1.145133,,inflammatory bowel disease,GCST003043
1161,14,75741751,dannemann_only,rs1569328,Inflammatory bowel disease,NR,AF111167.1 - FOS,rs1569328-G,0.83,3e-09,1.084341,,inflammatory bowel disease,GCST003043
1222,18,77220616,dannemann_only,rs7236492,Inflammatory bowel disease,"NFATC1, TST","AC018445.5, NFATC1",rs7236492-G,0.85,1e-08,1.083568,,inflammatory bowel disease,GCST003043


In [21]:
# Associations whose mapped trait contains "psoriasis" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('psoriasis', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
148,12,56750204,both,rs2066819,Psoriasis vulgaris,"STAT2, IL23A",STAT2,rs2066819-C,NR,4e-15,1.419,NR,psoriasis vulgaris,GCST003268
149,12,56750204,both,rs2066819,Psoriasis,"STAT2, IL23A",STAT2,rs2066819-C,0.934,5e-17,1.39,,psoriasis,GCST005527
398,5,168386089,both,rs12188351,Inflammatory skin disease,SLIT3,SLIT3,rs12188351-?,NR,1e-08,,,psoriasis,GCST002740
979,10,64538279,dannemann_only,rs7922314,Cutaneous psoriasis,ADO,AC067751.1,rs7922314-C,0.9206,2e-06,1.33,NR,"cutaneous psoriasis measurement, psoriasis",GCST003269
1104,12,56740682,dannemann_only,rs2066807,Psoriasis,"IL23A, STAT2",STAT2,rs2066807-G,0.932351,1e-10,1.55,[1.35-1.77],psoriasis,GCST002874
1105,12,56740682,dannemann_only,rs2066807,Psoriasis,"IL23A, STAT2",STAT2,rs2066807-G,0.932351,5e-12,1.4,[1.27-1.54],psoriasis,GCST002874
1162,14,75741751,dannemann_only,rs1569328,Chronic inflammatory diseases (ankylosing spon...,FOS,AF111167.1 - FOS,rs1569328-?,NR,2e-09,,,"ankylosing spondylitis, psoriasis, ulcerative ...",GCST005537
1223,18,77220616,dannemann_only,rs7236492,Chronic inflammatory diseases (ankylosing spon...,NFATC1,"AC018445.5, NFATC1",rs7236492-?,NR,1e-07,,,"ankylosing spondylitis, psoriasis, ulcerative ...",GCST005537


In [22]:
# Associations whose mapped trait contains "celiac" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('celiac', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
318,3,46235201,both,rs13098911,Celiac disease,"CCRL2, CCR5, CCR9, CCR1, CCR2, CCR3",CCR3,rs13098911-A,0.10,3e-17,1.3,[1.23-1.39],celiac disease,GCST000612
735,4,123261530,dannemann_only,rs62321692,Celiac disease,KIAA1109,KIAA1109,rs62321692-?,NR,1e-08,0.1924,unit decrease,celiac disease,GCST008489


In [23]:
# Associations whose mapped trait contains "multiple sclerosis" (case-sensitive substring match).
# na=False treats a missing mapped trait as a non-match; without it a NaN in the mask makes .loc raise.
nean_catalog.loc[nean_catalog['MAPPED_TRAIT'].str.contains('multiple sclerosis', na=False)]

Unnamed: 0,Chromosome,Position,Source,ID,DISEASE/TRAIT,REPORTED GENE(S),MAPPED_GENE,STRONGEST SNP-RISK ALLELE,RISK ALLELE FREQUENCY,P-VALUE,OR or BETA,95% CI (TEXT),MAPPED_TRAIT,STUDY ACCESSION
829,6,138179146,dannemann_only,rs17780048,Multiple sclerosis,LOC100130476,WAKMAR2,rs17780048-C,NR,5e-12,1.1008,NR,multiple sclerosis,GCST009597


## Do immune disease-associated Neanderthal SNPs show eQTL?

In [24]:
# Load eQTL data

# Original Fairfax cis-eSNP table with per-condition FDR columns
fairfax_ori = pd.read_csv("../fairfax/tab2_a_cis_eSNPs.txt", sep="\t",
                          usecols=["SNP", "Gene", "Min.dataset", "LPS2.FDR", "LPS24.FDR", "IFN.FDR", "Naive.FDR"])


def _load_best_eqtl(path):
    """Read a recomputed eQTL overlap table, keeping one row per SNP/gene/condition.

    Parameters
    ----------
    path : str
        CSV file containing the columns rsid, pvalue, gene_id, Condition and beta.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ascending p-value; for each (rsid, gene_id, Condition)
        combination only the first row after sorting (the smallest p-value) is kept.
    """
    return (
        pd.read_csv(path, usecols=['rsid', 'pvalue', 'gene_id', 'Condition', 'beta'])
        .sort_values('pvalue')
        # Sorted ascending, so keep='first' retains the smallest p-value per combination
        .drop_duplicates(subset=['rsid', 'gene_id', 'Condition'], keep='first')
    )


# Recomputed eQTL tables, one per study
fairfax_re = _load_best_eqtl('overlap_filtered_fairfax.csv')
nedelec_re = _load_best_eqtl('overlap_filtered_nedelec.csv')
quach = _load_best_eqtl('overlap_filtered_quach.csv')
alasoo = _load_best_eqtl('overlap_filtered_alasoo.csv')

In [25]:
# Selected Neanderthal SNPs with immune disease associations (one rsID per line in the file).
# The file is read inside a context manager so its handle is closed as soon as the list is built.
with open('overlapped_SNPs.txt', 'r') as snp_file:
    gwas = snp_file.read().splitlines()
gwas

['rs55726902',
 'rs11727978',
 'rs66819621',
 'rs4916533',
 'rs12152276',
 'rs113048054',
 'rs45613035',
 'rs73203093',
 'rs13239597',
 'rs35000415',
 'rs10488631',
 'rs12706861',
 'rs34889541',
 'rs11059927',
 'rs1385374',
 'rs4252665',
 'rs2066819',
 'rs2066807',
 'rs12188351',
 'rs7922314',
 'rs17061048',
 'rs7236492',
 'rs1569328',
 'rs12422544',
 'rs7076156',
 'rs11175593',
 'rs62478615',
 'rs13239597',
 'rs17340351',
 'rs10488631',
 'rs12534421',
 'rs36073657',
 'rs2306848',
 'rs10488631',
 'rs3783782',
 'rs17830558',
 'rs17831251',
 'rs4664308',
 'rs13098911',
 'rs62321692',
 'rs17780048',
 'rs17339836',
 'rs1265883',
 'rs4261353',
 'rs17291045',
 'rs66659415',
 'rs75482080',
 'rs150469170',
 'rs12788102',
 'rs2660',
 'rs2384071',
 'rs2384072',
 'rs2057778']

In [26]:
# Original Fairfax eQTL rows whose SNP is one of the selected GWAS SNPs
ls = set(gwas) & set(fairfax_ori['SNP'])
fairfax_ori[fairfax_ori['SNP'].isin(ls)]

Unnamed: 0,SNP,Gene,LPS2.FDR,LPS24.FDR,IFN.FDR,Naive.FDR,Min.dataset
5823,rs7236492,NFATC1,0.254156,1.2e-05,0.000213,0.002774,LPS24
6359,rs2066807,CNPY2,0.074123,0.00078,0.000656,0.034881,IFN
9990,rs7236492,NFATC1,,0.00046,0.028019,0.164114,LPS24
12213,rs7236492,NFATC1,,0.032637,0.183918,0.022086,Naive
12676,rs2066807,TMEM4,0.295496,0.000184,0.286362,0.046866,LPS24


In [27]:
# Recomputed Fairfax eQTL rows whose rsID is one of the selected GWAS SNPs
ls = set(gwas) & set(fairfax_re['rsid'])
fairfax_re[fairfax_re['rsid'].isin(ls)]

Unnamed: 0,rsid,pvalue,gene_id,beta,Condition
30222,rs2660,1.97779e-137,ENSG00000089127,1.17916,Naive
30214,rs2057778,9.35443e-134,ENSG00000089127,1.20246,Naive
7083,rs2660,7.975459999999999e-130,ENSG00000089127,1.50287,IFN
7069,rs2057778,2.28955e-129,ENSG00000089127,1.53806,IFN
7496,rs2384072,2.50292e-111,ENSG00000089127,1.43131,IFN
7489,rs2384071,2.56783e-111,ENSG00000089127,1.43174,IFN
30454,rs2384071,4.28883e-110,ENSG00000089127,1.10385,Naive
30458,rs2384072,4.375e-110,ENSG00000089127,1.1035,Naive
13481,rs2660,1.64521e-64,ENSG00000089127,1.25204,LPS2
13471,rs2057778,3.8173299999999996e-63,ENSG00000089127,1.27185,LPS2


In [28]:
# Recomputed Nedelec eQTL rows whose rsID is one of the selected GWAS SNPs
ls = set(gwas) & set(nedelec_re['rsid'])
nedelec_re[nedelec_re['rsid'].isin(ls)]

Unnamed: 0,rsid,pvalue,gene_id,beta,Condition
1674,rs2384071,1.76555e-13,ENSG00000111331,0.162116,Salmonella
1675,rs2384072,1.76762e-13,ENSG00000111331,0.162108,Salmonella
1614,rs2057778,1.0761e-11,ENSG00000111331,0.157447,Salmonella
1616,rs2660,1.07788e-11,ENSG00000111331,0.157389,Salmonella
1083,rs55726902,1.08307e-11,ENSG00000061273,0.296714,Salmonella
121,rs55726902,1.9997e-09,ENSG00000061273,0.253493,Listeria


In [29]:
# Recomputed Quach eQTL rows whose rsID is one of the selected GWAS SNPs
ls = set(gwas) & set(quach['rsid'])
quach[quach['rsid'].isin(ls)]

Unnamed: 0,rsid,pvalue,gene_id,beta,Condition
916,rs2660,3.8841e-15,ENSG00000111331,0.197481,IAV
914,rs2057778,5.00535e-15,ENSG00000111331,0.197127,IAV
975,rs2384072,6.71257e-15,ENSG00000111331,0.194627,IAV
974,rs2384071,6.71733e-15,ENSG00000111331,0.194641,IAV
1017,rs11059927,4.87631e-12,ENSG00000139370,-0.26489,IAV
1024,rs1385374,4.8801e-12,ENSG00000139370,-0.264857,IAV
7240,rs2384071,4.87105e-10,ENSG00000111331,0.175183,R848
7241,rs2384072,4.87502e-10,ENSG00000111331,0.175165,R848
7182,rs2660,1.27944e-09,ENSG00000111331,0.172391,R848
7180,rs2057778,1.51736e-09,ENSG00000111331,0.171991,R848


In [30]:
# Recomputed Alasoo eQTL rows whose rsID is one of the selected GWAS SNPs
ls = set(gwas) & set(alasoo['rsid'])
alasoo[alasoo['rsid'].isin(ls)]

Unnamed: 0,rsid,pvalue,gene_id,beta,Condition
1124,rs13098911,5.54313e-11,ENSG00000223552,-0.816275,IFNg
2543,rs66819621,5.6025e-09,ENSG00000174125,-0.463299,Salmonella
636,rs2660,7.55666e-09,ENSG00000089127,-0.128276,IFNg+Salmonella
634,rs2057778,8.34005e-09,ENSG00000089127,-0.128607,IFNg+Salmonella
694,rs2384071,3.36478e-08,ENSG00000089127,-0.121321,IFNg+Salmonella
695,rs2384072,3.37173e-08,ENSG00000089127,-0.121283,IFNg+Salmonella
