## Identify genes in 2q11.1 sub-regions

Identify the genes in the 20-kb sub-regions described in the manuscript Results.
Regions are specified in GRCh37 (1KG) coordinates and are lifted over to hg38 before the Ensembl query.

In [1]:
import sys, os
from pyliftover import LiftOver
from ensembldb3 import HostAccount, Genome


# Switch the kernel's working directory to the project folder when it is not
# already there.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# configurable before sharing the notebook.
path = '/home/helmut/helmutsimonpython/helmutsimonpython/Neighbourhood_Effects'
if os.getcwd() != path:
    os.chdir(path)

# ENSEMBL_ACCOUNT is split on whitespace and its fields are passed positionally
# to HostAccount, so credentials never appear in the notebook itself.
# A missing variable raises KeyError here.
release = 89
account = HostAccount(*os.environ['ENSEMBL_ACCOUNT'].split())
genome = Genome(species='human', release=release, account=account)

In [2]:
def locate_genes(start, end, coord=None, db=None):
    """Print the genes found in a region, with their canonical exons and introns.

    For every gene returned by ``get_features`` a header line (blank line,
    symbol, gene start, gene end) is printed, followed by one line per exon
    and one line per intron of the gene's canonical transcript.

    Parameters
    ----------
    start, end
        Bounds of the query region; forwarded unchanged to ``get_features``.
    coord : optional
        Coordinate name forwarded as ``coord_name``. When omitted, the
        notebook-level ``coord_name`` is looked up at call time.
    db : optional
        Object exposing ``get_features``. When omitted, the notebook-level
        ``genome`` is used.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working; explicit arguments make the function usable for other
    # chromosomes or data sources without mutating global state.
    if coord is None:
        coord = coord_name
    if db is None:
        db = genome

    genes = db.get_features(coord_name=coord, start=start, end=end, feature_types="gene")
    for gene in genes:
        location = gene.location
        print('\n', gene.symbol, location.start, location.end)

        transcript = gene.canonical_transcript
        for exon in transcript.exons:
            print(exon, exon.location)

        # `introns` can be falsy (nothing to list); treat that as "no introns".
        for intron in transcript.introns or ():
            print(intron, intron.location)

chr2:96845244-96865244 (A)

In [3]:
# Region A.
# `chrom` is the chromosome name handed to the liftover; `coord_name` is the
# name that locate_genes forwards to the genome query.
chrom = 'chr2'
coord_name = '2'
lo = LiftOver('hg19', 'hg38')

# Lift both region bounds from hg19 to hg38. [0][1] picks the second field of
# the first result returned for each position.
start, end = (
    lo.convert_coordinate(chrom, position)[0][1]
    for position in (96845244, 96865244)
)

print(start, end)
locate_genes(start, end)

96179506 96199506

 STARD7 96184858 96208825
Exon(stableid=ENSE00001853520, rank=1) Homo sapiens:chromosome:2:96208144-96208818:-1
Exon(stableid=ENSE00003614687, rank=2) Homo sapiens:chromosome:2:96195340-96195549:-1
Exon(stableid=ENSE00003643171, rank=3) Homo sapiens:chromosome:2:96194957-96195007:-1
Exon(stableid=ENSE00000921755, rank=4) Homo sapiens:chromosome:2:96193241-96193352:-1
Exon(stableid=ENSE00000540421, rank=5) Homo sapiens:chromosome:2:96193077-96193160:-1
Exon(stableid=ENSE00003663624, rank=6) Homo sapiens:chromosome:2:96192368-96192468:-1
Exon(stableid=ENSE00003622057, rank=7) Homo sapiens:chromosome:2:96187216-96187301:-1
Exon(stableid=ENSE00001817479, rank=8) Homo sapiens:chromosome:2:96184858-96186914:-1
Intron(TranscriptId=ENST00000337288, rank=1) Homo sapiens:chromosome:2:96195549-96208144:-1
Intron(TranscriptId=ENST00000337288, rank=2) Homo sapiens:chromosome:2:96195007-96195340:-1
Intron(TranscriptId=ENST00000337288, rank=3) Homo sapiens:chromosome:2:96193352-961

chr2:96905244-96925244 (B)

In [4]:
# Region B: lift both bounds with the `lo` converter and `chrom` defined in an
# earlier cell, then list the genes in the lifted interval.
start, end = (
    lo.convert_coordinate(chrom, position)[0][1]
    for position in (96905244, 96925244)
)

print(start, end)
locate_genes(start, end)

96239506 96259506

 STARD7-AS1 96208415 96242621
Exon(stableid=ENSE00001472846, rank=1) Homo sapiens:chromosome:2:96239712-96240688:1
Exon(stableid=ENSE00001531202, rank=2) Homo sapiens:chromosome:2:96241876-96242621:1
Intron(TranscriptId=ENST00000432267, rank=1) Homo sapiens:chromosome:2:96240688-96241876:1

 TMEM127 96248515 96265994
Exon(stableid=ENSE00001071712, rank=1) Homo sapiens:chromosome:2:96265868-96265994:-1
Exon(stableid=ENSE00000921763, rank=2) Homo sapiens:chromosome:2:96265137-96265512:-1
Exon(stableid=ENSE00003610373, rank=3) Homo sapiens:chromosome:2:96254832-96254997:-1
Exon(stableid=ENSE00000921761, rank=4) Homo sapiens:chromosome:2:96248515-96254115:-1
Intron(TranscriptId=ENST00000258439, rank=1) Homo sapiens:chromosome:2:96265512-96265868:-1
Intron(TranscriptId=ENST00000258439, rank=2) Homo sapiens:chromosome:2:96254997-96265137:-1
Intron(TranscriptId=ENST00000258439, rank=3) Homo sapiens:chromosome:2:96254115-96254832:-1

 TMEM127 5000 20806
Exon(stableid=LRG_528

chr2:96985244-97005244 (C)

In [5]:
# Region C: lift both bounds with the `lo` converter and `chrom` defined in an
# earlier cell, then list the genes in the lifted interval.
start, end = (
    lo.convert_coordinate(chrom, position)[0][1]
    for position in (96985244, 97005244)
)

print(start, end)
locate_genes(start, end)

96319506 96339506

 AC021188.1 96307262 96321731
Exon(stableid=ENSE00001666599, rank=1) Homo sapiens:chromosome:2:96321135-96321731:-1
Exon(stableid=ENSE00001760016, rank=2) Homo sapiens:chromosome:2:96307262-96307406:-1
Intron(TranscriptId=ENST00000421534, rank=1) Homo sapiens:chromosome:2:96307406-96321135:-1

 ITPRIPL1 96325330 96330517
Exon(stableid=ENSE00001435085, rank=1) Homo sapiens:chromosome:2:96326222-96330517:1

 NCAPH 96335786 96373845
Exon(stableid=ENSE00001893240, rank=1) Homo sapiens:chromosome:2:96335786-96335848:1
Exon(stableid=ENSE00000770619, rank=2) Homo sapiens:chromosome:2:96341641-96341894:1
Exon(stableid=ENSE00003653556, rank=3) Homo sapiens:chromosome:2:96342049-96342140:1
Exon(stableid=ENSE00003602184, rank=4) Homo sapiens:chromosome:2:96342755-96342848:1
Exon(stableid=ENSE00003650085, rank=5) Homo sapiens:chromosome:2:96343165-96343304:1
Exon(stableid=ENSE00003563081, rank=6) Homo sapiens:chromosome:2:96344104-96344229:1
Exon(stableid=ENSE00003648879, rank=7

chr2:97165244-97185244 (D)

In [6]:
# Region D: lift both bounds with the `lo` converter and `chrom` defined in an
# earlier cell, then list the genes in the lifted interval.
start, end = (
    lo.convert_coordinate(chrom, position)[0][1]
    for position in (97165244, 97185244)
)

print(start, end)
locate_genes(start, end)

96499507 96519507

 NEURL3 96497642 96508109
Exon(stableid=ENSE00001616021, rank=1) Homo sapiens:chromosome:2:96505258-96505357:-1
Exon(stableid=ENSE00003602863, rank=2) Homo sapiens:chromosome:2:96500438-96500924:-1
Exon(stableid=ENSE00003743593, rank=3) Homo sapiens:chromosome:2:96499377-96499449:-1
Exon(stableid=ENSE00001795020, rank=4) Homo sapiens:chromosome:2:96497642-96498446:-1
Intron(TranscriptId=ENST00000451794, rank=1) Homo sapiens:chromosome:2:96500924-96505258:-1
Intron(TranscriptId=ENST00000451794, rank=2) Homo sapiens:chromosome:2:96499449-96500438:-1
Intron(TranscriptId=ENST00000451794, rank=3) Homo sapiens:chromosome:2:96498446-96499377:-1


In [4]:
# NOTE(review): this cell uses nothing from the cells above and its execution
# count is out of sequence — it looks like a leftover scratch check.
from scipy.special import binom

# Binomial coefficient with k > n; the recorded output is 0.0.
binom(4, 5)

0.0