Retrieve sequence data from __UCSC__. The `sequence` function below fetches the DNA sequence for a genomic region from the UCSC Genome Browser's DAS server.

In [1]:
# Equivalent to the following browser request:
# http://genome.ucsc.edu/cgi-bin/das/hg19/dna?segment=chr1:100000,100010

import urllib.request

def _seq_from_xml(xml):
    start = xml.find(">", xml.find("<DNA")) + 1
    end = xml.rfind("</DNA>")
    return xml[start:end].replace(' ', '').replace('\n', '').strip()

def sequence(db, chrom, start, end):
    """
    Call the UCSC DAS server and return the sequence for a region.

    ``db`` is the database identifier placed in the URL path (e.g. 'hg19')
    and ``chrom`` the sequence name (e.g. 'chr1').  ``start`` and ``end``
    are formatted as integers into the ``segment=chrom:start,end`` query;
    in the example below both end positions are part of the result
    (11 bases for 100000..100010).

    The response body is decoded as UTF-8 and the content of its ``<DNA>``
    element is returned as a string.  Any exception raised by
    ``urllib.request.urlopen`` or by decoding propagates to the caller.

    >>> sequence('hg19', 'chr1', 100000, 100010)
    'cactaagcaca'
    """
    # Build the URL in a single formatting pass so that a '%' inside ``db``
    # cannot be misread as a format specifier by a second pass.
    url = "http://genome.ucsc.edu/cgi-bin/das/%s/dna?segment=%s:%i,%i" % (
        db, chrom, start, end)
    # Use the response as a context manager so the connection is closed
    # even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        xml = response.read().decode("utf-8")
    return _seq_from_xml(xml)

In [2]:
# Example: fetch positions 100000 through 100010 of chr1 from hg19
# (the output below shows the 11 bases returned).
sequence('hg19', 'chr1', 100000, 100010)

'cactaagcaca'