/
parse_builds_3_sites.py
46 lines (38 loc) · 1.24 KB
/
parse_builds_3_sites.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
"""
Connects to sites and determines which builds are available at each.
"""
from __future__ import print_function
import xml.etree.ElementTree as ElementTree
import requests
# Base CGI URLs of the servers that main() queries, one entry per server.
sites = [
    'http://genome.ucsc.edu/cgi-bin/',
    'http://archaea.ucsc.edu/cgi-bin/',
    'http://genome-test.cse.ucsc.edu/cgi-bin/',
]

# Short label for each entry of ``sites``; the two lists are index-aligned.
names = [
    'main',
    'archaea',
    'test',
]
def main(timeout=60):
    """Yield the builds advertised by each server listed in ``sites``.

    For every base URL in the module-level ``sites`` list, the ``das/dsn``
    document is downloaded and parsed as XML.  The ``id`` attribute of the
    ``SOURCE`` child of each top-level element is collected as a build
    name.  A site that cannot be fetched, or that returns text which is not
    parsable XML, is reported on stdout with a ``#``-prefixed line and
    skipped; the remaining sites are still processed.

    Args:
        timeout: Value passed as ``timeout`` to ``requests.get`` so that an
            unresponsive server cannot block the run forever.  Pass ``None``
            to wait indefinitely.

    Yields:
        list: ``[name, trackurl, builds]`` where ``name`` is the matching
        entry of ``names``, ``trackurl`` is the site's base URL followed by
        ``"hgTracks?"``, and ``builds`` is the list of distinct build ids
        found for that site.
    """
    for name, base_url in zip(names, sites):
        site = base_url + "das/dsn"
        trackurl = base_url + "hgTracks?"

        # ``except Exception`` keeps the best-effort "skip this site"
        # behaviour while letting KeyboardInterrupt / SystemExit propagate.
        try:
            text = requests.get(site, timeout=timeout).text
        except Exception:
            print("#Unable to connect to " + site)
            continue

        try:
            tree = ElementTree.fromstring(text)
        except Exception:
            print("#Invalid xml passed back from " + site)
            continue

        print("#Harvested from", site)

        found = []
        for dsn in tree:
            source = dsn.find("SOURCE")
            if source is None:
                # Entry without a SOURCE child: nothing to harvest from it.
                continue
            build = source.attrib.get('id')
            if build is None:
                # SOURCE element that carries no id attribute.
                continue
            found.append(build)

        # Collapse duplicate ids through dict keys.
        builds = list(dict.fromkeys(found))
        yield [name, trackurl, builds]
if __name__ == "__main__":
    # One tab-separated line per harvested site:
    # label, track URL, comma-joined build ids.
    for label, track_url, build_ids in main():
        print("\t".join((label, track_url, ",".join(build_ids))))