/
ckan.py
executable file
·163 lines (134 loc) · 4.7 KB
/
ckan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python
"""
Generate lod.js Protovis data file.
"""
import os
import sys
import json
import time
import urllib
import logging
import traceback
from datetime import datetime
# Path of the file that configure_logging() attaches to the root logger.
LOG_FILE = "ckan.log"
# Level that configure_logging() sets on the root logger.
LOG_LEVEL = logging.INFO
def main(argv):
    """Talk to the CKAN REST API and generate lod.js.

    Configures logging, fetches the package metadata, renders it as a
    Protovis JavaScript data file and writes it to disk.  Any ordinary
    error raised along the way is printed to stderr and logged (with its
    traceback) instead of propagating.

    ``argv`` is accepted to keep the conventional entry-point signature;
    it is not used.
    """
    configure_logging()
    log = logging.getLogger()
    log.info("starting to load data from ckan")
    try:
        packages = lod_packages()
        javascript = protovis_javascript(packages)
        write_javascript(javascript)
    except Exception as e:
        # Catch Exception rather than BaseException so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still terminate the process.
        traceback.print_exc()
        # exc_info=True also puts the traceback into the log file, which
        # is the only record when the script runs unattended.
        log.fatal("exiting after unexpected error: %s" % e, exc_info=True)
    # Logged whether or not the load succeeded, marking the end of the run.
    log.info("finished ckan load")
def lod_packages():
    """Return a list of package metadata dicts from CKAN.

    Looks up the ``lodcloud`` group, then fetches each package listed in
    it.  Packages whose metadata cannot be retrieved are logged and
    skipped.  Every returned dict gets an extra ``internal_id`` key equal
    to its position in the returned list.

    Raises:
        RuntimeError: if the ``lodcloud`` group itself cannot be retrieved.
    """
    log = logging.getLogger()

    group = ckan('group/lodcloud')
    if group is None:
        # ckan() signals failure by returning None; fail with a clear
        # message instead of an opaque TypeError on the subscript below.
        raise RuntimeError("unable to retrieve the lodcloud group from ckan")

    packages = []
    for package in group['packages']:
        package_info = ckan('package/%s' % package)
        if package_info is None:
            log.error("unable to retrieve package info for %s" % package)
            continue
        # Skipped packages do not consume an id, so ids stay dense and
        # match list positions (get_links() uses them as link endpoints).
        package_info['internal_id'] = len(packages)
        packages.append(package_info)
        log.info("got info for %s" % package_info['name'])
    return packages
def ckan(path):
    """Get a JSON resource via the CKAN REST API.

    ``path`` is appended to ``http://ckan.net/api/rest/``.

    Returns the decoded JSON document when the server answers with HTTP
    200; otherwise logs an error and returns None.

    NOTE: uses ``urllib.urlopen``, which is the Python 2 API.
    """
    log = logging.getLogger()
    url = 'http://ckan.net/api/rest/' + path
    response = urllib.urlopen(url)
    try:
        status = response.getcode()
        if status != 200:
            log.error("%s from ckan, unable to retrieve %s" % (status, url))
            return None
        return json.loads(response.read())
    finally:
        # Always release the connection, on success and on failure alike.
        response.close()
def protovis_javascript(packages):
    """Render the packages as the source text of the Protovis data file.

    The result is a JavaScript statement assigning an object with
    ``nodes`` and ``links`` members to the variable ``lod``.
    """
    graph = {}
    graph['nodes'] = get_nodes(packages)
    graph['links'] = get_links(packages)
    serialized = json.dumps(graph, indent=2)
    return "var lod = " + serialized
def write_javascript(javascript):
    """Safely write the Protovis javascript to lod.js.

    The text is first written to a timestamped temporary file in the
    current directory and then renamed to ``lod.js``, so readers never see
    a partially written file.  Afterwards ``last_update.html`` is
    rewritten with the local time of this update.
    """
    now = datetime.now()

    tmp_file = "lod.js-%s" % now.strftime("%Y%m%dT%H%M%S")
    with open(tmp_file, "w") as out:
        out.write(javascript)
    os.rename(tmp_file, "lod.js")

    # time.daylight only says that a DST zone is *defined* for this locale,
    # not that DST is in effect; tm_isdst reports the actual current state
    # (it may be -1 when unknown, hence the "> 0").
    is_dst = time.localtime().tm_isdst > 0
    tz = time.tzname[1] if is_dst else time.tzname[0]
    stamp = now.strftime("%Y-%m-%d %H:%M:%S ") + tz
    with open("last_update.html", "w") as out:
        out.write("<span>last update: %s</span>" % stamp)
def get_nodes(packages):
    """Construct a list of nodes suitable for Protovis.

    Each package dict must provide ``name``, ``title``, ``ratings_average``,
    ``ratings_count`` and an ``extras`` dict.  One node dict is produced
    per package, in the same order as ``packages``.

    - ``ratings_average`` is converted to float, or left as None.
    - ``nodeName`` is the ``shortname`` extra, falling back to the title.
    - ``triples`` is the ``triples`` extra as stored (not converted),
      falling back to 1000 when the extra is absent.
    """
    nodes = []
    for package in packages:
        extras = package['extras']
        title = package['title']

        rating = package['ratings_average']
        if rating is not None:
            rating = float(rating)

        nodes.append({
            'ratings_average': rating,
            'ratings_count': package['ratings_count'],
            'nodeName': extras.get('shortname', title),
            'nodeTitle': title,
            'ckanUrl': 'http://ckan.net/package/%s' % package['name'],
            'triples': extras.get('triples', 1000),
        })
    return nodes
def get_links(packages):
    """Return the links between the nodes in a form suitable for Protovis.

    A link is declared by an ``extras`` key of the form
    ``links:<target package name>`` whose value is the link count.  Each
    result is a dict with ``source`` and ``target`` (the packages'
    ``internal_id`` values) and ``count``.

    Links to packages that are not in ``packages`` are logged and skipped.
    A count that cannot be parsed as an integer (including a missing/None
    value) is treated as 1.
    """
    log = logging.getLogger()
    prefix = 'links:'

    # first get a dictionary lookup for all the packages by name
    package_map = {package['name']: package for package in packages}

    # now generate links based on the numeric id of the package
    links = []
    for from_package in packages:
        for key, value in from_package['extras'].items():
            if not key.startswith(prefix):
                continue
            # Strip only the prefix so a target name that itself contains
            # a colon is kept whole rather than truncated.
            to_package_name = key[len(prefix):]
            if to_package_name not in package_map:
                log.error("%s has link to %s which doesn't exist",
                          from_package['name'], to_package_name)
                continue
            try:
                count = int(value)
            except (TypeError, ValueError):
                # TypeError covers a null/None extra value.
                count = 1
            links.append({
                'source': from_package['internal_id'],
                'target': package_map[to_package_name]['internal_id'],
                'count': count,
            })
    return links
def configure_logging():
    """Set the root logger's level to LOG_LEVEL and attach a LOG_FILE handler.

    Records are formatted as ``[time level logger-name] message``.
    """
    root = logging.getLogger()
    root.setLevel(LOG_LEVEL)

    file_handler = logging.FileHandler(LOG_FILE)
    file_handler.setFormatter(
        logging.Formatter("[%(asctime)s %(levelname)s %(name)s] %(message)s"))
    root.addHandler(file_handler)
# Run the loader only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)