Permalink
Browse files

consolidated fetching aoty data and deduping; made utilities to work like filters instead of hardcoded filenames
  • Loading branch information...
edsu committed Apr 3, 2013
1 parent 6bbe07f commit c2388f7ab55cb5c74c8532022440984d7a530ab7
Showing with 43 additions and 21 deletions.
  1. +30 −2 aoty.py
  2. +13 −19 aoty_cmp.py → compare.py
  3. 0 aoty_dedupe.py → dedupe.py
View
32 aoty.py
@@ -1,5 +1,10 @@
#!/usr/bin/env python
+"""
+Reads Alf's AOTY website and writes out line-oriented json, one line for
+each unique album in the list of lists.
+"""
+
import json
from lxml import html
@@ -9,14 +14,15 @@ def main():
aoty = {}
for y in [2007, 2008, 2009, 2010, 2011, 2012]:
aoty[y] = list(year(y))
- open("aoty.json", "w").write(json.dumps(aoty, indent=2))
+ aoty = dedupe(aoty)
+ for album in aoty:
+ print json.dumps(album)
def year(y):
start = 0
while start != None:
url = 'http://apps.hubmed.org/aoty/%i?_start=%i' % (y, start)
doc = html.parse(url)
- print url
for a in album_lists(doc):
yield a
if doc.xpath(".//a[@rel='next']"):
@@ -39,5 +45,27 @@ def albums(doc):
album_title = album.xpath("string(a[@class='title album'])")
yield {'artist': artist, 'album': album_title}
+def dedupe(aoty):
+ albums = {}
+ album_counts = {}
+
+ for year in aoty.keys():
+ for album_list in aoty[year]:
+ for album in album_list['albums']:
+ k = "%(artist)s :~: %(album)s" % album
+ albums[k] = album
+ album_counts[k] = album_counts.get(k, 0) + 1
+
+ album_keys = albums.keys()
+ album_keys.sort(lambda a, b: cmp(album_counts[b], album_counts[a]))
+
+ sorted_albums = []
+ for k in album_keys:
+ album = albums[k]
+ album['listed'] = album_counts[k]
+ sorted_albums.append(album)
+
+ return sorted_albums
+
if __name__ == "__main__":
main()
View
@@ -1,34 +1,40 @@
#!/usr/bin/env python
+"""
+Reads album information as line-oriented JSON from stdin or a supplied filename
+and looks up the album on spotify and rdio and writes out line-oriented JSON
+with streaming information.
+"""
+
import re
import sys
import json
import time
import logging
+import fileinput
from urllib import quote, urlopen, urlencode
import oauth2 as oauth
import config
-def main(console=False):
- logging.basicConfig(filename="aoty_cmp.log", level=logging.INFO)
+def main():
+ logging.basicConfig(filename="compare.log", level=logging.INFO)
aoty = json.loads(open("aoty_dedupe.json").read())
- for a in aoty:
+ for line in fileinput.input():
+ a = json.loads(line)
try:
artist = a['artist']
album = a['album']
a['spotify'] = spotify(artist, album)
a['rdio'] = rdio(artist, album)
- if console:
- progress(a)
logging.info(a)
+ print json.dumps(a)
except Exception, e:
logging.exception("error while comparing")
sys.exit(1)
time.sleep(1)
- open("aoty.json", "w").write(json.dumps(aoty, indent=2))
def spotify(artist, album):
q = '%s AND "%s"' % (artist, album)
@@ -126,18 +132,6 @@ def rdio(artist, album):
return {'can_stream': can_stream, 'url': url}
-def progress(a):
- r = a['rdio']['can_stream']
- s = a['spotify']['can_stream']
- if r and s:
- sys.stderr.write(".")
- elif r:
- sys.stderr.write("r")
- elif s:
- sys.stderr.write("s")
- else:
- sys.stderr.write("x")
-
def clean(a):
a = a.lower()
a = re.sub(' and ', '', a)
@@ -147,4 +141,4 @@ def clean(a):
return a
if __name__ == "__main__":
- main(console=True)
+ main()
File renamed without changes.

0 comments on commit c2388f7

Please sign in to comment.