Skip to content
Browse files

initial commit

  • Loading branch information...
0 parents commit c306127e9281f110752487c4ade4a068a3eabb08 @edsu committed Aug 6, 2011
Showing with 82,541 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +15 −0 README.rst
  3. +37,072 −0 aoty-dedupe.json
  4. +31 −0 aoty-dedupe.py
  5. +45,300 −0 aoty.json
  6. +42 −0 aoty.py
  7. +73 −0 aotycmp.py
  8. +4 −0 config.py.orig
  9. +2 −0 requirements.pip
2 .gitignore
@@ -0,0 +1,2 @@
+config.py
+*.log
15 README.rst
@@ -0,0 +1,15 @@
+aotycmp is a hack to see which albums listed on Alf Eaton's [Albums of the
+Year](http://aoty.hubmed.org) list of lists (AOTY) can be found and
+streamed on Spotify and Rdio. The results are found in aoty.json.
+
+The steps for reproducing the aoty.json are to:
+
+0. pip install -r requirements.pip
+1. cp config.py.orig config.py
+2. get a Rdio API Key and put credentials in config.py
+3. ./aoty.py # crawls aoty.hubmed.org and stores data in aoty.json
+4. ./aoty-dedupe.py # dedupes albums across lists and stores in aoty-dedupe.json
+5. ./aotycmp.py # reads aoty-dedupe.json and stores results of rdio/spotify lookups in aotycmp.json
+
+Maybe I should've dumped the crawled data into CouchDB instead of chaining
+JSON dumps together like this. Could be more fun right?
37,072 aoty-dedupe.json
37,072 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
31 aoty-dedupe.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+import json
+
+aoty = json.loads(open('aoty.json').read())
+
+albums = {}
+album_counts = {}
+
+for year in aoty.keys():
+ for album_list in aoty[year]:
+ for album in album_list['albums']:
+ k = "%(artist)s :~: %(album)s" % album
+ albums[k] = album
+ album_counts[k] = album_counts.get(k, 0) + 1
+
+album_keys = albums.keys()
+album_keys.sort(lambda a, b: cmp(album_counts[b], album_counts[a]))
+
+sorted_albums = []
+for k in album_keys:
+ album = albums[k]
+ album['listed'] = album_counts[k]
+ sorted_albums.append(album)
+
+open('aoty-dedupe.json', 'w').write(json.dumps(sorted_albums, indent=2))
+
+
+
+
+
45,300 aoty.json
45,300 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
42 aoty.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import json
+
+from lxml import html
+
+
+def main():
+ aoty = {}
+ for y in [2007, 2008, 2009, 2010]:
+ aoty[y] = list(year(y))
+ open("aoty.json", "w").write(json.dumps(aoty, indent=2))
+
+def year(y):
+ start = 0
+ while start != None:
+ url = 'http://apps.hubmed.org/aoty/%i?_start=%i' % (y, start)
+ doc = html.parse(url)
+ for a in album_lists(doc):
+ yield a
+ if doc.xpath(".//a[@rel='next']"):
+ start += 20
+ else:
+ start = None
+
+def album_lists(doc):
+ for album_list in doc.xpath(".//div[@class='item']"):
+ a = album_list.xpath(".//a[@class='title']")[0]
+ list_name = a.text
+ list_url = a.attrib['href']
+ yield {'name': list_name,
+ 'url': list_url,
+ 'albums': list(albums(album_list))}
+
+def albums(doc):
+ for album in doc.xpath(".//li[@class='album']"):
+ artist = album.xpath("string(a[@class='artist'])")
+ album_title = album.xpath("string(a[@class='title'])")
+ yield {'artist': artist, 'album': album_title}
+
+# Run the crawl only when this file is executed as a script.
+if __name__ == "__main__":
+ main()
73 aotycmp.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+import json
+import time
+import logging
+from urllib import quote, urlopen, urlencode
+
+import oauth2 as oauth
+
+import config
+
+
+def main():
+ logging.basicConfig(filename="aotycmp.log")
+ aoty = json.loads(open("aoty-dedupe.json").read())
+ for a in aoty:
+ try:
+ artist = a['artist']
+ album = a['album']
+ a['spotify'] = spotify(artist, album)
+ a['rdio'] = rdio(artist, album)
+ logging.info(a)
+ except Excpetion, e:
+ logging.exception(e)
+ time.sleep(1)
+ open("aotycmp.json", "w").write(json.dumps(aoty))
+
+def spotify(artist, album):
+ q = '%s AND "%s"' % (artist, album)
+ q = quote(q.encode('utf-8'))
+ url = 'http://ws.spotify.com/search/1/album.json?q=' + q
+ response = json.loads(urlopen(url).read())
+
+ can_stream = False
+ url = None
+
+ for a in response['albums']:
+ if a['name'] == album and spotify_artist(a, artist):
+ url = a['href']
+ if config.COUNTRY in a['availability']['territories'].split(' '):
+ can_stream = True
+
+ return {'can_stream': can_stream, 'url': url}
+
+def spotify_artist(a, artist_name):
+ for artist in a['artists']:
+ if artist['name'] == artist_name:
+ return True
+ return False
+
+def rdio(artist, album):
+ consumer = oauth.Consumer(config.RDIO_CONSUMER_KEY,
+ config.RDIO_CONSUMER_SECRET)
+ client = oauth.Client(consumer)
+ q = {'method': 'search',
+ 'query': ('%s %s' % (artist, album)).encode('utf-8'),
+ 'types': 'Album'}
+ j = client.request('http://api.rdio.com/1/', 'POST', urlencode(q))[1]
+ response = json.loads(j)
+
+ can_stream = False
+ url = None
+ for r in response['result']['results']:
+ if r['name'] == album and r['artist']:
+ url = "http://rdio.com" + r['url']
+ if r['canStream'] == True:
+ can_stream = True
+
+ return {'can_stream': can_stream, 'url': url}
+
+
+# Run the lookups only when this file is executed as a script.
+if __name__ == "__main__":
+ main()
4 config.py.orig
@@ -0,0 +1,4 @@
+# Territory code that aotycmp.py looks for in each Spotify album's
+# availability list to decide whether the album can be streamed.
+COUNTRY = 'US'
+# Rdio API credentials passed to the OAuth consumer in aotycmp.py; fill
+# these in after copying this file to config.py.
+RDIO_CONSUMER_KEY = ''
+RDIO_CONSUMER_SECRET = ''
+
2 requirements.pip
@@ -0,0 +1,2 @@
+lxml
+oauth2

0 comments on commit c306127

Please sign in to comment.
Something went wrong with that request. Please try again.