Skip to content
Newer
Older
100644 264 lines (233 sloc) 9.21 KB
7a591c8 @alexkay daemon: Refactor
authored Oct 16, 2011
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright © 2009-2011 Alexander Kojevnikov <alexander@kojevnikov.com>
4 #
5 # muspy is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU Affero General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # muspy is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU Affero General Public License for more details.
14 #
15 # You should have received a copy of the GNU Affero General Public License
16 # along with muspy. If not, see <http://www.gnu.org/licenses/>.
17
e14b44e @alexkay daemon: Implement ADD_ARTIST and ADD_RELEASE_GROUP jobs
authored Oct 17, 2011
18 import logging
b333d5c @alexkay daemon: Get album covers from MB
authored Oct 22, 2011
19 import re
20 import StringIO
21 from urllib2 import Request, urlopen
e14b44e @alexkay daemon: Implement ADD_ARTIST and ADD_RELEASE_GROUP jobs
authored Oct 17, 2011
22
b333d5c @alexkay daemon: Get album covers from MB
authored Oct 22, 2011
23 from PIL import Image
24
f599c7a @alexkay daemon: Import from Last.fm
authored Oct 23, 2011
25 from app import lastfm
b333d5c @alexkay daemon: Get album covers from MB
authored Oct 22, 2011
26 from app.cover import Cover
7a591c8 @alexkay daemon: Refactor
authored Oct 16, 2011
27 from app.models import *
e14b44e @alexkay daemon: Implement ADD_ARTIST and ADD_RELEASE_GROUP jobs
authored Oct 17, 2011
28 import app.musicbrainz as mb
29 from app.tools import str_to_date
30 from daemon import tools
7a591c8 @alexkay daemon: Refactor
authored Oct 16, 2011
31
32
def process():
    """Work on pending jobs until none are left.

    Each pass picks the job with the lowest id and dispatches on its type.
    A handled job is deleted.  When add_artist() or add_release_groups()
    reports failure (a false return value) the job is left in place and the
    loop pauses with tools.sleep() before trying again.  Jobs of any other
    type are deleted without further action.
    """
    while True:
        pending = Job.objects.select_related('user').order_by('id')
        try:
            job = pending[0]
        except IndexError:
            # Nothing left in the queue.
            return

        kind = job.type
        handled = True
        if kind == Job.ADD_ARTIST:
            handled = add_artist(job.user, job.data)
        elif kind == Job.ADD_RELEASE_GROUPS:
            handled = add_release_groups(job.data)
        elif kind == Job.GET_COVER:
            get_cover(job.data)
        elif kind == Job.IMPORT_LASTFM:
            # job.data is "<count>,<period>,<username>"; the username is the
            # last field so it may itself contain commas.
            count, period, username = job.data.split(',', 2)
            import_lastfm(job.user, username, int(count), period)

        if not handled:
            # Keep the job and retry it after a pause.
            tools.sleep()
            continue

        job.delete()
e14b44e @alexkay daemon: Implement ADD_ARTIST and ADD_RELEASE_GROUP jobs
authored Oct 17, 2011
56
57
def add_artist(user, search):
    """Resolve the artist name `search` via MusicBrainz and add it for `user`.

    The search result is accepted when it is the only hit, or when the first
    hit matches `search` case-insensitively and the second one does not.
    Otherwise the search string is stored as a UserSearch for later.

    Returns True when the job is finished (artist added, skipped, or saved
    for later) and False when fetching the artist failed and the caller
    should retry.
    """
    tools.sleep()
    logging.info('[JOB] Searching for artist [%s] for user %d' % (search, user.id))
    candidates, _total = mb.search_artists(search, limit=2, offset=0)
    if candidates is None:
        logging.warning('[ERR] MusicBrainz error while searching, skipping')
        return True

    # Decide whether the first candidate can be taken without asking the user.
    if len(candidates) == 1:
        unambiguous = True
    elif len(candidates) > 1:
        unambiguous = (
            candidates[0]['name'].lower() == search.lower() and
            candidates[1]['name'].lower() != search.lower())
    else:
        unambiguous = False

    if not unambiguous:
        logging.info('[JOB] Could not identify artist by name, saving for later')
        UserSearch(user=user, search=search).save()
        return True

    mbid = candidates[0]['id']

    # get_by_mbid() queries MB, must sleep.
    tools.sleep()
    logging.info('[JOB] Adding artist %s' % mbid)
    try:
        artist = Artist.get_by_mbid(mbid)
    except Artist.Blacklisted:
        logging.warning('[ERR] Artist %s is blacklisted, skipping' % mbid)
        return True
    except Artist.Unknown:
        logging.warning('[ERR] Artist %s is unknown, skipping' % mbid)
        return True

    if not artist:
        logging.warning('[ERR] Could not fetch artist %s, retrying' % mbid)
        return False

    UserArtist.add(user, artist)
    return True
94
def add_release_groups(mbid):
    """Fetch all release groups of the artist `mbid` and store the new ones.

    Release groups are requested from MusicBrainz page by page (LIMIT per
    page).  Entries without a first release date or a type are ignored, as
    are release groups already stored for this artist.  A failed page request
    (None result) is retried at the same offset.

    Always returns True; an artist missing from the database is logged and
    skipped.
    """
    logging.info('[JOB] Fetching release groups for artist %s' % mbid)
    try:
        artist = Artist.objects.get(mbid=mbid)
    except Artist.DoesNotExist:
        # The format string needs a placeholder for mbid; without one the
        # '%' operator raises TypeError instead of logging the warning.
        logging.warning('[ERR] Cannot find artist %s by mbid, skipping' % mbid)
        return True

    LIMIT = 100
    offset = 0
    while True:
        tools.sleep()
        logging.info('[JOB] Fetching release groups at offset %d' % offset)
        release_groups = mb.get_release_groups(mbid, limit=LIMIT, offset=offset)
        if release_groups is None:
            # Offset is unchanged, so the same page is requested again.
            logging.warning('[ERR] MusicBrainz error, retrying')
            continue

        if release_groups:
            # Save one page of release groups per transaction.
            with transaction.commit_on_success():
                for rg_data in release_groups:
                    # Ignoring releases without a release date or a type.
                    if not (rg_data.get('first-release-date') and rg_data.get('type')):
                        continue
                    already_stored = ReleaseGroup.objects.filter(
                        artist=artist, mbid=rg_data['id']).exists()
                    if already_stored:
                        continue
                    release_group = ReleaseGroup(
                        artist=artist,
                        mbid=rg_data['id'],
                        name=rg_data['title'],
                        type=rg_data['type'],
                        date=str_to_date(rg_data['first-release-date']),
                        is_deleted=False)
                    release_group.save()

        # A short page means there is nothing more to fetch.
        if len(release_groups) < LIMIT:
            break
        offset += LIMIT

    return True
b333d5c @alexkay daemon: Get album covers from MB
authored Oct 22, 2011
134
def get_cover(mbid):
    """Try to find and save a cover image for `mbid`.

    First the releases returned by mb.get_releases(mbid) are ordered by date
    and the MusicBrainz release pages of the ten earliest ones are scraped
    for a cover-art image.  If none of them yields a usable image, Last.fm
    is asked for cover URLs for every ReleaseGroup stored with this mbid.
    The function returns as soon as _fetch_cover() succeeds; if nothing
    works a warning is logged.
    """
    logging.info('[JOB] Trying to find a cover for %s' % mbid)
    tools.sleep()
    logging.info('[JOB] Get releases')
    releases = mb.get_releases(mbid, limit=100, offset=0)
    if releases is None:
        logging.warning('[ERR] Could not get releases, skipping')
        return
    releases = [r for r in releases if r.get('date')]

    def padded_date(release):
        # Convert 2011 to 2011-99-99 and 2011-01 to 2011-01-99 so partial
        # dates compare as plain strings against full ones.
        date = release['date']
        while len(date) < 10:
            date += '-99'
        return date

    # Order releases by date. We don't want to check all 100 releases.
    release_ids = [r['id'] for r in sorted(releases, key=padded_date)][:10]

    # Compiled once; used to parse every release page below.
    cover_pattern = re.compile(
        r'<div class="cover-art">\s*<img src="(?P<url>[^"]+)"')

    for release in release_ids:
        tools.sleep()
        logging.info('[JOB] Checking release %s' % release)
        try:
            request = Request(
                'http://musicbrainz.org/release/' + release,
                headers={'User-Agent': 'muspy/2.0'})
            response = urlopen(request)
            try:
                html = response.read()
            finally:
                response.close()
        except Exception:
            # Best effort: any fetch problem just moves on to the next
            # release, but KeyboardInterrupt/SystemExit still propagate.
            logging.warning('[ERR] Could not fetch the release page, skipping')
            continue

        # Parsing the release page
        match = cover_pattern.search(html)
        if not match:
            logging.info('[JOB] No cover art, skipping')
            continue
        if _fetch_cover(mbid, match.group('url')):
            return

    logging.info('[JOB] Try to get cover from Last.fm')
    for rg in ReleaseGroup.objects.filter(mbid=mbid).select_related('artist'):
        urls = lastfm.get_cover_urls(rg.artist.name, rg.name) or []
        for url in urls:
            if _fetch_cover(mbid, url):
                return

    logging.warning('[ERR] Could not find a cover')
f599c7a @alexkay daemon: Import from Last.fm
authored Oct 23, 2011
189
8ee8588 @alexkay daemon: Get covers from Last.fm
authored Oct 23, 2011
def _fetch_cover(mbid, url):
    """Download the image at `url` and save it as a 120x120 JPEG cover.

    The resized image is handed to Cover(mbid, image); presumably that
    constructor persists it -- confirm in app.cover.

    Returns True on success and False if the download fails, the payload is
    too small to be a real image, or the image cannot be processed or saved.
    """
    logging.info('[JOB] Downloading the cover')
    try:
        request = Request(url, headers={'User-Agent': 'muspy/2.0'})
        response = urlopen(request)
        try:
            image = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
    except Exception:
        # Best effort, but let KeyboardInterrupt/SystemExit propagate.
        logging.warning('[ERR] Could not download, skipping')
        return False

    # Sometimes we get just a one-pixel image, avoid resizing it.
    if len(image) < 4096:
        logging.warning('[ERR] Bad image, skipping')
        return False

    logging.info('[JOB] Saving the cover')
    try:
        im = Image.open(StringIO.StringIO(image))
        im = im.resize((120, 120), Image.ANTIALIAS)
        f = StringIO.StringIO()
        im.save(f, 'JPEG', quality=95)
        Cover(mbid, f.getvalue())
    except Exception:
        logging.warning('[ERR] Could not save the cover, skipping')
        return False
    return True
218
6b5a789 @alexkay Allow to specify the period when importing from Last.fm
authored Nov 3, 2011
def import_lastfm(user, username, count, period):
    """Import artists of the Last.fm user `username` for `user`.

    Artists are requested from Last.fm page by page (LIMIT per page) for the
    given `period`.  An artist that carries an mbid is fetched by it and
    added to `user`; one without an mbid goes through add_artist() by name.
    Every artist looked at counts towards `count`, including blacklisted and
    unknown ones.  The import stops once `count` artists were processed or
    Last.fm returns an empty page.  Failed requests (None results) are
    retried.
    """
    logging.info('[JOB] Importing %d artists from Last.fm for user %s' % (count, username))
    LIMIT = 50
    page, added = 1, 0
    while True:
        tools.sleep()
        logging.info('[JOB] Getting page %d' % page)
        artists = lastfm.get_artists(username, period, LIMIT, page)

        if artists is None:
            # The page number is unchanged, so the same page is retried.
            logging.warning('[ERR] Last.fm error, retrying')
            continue

        if not artists:
            return

        for artist_data in artists:
            mbid = artist_data.get('mbid', '')
            if not mbid:
                add_artist(user, artist_data['name'])
            else:
                # Loop until the artist is added or found to be unusable.
                while True:
                    # Artist.get_by_mbid will query MB if the artist is not yet
                    # in the database. Query first to avoid unnecessary sleep.
                    in_database = Artist.objects.filter(mbid=mbid).exists()
                    if not in_database:
                        tools.sleep()
                    logging.info('[JOB] Getting artist %s' % mbid)
                    try:
                        artist = Artist.get_by_mbid(mbid)
                    except Artist.Blacklisted:
                        logging.info('[JOB] Blacklisted artist, skipping')
                        break
                    except Artist.Unknown:
                        logging.info('[JOB] Unknown artist, skipping')
                        break
                    if artist:
                        UserArtist.add(user, artist)
                        break
                    logging.warning('[ERR] Cannot get the artist data, retrying')

            added += 1
            if added == count:
                return

        page += 1
Something went wrong with that request. Please try again.