/
functions.py
110 lines (98 loc) · 3.16 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import requests
from bs4 import BeautifulSoup, Comment, NavigableString
import sys, codecs, json
class Track(object):
    """A single song, identified by its title, its album and its artist.

    Attributes:
        name: the track title passed to the constructor.
        album: whatever album value the caller supplied (stored as-is).
        artist: the artist name passed to the constructor.
    """

    def __init__(self, trackName, album, artist):
        self.name = trackName
        self.album = album
        self.artist = artist

    def __repr__(self):
        # Python 2 requires a byte string from __repr__; Python 3 requires
        # text. This mirrors the guard used by Album.__repr__.
        if sys.version_info[0] == 2:
            return self.name.encode('utf-8', 'replace')
        return self.name

    def link(self):
        """Return the lyrics.wikia.com page URL for this track.

        Spaces in the artist and title are replaced with underscores, the
        same page-name convention PyLyrics.getLyrics uses when it fetches
        the page, so the link and the fetched page always agree.
        """
        return 'http://lyrics.wikia.com/{0}:{1}'.format(
            self.artist.replace(' ', '_'),
            self.name.replace(' ', '_'),
        )

    def getLyrics(self):
        """Fetch this track's lyrics via PyLyrics.getLyrics(artist, name)."""
        return PyLyrics.getLyrics(self.artist, self.name)
class Artist(object):
    """An artist, identified only by name.

    Attributes:
        name: the artist name passed to the constructor.
    """

    def __init__(self, name):
        self.name = name

    def getAlbums(self):
        """Return the artist's albums via PyLyrics.getAlbums(name)."""
        return PyLyrics.getAlbums(self.name)

    def __repr__(self):
        # __repr__ must return a native str. Encoding unconditionally gives
        # bytes on Python 3 and makes repr() raise TypeError, so only encode
        # on Python 2 (same guard as Album.__repr__).
        if sys.version_info[0] == 2:
            return self.name.encode('utf-8', 'replace')
        return self.name
class Album(object):
    """An album parsed from a heading of the form ``"<title> <year-token>"``.

    The last space-separated token of ``name`` is stored verbatim as
    ``year`` (e.g. ``"(2013)"``) and everything before it becomes ``name``.
    A heading with no space at all therefore yields an empty ``name`` and
    the whole text as ``year``.

    Attributes:
        year: trailing token of the heading, unmodified.
        name: heading text with the trailing token removed.
        url: the link passed to the constructor.
        singer: the artist value passed to the constructor.
    """

    def __init__(self, name, link, singer):
        # Split on the LAST space only. The previous str.replace() approach
        # removed every occurrence of the year token from the title, and an
        # empty trailing token (heading ending in whitespace) made
        # replace('', ' ') insert a space between every character.
        title, _, year = name.rstrip().rpartition(' ')
        self.year = year
        self.name = title.rstrip()
        self.url = link
        self.singer = singer

    def link(self):
        """Return the album page URL supplied at construction."""
        return self.url

    def __repr__(self):
        # Python 2 needs bytes from __repr__; Python 3 needs text.
        if sys.version_info[0] == 2:
            return self.name.encode('utf-8', 'replace')
        return self.name

    def artist(self):
        """Return the artist value supplied at construction."""
        return self.singer

    def tracks(self):
        """Return this album's tracks via PyLyrics.getTracks(self)."""
        return PyLyrics.getTracks(self)
class PyLyrics:
    """Static helpers that scrape lyrics.wikia.com for albums, tracks and lyrics.

    Every method performs a blocking HTTP request through ``requests`` and
    parses the response with BeautifulSoup using the stdlib ``html.parser``
    backend, so results do not depend on which optional parsers happen to be
    installed.
    """

    @staticmethod
    def getAlbums(singer):
        """Return a list of Album objects scraped from the artist's page.

        Args:
            singer: artist name; spaces are converted to underscores.

        Raises:
            ValueError: if no album heading with a link is found on the page.
        """
        singer = singer.replace(' ', '_')
        page = requests.get('http://lyrics.wikia.com/{0}'.format(singer)).text
        soup = BeautifulSoup(page, 'html.parser')
        albums = []
        for headline in soup.findAll('span', {'class': 'mw-headline'}):
            # Headings without a usable link are skipped explicitly instead
            # of hiding every possible error behind a bare ``except``.
            anchor = headline.find('a')
            if anchor is None:
                continue
            href = anchor.get('href')
            if href is None:
                continue
            albums.append(Album(anchor.text, 'http://lyrics.wikia.com' + href, singer))
        if not albums:
            raise ValueError("Unknown Artist Name given")
        return albums

    @staticmethod
    def getTracks(album):
        """Return the Track objects listed for ``album`` by the lyrics API.

        Args:
            album: object exposing ``artist()`` and ``name`` (e.g. Album).

        Raises:
            ValueError: if the API response has no album whose name matches
                ``album.name`` (case-insensitive, surrounding whitespace
                ignored), or the matched album has no following song list.
        """
        # Let requests build the query string so artist names containing
        # '&', '#', '?' and similar characters are escaped correctly.
        response = requests.get(
            'http://lyrics.wikia.com/api.php',
            params={'action': 'lyrics', 'artist': album.artist(), 'fmt': 'xml'},
        )
        soup = BeautifulSoup(response.text, 'html.parser')
        wanted = album.name.strip().lower()
        current_album = next(
            (al for al in soup.find_all('album') if al.text.lower().strip() == wanted),
            None,
        )
        if current_album is None:
            # Previously this fell through to an unbound local variable.
            raise ValueError("Album not found for the given artist")
        song_list = current_album.findNext('songs')
        if song_list is None:
            raise ValueError("No track list found for the given album")
        return [Track(song.text, album, album.artist()) for song in song_list.findAll('item')]

    @staticmethod
    def getLyrics(singer, song):
        """Return the lyrics of ``song`` by ``singer`` as text.

        Line breaks in the page (``<br/>``) become newlines; literal newlines
        in the markup are dropped.

        Raises:
            ValueError: if the page contains no ``div.lyricbox`` element.
        """
        # Replace spaces with _
        singer = singer.replace(' ', '_')
        song = song.replace(' ', '_')
        response = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer, song))
        soup = BeautifulSoup(response.text, 'html.parser')
        # Get main lyrics holder
        lyrics = soup.find("div", {'class': 'lyricbox'})
        if lyrics is None:
            raise ValueError("Song or Singer does not exist or the API does not have Lyrics")
        # Remove scripts
        for script in lyrics('script'):
            script.extract()
        # Remove comments
        for comment in lyrics.findAll(text=lambda text: isinstance(text, Comment)):
            comment.extract()
        # Remove unnecessary tags but keep their text
        for tag in ['div', 'i', 'b', 'a']:
            for match in lyrics.findAll(tag):
                match.replaceWithChildren()
        # Serialise only the inside of the lyric box. Slicing a fixed number
        # of characters off str(lyrics) breaks as soon as the wrapper <div>
        # carries any extra attribute.
        inner_html = lyrics.decode_contents()
        return inner_html.replace('\n', '').replace('<br/>', '\n')
def main():
    """Demo: print OneRepublic's albums, then the lyrics of one track.

    Uses the last album in the list returned by PyLyrics.getAlbums and the
    track at index 7 of that album's track list.
    """
    discography = PyLyrics.getAlbums('OneRepublic')
    print(discography)

    last_album = discography[-1]
    track_list = PyLyrics.getTracks(last_album)
    chosen_track = track_list[7]
    print(chosen_track.getLyrics())
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()