In [9]:
##Imports##
__author__ = 'bdyetton'
import billboard
import datetime
import pandas as pd
import numpy as np
import swagger_client
import warnings
from swagger_client.rest import ApiException

In [10]:
## A "pure function": it does not rely on any state of the DownloadSongsAndLyrics class, so it is written separately.
def get_every_week_since(year, month=1, day=1):
    """Enumerate weekly timestamps from a start date up to the present moment.

    The first timestamp is midnight of ``year``/``month``/``day``; each
    following one is exactly seven days later. Only timestamps strictly
    earlier than ``datetime.datetime.now()`` are included.

    Args:
        year: Calendar year of the first timestamp.
        month: Calendar month of the first timestamp (defaults to January).
        day: Day of month of the first timestamp (defaults to the 1st).

    Returns:
        A ``(count, timestamps)`` tuple where ``timestamps`` is the list of
        ``datetime.datetime`` values and ``count`` is its length. A start date
        in the future yields ``(0, [])``.
    """
    first_week = datetime.datetime(year=year, month=month, day=day)
    one_week = datetime.timedelta(days=7)
    cutoff = datetime.datetime.now()
    # Ceil-divide the elapsed span by one week: that is the number of k >= 0
    # with first_week + k * one_week < cutoff. Zero or negative for a future start.
    week_count = -((first_week - cutoff) // one_week)
    weekly_stamps = [first_week + offset * one_week for offset in range(week_count)]
    return len(weekly_stamps), weekly_stamps

In [11]:
class DownloadSongsAndLyrics(object):
    """Download top Billboard songs for a range of genre charts and fetch their lyrics.

    Chart data comes from the ``billboard`` package; lyrics come from the
    lyrics API exposed through ``swagger_client``. Results are returned as
    pandas DataFrames with the columns listed in ``SONG_COLUMNS``.
    """

    # Charts scraped when the caller does not pass an explicit list.
    DEFAULT_CHARTS = (
        'hot-holiday-songs',
        'christian-songs',
        'country-songs',
        'rock-songs',
        'pop-songs',
        'r-b-hip-hop-songs',
        'dance-electronic-songs',
    )
    # Column layout of every DataFrame returned by this class.
    SONG_COLUMNS = ('chart', 'title', 'artist', 'weeks', 'rank', 'query_week', 'lyrics')
    # Boilerplate notice that is stripped from every lyrics body.
    LYRICS_DISCLAIMER = '******* This Lyrics is NOT for Commercial use *******'

    def __init__(self, api_key=None):
        """Configure the lyrics API client.

        Args:
            api_key: Key for the lyrics API. When omitted, the
                ``MUSIXMATCH_API_KEY`` environment variable is used, and only
                if that is unset does the legacy built-in key apply.
        """
        import os  # local import: keeps this block self-contained

        if api_key is None:
            # FIXME(security): the literal below is a credential committed to
            # source control. It is kept only so existing zero-config usage keeps
            # working; rotate it and supply the key via argument or environment.
            api_key = os.environ.get('MUSIXMATCH_API_KEY', '82414131af109a5fbece76fd5bff52bb')
        swagger_client.configuration.api_key['apikey'] = api_key
        self.musicmix_lyric_instance = swagger_client.LyricsApi()
        self.musicmix_track_instance = swagger_client.TrackApi()
        # Defaults so get_all_songs_from_chart() can be called on its own,
        # without first going through get_all_songs_from_all_charts().
        self.charts = list(self.DEFAULT_CHARTS)
        self.songs_per_week = 1

    def get_all_songs_from_all_charts(self, songs_per_week=1, get_all_years_since=2017, charts=None):
        """Download song information and lyrics for every requested chart.

        Args:
            songs_per_week: Maximum number of new songs kept per chart week.
            get_all_years_since: Year to look back to; the number of weeks
                walked per chart is the number of weeks since 1 January of
                that year.
            charts: Iterable of Billboard chart names. ``None`` (the default)
                selects ``DEFAULT_CHARTS``.

        Returns:
            A single DataFrame concatenating the per-chart results (each chart
            keeps its own 1-based index). Empty, with the ``SONG_COLUMNS``
            columns, when ``charts`` is empty.
        """
        # A None sentinel replaces the former mutable list default, which was
        # shared between calls and stored on the instance.
        self.charts = list(self.DEFAULT_CHARTS) if charts is None else charts
        self.songs_per_week = songs_per_week
        chart_frames = [
            self.get_all_songs_from_chart(chart, get_all_years_since)
            for chart in self.charts
        ]
        if not chart_frames:
            # pd.concat raises ValueError on an empty list.
            return pd.DataFrame(columns=list(self.SONG_COLUMNS))
        return pd.concat(chart_frames)  # one DataFrame from the list of per-chart frames

    def get_all_songs_from_chart(self, chart_name, query_year=2017, max_weeks_missing=5):
        """Walk one chart backwards week by week, collecting songs that have lyrics.

        Starts at the chart's latest edition and follows each edition's
        ``previousDate``. For every edition, entries are visited in chart order
        and up to ``self.songs_per_week`` not-yet-stored songs with available
        lyrics are kept.

        Args:
            chart_name: Billboard chart identifier, e.g. ``'pop-songs'``.
            query_year: Look back as many weeks as have elapsed since
                1 January of this year.
            max_weeks_missing: Number of failed chart fetches tolerated before
                giving up on this chart.

        Returns:
            DataFrame with the ``SONG_COLUMNS`` columns and a 1-based index,
            one row per stored song.
        """
        print('Beginning download for chart', chart_name)
        # Only the week count is used; actual dates come from previousDate.
        num_weeks_to_look_back, _ = get_every_week_since(year=query_year)
        num_songs_to_download = num_weeks_to_look_back * self.songs_per_week

        records = []            # rows collected so far; the DataFrame is built once at the end
        stored_songs = set()    # (title, artist) pairs already in `records`
        miss_counter = 0
        next_date_to_get = None

        for week_idx in range(num_weeks_to_look_back):
            try:
                if week_idx == 0:
                    chart_data = billboard.ChartData(chart_name)
                else:
                    chart_data = billboard.ChartData(chart_name, next_date_to_get)
                    if not chart_data:
                        # An empty chart is routed through the same recovery
                        # path as a failed fetch.
                        raise AttributeError("Missing data")
                next_date_to_get = chart_data.previousDate
            except AttributeError:
                miss_counter += 1
                if miss_counter > max_weeks_missing:
                    warnings.warn("Too many weeks of this chart missing, moving on to next chart")
                    break
                if not next_date_to_get:
                    # Nothing to step back from (e.g. the very first fetch
                    # failed); previously this path crashed on an unbound name.
                    warnings.warn("No chart date to step back from for %s, moving on to next chart" % chart_name)
                    break
                print("\nMissing song data for %s, %s\n" % (chart_name, next_date_to_get))
                # Retry one week earlier than the date that failed.
                current_missing_date = datetime.datetime.strptime(next_date_to_get, '%Y-%m-%d')
                previous_date = current_missing_date - datetime.timedelta(days=7)
                next_date_to_get = previous_date.strftime('%Y-%m-%d')
                print('trying new date of:', next_date_to_get)
                continue

            songs_from_current_week = 0
            for song in chart_data:
                # Compare the (title, artist) PAIR. Testing the two columns
                # independently wrongly skipped new songs whose title and
                # artist each already appeared on different rows.
                song_key = (song.title, song.artist)
                if song_key in stored_songs:
                    continue
                lyrics = self.get_lyric_data_for_song(song.title, song.artist)
                if lyrics is None:
                    # Not recorded as seen, so the lookup is retried if the
                    # song shows up again in an earlier week.
                    continue
                print("\r", 'Downloaded song', len(records) + 1, 'of', num_songs_to_download, end="")
                # NOTE(review): query_week stores previousDate, i.e. the edition
                # BEFORE the one these entries came from -- confirm this is intended.
                records.append([chart_name, song.title, song.artist, song.weeks, song.rank,
                                chart_data.previousDate, lyrics])
                stored_songs.add(song_key)
                songs_from_current_week += 1
                if songs_from_current_week >= self.songs_per_week:
                    break

        print(' --- Chart Download Complete')
        song_df = pd.DataFrame(records, columns=list(self.SONG_COLUMNS))
        # Keep the historical 1-based row labels so the CSV export is unchanged.
        song_df.index = pd.RangeIndex(start=1, stop=len(records) + 1)
        return song_df

    def get_lyric_data_for_song(self, song_title, song_artist):
        """Look up the lyrics for one song.

        Args:
            song_title: Track title to match.
            song_artist: Artist name to match.

        Returns:
            The lyrics text with the non-commercial-use notice removed, or
            ``None`` when no lyrics are available or the API call raised
            ``ApiException``.
        """
        try:
            lyric_response_data = self.musicmix_lyric_instance.matcher_lyrics_get_get(
                q_track=song_title, q_artist=song_artist)
            lyric_data = lyric_response_data.message.body.lyrics
            if lyric_data is None or lyric_data.lyrics_body is None:
                return None
            return lyric_data.lyrics_body.replace(self.LYRICS_DISCLAIMER, '')
        except ApiException as e:
            # Previously referenced an undefined `song` and passed the
            # arguments in the wrong order for the format string.
            print("Warning: Exception when getting lyrics for %s, %s: %s\n" % (song_title, song_artist, e))
            return None
    
        

In [12]:
if __name__ == "__main__":
    # Script entry point: scrape every default chart back to the start of 2016,
    # keeping up to five songs per chart week, then export the result.
    print("Downloading song and lyric data")
    downloader = DownloadSongsAndLyrics()
    all_charts = downloader.get_all_songs_from_all_charts(
        get_all_years_since=2016,
        songs_per_week=5,
    )
    # Tab-delimited, UTF-8 encoded export of the combined DataFrame.
    all_charts.to_csv('BillboardLyricData.txt', encoding='utf-8', sep='\t')
    print("Finished downloading, saving to BillboardLyricData.txt as tab seperated file")

Downloading song and lyric data
Beginning download for chart hot-holiday-songs
 Downloaded song 50 of 350
Missing song data for hot-holiday-songs, 2015-12-05

trying new date of: 2015-11-28

Missing song data for hot-holiday-songs, 2015-11-28

trying new date of: 2015-11-21

Missing song data for hot-holiday-songs, 2015-11-21

trying new date of: 2015-11-14

Missing song data for hot-holiday-songs, 2015-11-14

trying new date of: 2015-11-07

Missing song data for hot-holiday-songs, 2015-11-07

trying new date of: 2015-10-31




 --- Chart Download Complete
Beginning download for chart christian-songs
 Downloaded song 271 of 350 --- Chart Download Complete350 of 350 218 of 350
Beginning download for chart country-songs
 Downloaded song 247 of 350 --- Chart Download Completeloaded song 130 of 350Downloaded song 132 of 350Downloaded song 190 of 350229 of 350 233 of 350
Beginning download for chart rock-songs
 Downloaded song 348 of 350 --- Chart Download Complete
Beginning download for chart pop-songs
 Downloaded song 225 of 350of 350 of 350 --- Chart Download Complete
Beginning download for chart r-b-hip-hop-songs
 Downloaded song 327 of 350 --- Chart Download Completeloaded song 74 of 350
Beginning download for chart dance-electronic-songs
 Downloaded song 296 of 350of 350 --- Chart Download Complete
Finished downloading, saving to BillboardLyricData.txt as tab seperated file
