In [9]:
import json
import lyricsgenius as lg
import re
import configparser

In [10]:
# Create a ConfigParser object
config = configparser.ConfigParser()

# Load the config file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so check the result here instead of failing
# later with an opaque KeyError on config['API'].
config_path = './config.ini'  # Update with the correct path to your config file
if not config.read(config_path):
    raise FileNotFoundError(f'Could not read config file: {config_path}')

# Get the API key
api_key = config['API']['api_key']

# Never echo the key itself: cell outputs are stored inside the notebook file,
# so printing the secret leaks it to everyone the notebook is shared with.
print('API key loaded.')

'<REDACTED: API key removed from saved cell output>'


In [4]:
def save_json(save_name, dictionary):
    """
    Write `dictionary` to "<save_name>.json" as pretty-printed JSON.

    Input:
        save_name: path of the target file without the ".json" extension.
        dictionary: JSON-serialisable object to store.

    The file is written as UTF-8 explicitly. Because ensure_ascii=False keeps
    non-ASCII characters verbatim, relying on the platform default encoding
    could raise UnicodeEncodeError (or produce a file load_json cannot read).
    """
    with open(f'{save_name}.json', 'w', encoding='utf-8') as json_file:
        json.dump(dictionary, json_file, ensure_ascii=False, indent=4)
    print(f'Data has been saved to "{save_name}.json"')


def load_json(file_name):
    """
    Read "<file_name>.json" (UTF-8, as written by save_json) and return its content.

    Input:
        file_name: path of the source file without the ".json" extension.

    Returns the decoded JSON data. Note that JSON object keys are always
    strings, so integer keys stored with save_json come back as str.
    """
    with open(f'{file_name}.json', 'r', encoding='utf-8') as json_file:
        loaded_data = json.load(json_file)

    return loaded_data

In [5]:
# Regular expressions used to pull the chart fields out of the raw page text.
# NOTE(review): the artist is captured from the "title" field and the song
# title from "additionalDescription" - presumably this mirrors the schema of
# the scraped page; verify against the input files.
songArtistPattern = r'"title":"(.*?)"'
songTitlePattern = r'"additionalDescription":"(.*?)"'
songPositionPattern=r'"positionDisplay":(\d+)'
songYoutubeLinkPattern=r'https:\/\/youtube\.com\/embed\/(\w+)'

def build_dict(text1):
    """
    Input: String
    Function to build up the dictionary by extracting the defined patterns from the text.

    Returns a dictionary keyed by the chart position (int). Every value is a
    dict with the keys "title", "artist", "youtube_url" and "lyrics".
    "lyrics" starts out as an empty string, which the rest of the notebook
    treats as "not downloaded yet"; without it the later cells that read
    entry['lyrics'] fail with KeyError on a freshly built dictionary.

    The four match lists are combined positionally with zip(), so if the
    patterns match a different number of times the result is truncated to the
    shortest list.
    """
    _dict = {}
    songTitleMatches = re.findall(songTitlePattern, text1)
    songArtistMatches = re.findall(songArtistPattern, text1)
    songPositionMatches = re.findall(songPositionPattern, text1)
    youtubeLinkMatches = re.findall(songYoutubeLinkPattern, text1)

    for title, artist, position, youtube_url in zip(songTitleMatches, songArtistMatches, songPositionMatches, youtubeLinkMatches):
        entry = {"title": title, "artist": artist, "youtube_url": youtube_url, "lyrics": ''}
        _dict[int(position)] = entry

    return _dict

def cleanup(text):
    """
    Input: string
    Normalise a title or artist name so two spellings can be compared.

    The text is lower-cased and then a fixed list of substitutions is applied
    in order: punctuation is dropped, '-' becomes a space, '*' becomes 'i',
    the HTML entity '&#8217;' is removed and '&amp;' is turned into '&'.
    Returns a string.
    """
    # (needle, replacement) pairs, applied one after another in this order.
    substitutions = (
        ('.', ''),
        ('-', ' '),
        ("’", ''),
        ("?", ''),
        ("!", ''),
        ("*", 'i'),
        ('&#8217;', ''),
        (',', ''),
        ('&amp;', '&'),
    )

    normalised = text.lower()
    for needle, replacement in substitutions:
        normalised = normalised.replace(needle, replacement)
    return normalised

In [None]:
def lyrics_download(dict, genius_object, save_dict_name):
    """
    Download the missing lyrics for every entry of `dict` and save progress.

    Input:
        dict: mapping of position -> entry; each entry needs the keys
            'artist' and 'title' and may carry 'lyrics'. It is updated in place.
        genius_object: object exposing search_artist(name, max_songs=..., sort=...)
            whose result has a `.songs` list of objects with `.title` and `.lyrics`.
        save_dict_name: name handed to save_json() after every update.

    For each entry whose 'lyrics' is missing or an empty string, the artist's
    most popular songs are searched and the first one whose cleaned-up title
    contains the cleaned-up entry title supplies the lyrics. If nothing
    matches, or the search result has no usable `.songs`, 'lyrics' is set to
    'skip' so that the entry is not retried on the next call.
    Returns None.
    """
    for position, data in dict.items():
        artist = data['artist']

        if 'feat.' in artist:
            # Keep only the main artist; strip the space left in front of 'feat.'.
            artist = artist.split('feat.')[0]

        artist = cleanup(artist).strip()
        title = cleanup(data['title'])

        print(f'-- AT POSITION:{position} --')

        # Entries that were never processed may not have a 'lyrics' key yet;
        # treat that the same as an empty string instead of raising KeyError.
        current_lyrics = data.get('lyrics', '')
        if current_lyrics == 'skip':
            print("-- DUE TO LYRICS SET TO 'SKIP' -- \n")
            continue
        if current_lyrics != '':
            print("-- DUE TO LYRICS ALREADY FOUND -- \n")
            continue

        found_lyrics = False
        songs_by_artist = genius_object.search_artist(artist, max_songs=10, sort='popularity')

        try:
            for song_object in songs_by_artist.songs:
                song_title = cleanup(song_object.title)
                if title in song_title:
                    print(f'--- FOUND the lyrics url for: {artist} - {title} ---\n')
                    data['lyrics'] = song_object.lyrics
                    save_json(save_dict_name, dict)
                    found_lyrics = True
                    # Stop at the first match: continuing the scan would let a
                    # later song overwrite the lyrics that were just stored.
                    break

            if not found_lyrics:
                print(f'--- Lyrics NOT found for: {artist} - {title} --- \n')
                data['lyrics'] = 'skip'
                save_json(save_dict_name, dict)

        except AttributeError:
            # Raised e.g. when the search returned an object without `.songs`
            # (such as None); mark the entry so the run can carry on.
            print(f'--- EXCEPTION AT: {artist} - {title} --- \n')
            data['lyrics'] = 'skip'
            save_json(save_dict_name, dict)

In [6]:
# Specify the path to your text file
east_coast_file_path = './rolling_stones_east_coast.txt'
west_coast_file_path = './rolling_stones_west_coast.txt'

# Read with an explicit encoding so the result does not depend on the platform
# default (assumes the scraped files are UTF-8 - TODO confirm).
with open(east_coast_file_path, 'r', encoding='utf-8') as file:
    east_coast_text = file.read()

with open(west_coast_file_path, 'r', encoding='utf-8') as file:
    west_coast_text = file.read()

# NOTE: these saves write to the same "east_coast.json" / "west_coast.json"
# files that lyrics_download() uses for its progress, so re-running this cell
# replaces any previously saved lyrics with the freshly built dictionaries.
east_coast_dict = build_dict(east_coast_text)
save_json('east_coast', east_coast_dict)

west_coast_dict = build_dict(west_coast_text)
save_json('west_coast', west_coast_dict)

Data has been saved to "east_coast.json"
Data has been saved to "west_coast.json"


In [7]:
# Genius API client shared by the download cells below.
genius = lg.Genius(
    api_key,
    skip_non_songs=True,
    excluded_terms=["(Live)"],
    remove_section_headers=True,
)

In [8]:
# When the lyrics of a song cannot be fetched, the entry is marked with 'skip'
# so the automated download does not try it again.
# This cell sets those markers back to an empty string so that the next
# download run retries them. Entries without a 'lyrics' key are left untouched
# (.get avoids a KeyError on freshly built dictionaries).

for coast_dict in (east_coast_dict, west_coast_dict):
    for data in coast_dict.values():
        if data.get('lyrics') == 'skip':
            data['lyrics'] = ''

In [None]:
# Fetch the lyrics for both charts, east coast first, saving under the chart's name.
for coast_dict, save_name in (
    (east_coast_dict, 'east_coast'),
    (west_coast_dict, 'west_coast'),
):
    lyrics_download(coast_dict, genius, save_name)