In [88]:
# necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def get_billboard_top_100(timeout=10):
    """Scrape song titles and artist names from the Billboard Hot 100 page.

    Args:
        timeout: Seconds to wait for the Billboard server before giving up.
            Without a timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``(songs, artists)`` tuple of two parallel lists of strings.
        Both lists are empty when the page cannot be fetched (network
        error or a non-200 status code).
    """
    url = "https://www.billboard.com/charts/hot-100/"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection failures and timeouts follow the same "report and
        # return empty lists" path as a bad status code below.
        print(f"Error fetching Billboard Top 100: {exc}")
        return [], []

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Error fetching Billboard Top 100: {response.status_code}")
        return [], []

    soup = BeautifulSoup(response.text, 'html.parser')

    songs = []
    artists = []

    # Each chart row container is expected to hold an <h3> (title)
    # immediately followed by a <span> (artist).
    for item in soup.select('.o-chart-results-list-row-container'):
        title_element = item.select_one('.o-chart-results-list-row-container h3')
        artist_element = item.select_one('.o-chart-results-list-row-container h3 + span')

        # Rows missing either element are reported and skipped so the two
        # result lists stay aligned index-for-index.
        if title_element and artist_element:
            songs.append(title_element.get_text(strip=True))
            artists.append(artist_element.get_text(strip=True))
        else:
            print("Song or artist element not found in:", item)

    return songs, artists


def scrape_lyrics(song_title, artist_name, timeout=10):
    """Fetch the lyrics text for a song from its guessed Genius page.

    The page URL is derived from the artist and title: spaces become
    hyphens, ``&`` becomes ``and``, anything from ``-Featuring`` onward is
    dropped from the artist, and parentheses are removed from the title.

    Args:
        song_title: Song title as scraped from the chart.
        artist_name: Artist credit as scraped from the chart.
        timeout: Seconds to wait for Genius before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A one-element list holding the lyrics text, or
        ``["Lyrics not found."]`` when the page has no recognised lyrics
        container. ``None`` when the request fails or the response status
        is not 200.
    """
    artist = artist_name.replace(" ", "-").replace("&", "and").split("-Featuring")[0]
    title = song_title.replace(" ", "-").replace("(", "").replace(")", "")

    search_url = f"https://genius.com/{artist}-{title}-lyrics"
    print(f"Fetching lyrics from: {search_url}")

    try:
        response = requests.get(search_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching data for {song_title}: {exc}")
        return None
    finally:
        # Throttle after every request attempt, including failed ones, so
        # a run of errors does not turn into a burst of back-to-back hits.
        time.sleep(1)

    if response.status_code != 200:
        print(f"Error fetching data for {song_title}: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Try the known lyrics wrappers in order of preference. A former third
    # lookup for the exact class string
    # 'Lyrics__Container-sc-1ynbvzw-6 Lyrics--withRating' was dropped: any
    # div with that class string also carries the first class token, so
    # the first lookup already matches it.
    lyrics_div = None
    for class_name in ('Lyrics__Container-sc-1ynbvzw-6', 'Lyrics__Root-sc-1ynbvzw-0'):
        lyrics_div = soup.find('div', class_=class_name)
        if lyrics_div:
            break

    if lyrics_div:
        return [lyrics_div.get_text("\n", strip=True)]
    return ["Lyrics not found."]


def main(limit=5):
    """Fetch the chart and print the lyrics results for its first entries.

    Args:
        limit: Maximum number of chart entries to look up lyrics for.
    """
    songs, artists = get_billboard_top_100()

    # Slicing plus zip copes with a short or empty chart (for example when
    # the chart fetch failed and returned two empty lists); indexing with
    # a fixed range would raise IndexError in that case.
    lyrics_results = [
        scrape_lyrics(song, artist)
        for song, artist in zip(songs[:limit], artists[:limit])
    ]

    print(lyrics_results)

    # TODO: once lyrics are fetched for the whole chart, build a DataFrame
    # with 'Song Title', 'Artist' and 'Lyrics' columns and save it to
    # 'billboard_top_100_lyrics.csv' (index=False).

# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


Fetching lyrics from: https://genius.com/Shaboozey-A-Bar-Song-Tipsy-lyrics
Fetching lyrics from: https://genius.com/Billie-Eilish-Birds-Of-A-Feather-lyrics
Fetching lyrics from: https://genius.com/Sabrina-Carpenter-Espresso-lyrics
Fetching lyrics from: https://genius.com/Lady-Gaga-and-Bruno-Mars-Die-With-A-Smile-lyrics
Fetching lyrics from: https://genius.com/Post-Malone-I-Had-Some-Help-lyrics
[['48 Contributors\nTranslations\nEspañol\nDeutsch\nFrançais\nУкраїнська\nNorsk (bokmål / riksmål)\nA Bar Song (Tipsy) Lyrics\n[Verse 1]\nMy baby want a Birkin, she\'s been tellin\' me all night long\nGasoline and groceries, the list goes on and on\nThis\nnine-to-five ain\'t workin\n\', why the hell do I work so hard?\nI can\'t worry \'bout my problems, I can\'t take \'em when I\'m gone, uh\n[Pre-Chorus]\nOne, here comes the two to the three to the four\nTell \'em "Bring another round," we need plenty more\nTwo-steppin\' on the table, she don\'t need a dance floor\nOh my, good Lord\n[Chorus]\nSom