In [42]:
# necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def get_billboard_top_100():
    url = "https://www.billboard.com/charts/hot-100/"
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code != 200:
        print(f"Error fetching Billboard Top 100: {response.status_code}")
        return [], []
    
    soup = BeautifulSoup(response.text, 'html.parser')

    songs = []
    artists = []

    # Find all song titles and artists
    for item in soup.select('.o-chart-results-list-row-container'):
        title_element = item.select_one('.o-chart-results-list-row-container h3')
        artist_element = item.select_one('.o-chart-results-list-row-container h3 + span')

        # Check if elements exist before accessing
        if title_element and artist_element:
            title = title_element.get_text(strip=True)
            artist = artist_element.get_text(strip=True)
            songs.append(title)
            artists.append(artist)
        else:
            print("Song or artist element not found in:", item)

    return songs, artists


def scrape_lyrics(song_title, artist_name):
    artist = artist_name.replace(" ", "-").replace("&","and").split("-Featuring")[0]
    title = song_title.replace(" ", "-").replace("(", "").replace(")", "")
    
    search_url = f"https://genius.com/{artist}-{title}-lyrics"
    print(f"Fetching lyrics from: {search_url}")

    response = requests.get(search_url)
    if response.status_code != 200:
        print(f"Error fetching data for {song_title}: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    lyrics_div = soup.find('div', class_='Lyrics__Container-sc-1ynbvzw-6')
    
    if not lyrics_div:
        lyrics_div = soup.find('div', class_='Lyrics__Root-sc-1ynbvzw-0')

    if not lyrics_div:
        lyrics_div = soup.find('div', class_='Lyrics__Container-sc-1ynbvzw-6 Lyrics--withRating')

    lyrics = []
    if lyrics_div:
        lyrics_text = lyrics_div.get_text("\n", strip=True)
        lyrics.append(lyrics_text)
    else:
        lyrics.append("Lyrics not found.")

    time.sleep(1) 

    return lyrics

def extract_lyrics(lyrics_list):
    formatted_lyrics = []
    for song in lyrics_list:
        for lyrics in song:
            # Split the lyrics by lines
            lines = lyrics.split('\n')
            # Join the lines into a single string
            full_lyrics = "\n".join(lines)
            # Find the index of the first '['
            first_bracket_index = full_lyrics.find('[')
            # If '[' is found, slice the lyrics from that index to the end
            if first_bracket_index != -1:
                cleaned_lyrics = full_lyrics[first_bracket_index:]
            else:
                cleaned_lyrics = full_lyrics  # If no '[', return full lyrics
            # Filter out unwanted lines
            cleaned_lyrics = "\n".join([line for line in cleaned_lyrics.split('\n') if line.strip() and not any(prefix in line for prefix in ["Contributors", "Translations", "Lyrics"])])
            formatted_lyrics.append(cleaned_lyrics)
    return formatted_lyrics


def main():
    songs, artists = get_billboard_top_100()

    song_paths = []
    
    for i in range(0,6):
        song = songs[i]
        artist = artists[i]
        song_paths.append(scrape_lyrics(song,artist))
        
    cleaned_lyrics = extract_lyrics(song_paths)

    #print(cleaned_lyrics[0])

    first_6_s = songs[:6]
    first_6_a = songs[:6]
    
    print(len(first_6_s))
    print(len(first_6_a))
    print(len(cleaned_lyrics))
    

    df = pd.DataFrame({
        'Song Title': first_6_s,
        'Artist': first_6_a,
        'Lyrics': cleaned_lyrics
    })

    df.to_csv('billboard_top_100_lyrics.csv', index=False)

    print(df)

if __name__ == "__main__":
    main()


Fetching lyrics from: https://genius.com/Shaboozey-A-Bar-Song-Tipsy-lyrics
Fetching lyrics from: https://genius.com/Billie-Eilish-Birds-Of-A-Feather-lyrics
Fetching lyrics from: https://genius.com/Sabrina-Carpenter-Espresso-lyrics
Fetching lyrics from: https://genius.com/Lady-Gaga-and-Bruno-Mars-Die-With-A-Smile-lyrics
Fetching lyrics from: https://genius.com/Post-Malone-I-Had-Some-Help-lyrics
Fetching lyrics from: https://genius.com/Teddy-Swims-Lose-Control-lyrics
6
6
6
           Song Title              Artist  \
0  A Bar Song (Tipsy)  A Bar Song (Tipsy)   
1  Birds Of A Feather  Birds Of A Feather   
2            Espresso            Espresso   
3    Die With A Smile    Die With A Smile   
4     I Had Some Help     I Had Some Help   
5        Lose Control        Lose Control   

                                              Lyrics  
0  [Verse 1]\nMy baby want a Birkin, she's been t...  
1  [Intro]\n[Verse 1]\nI want you to stay\n'Til I...  
2  [Chorus]\nNow he's thinkin' 'bout me eve

In [6]:
# necessary libraries
#get spotify dev api

