In [34]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_billboard_year_end():
    
    # Lists to store data
    all_data = []
    
    for year in range(2012, 2023):
        # Wikipedia URL for Billboard's Year-End Hot 100 chart
        url = f'https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_{year}'

        # Send a GET request to the URL
        response = requests.get(url)

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the table containing the list of songs
        table = soup.find('table', {'class': 'wikitable'})

        # Find all rows in the table (skipping the header row)
        rows = table.find_all('tr')[1:]

        # Lists to store data
        billboard_list = []
        song_list = []
        artist_list = []

        # Iterate through rows and extract song names, artists, and billboard numbers
        for row in rows:
            columns = row.find_all('td')

            # Check if there are enough columns
            if len(columns) >= 3:
                billboard_num = columns[0].text.strip() 
                song_name = columns[1].text.strip() 
                artist_name = columns[2].text.strip() 

                billboard_list.append(billboard_num)
                song_list.append(song_name)
                artist_list.append(artist_name)

        # Create a DataFrame
        df = pd.DataFrame({
            'Year': [year] * len(billboard_list),
            'Billboard Number': billboard_list,
            'Song Name': song_list,
            'Artist Name': artist_list
        })

        # Append the DataFrame to the list
        all_data.append(df)

    # Concatenate all DataFrames into a single DataFrame
    result_df = pd.concat(all_data, ignore_index=True)

    # Print the DataFrame without the index
    #print(result_df.to_string(index=False))
    return result_df


top_100_df = scrape_billboard_year_end()
print(top_100_df)

# Export the filtered DataFrame to a CSV file
top_100_df.to_csv('top_100_df.csv', index=False)



      Year Billboard Number                       Song Name  \
0     2012                1  "Somebody That I Used to Know"   
1     2012                2                 "Call Me Maybe"   
2     2012                3                  "We Are Young"   
3     2012                4                      "Payphone"   
4     2012                5                        "Lights"   
...    ...              ...                             ...   
1090  2022               96                  "Flower Shops"   
1091  2022               97                   "To the Moon"   
1092  2022               98                        "Unholy"   
1093  2022               99               "One Mississippi"   
1094  2022              100      "Circles Around This Town"   

                         Artist Name  
0             Gotye featuring Kimbra  
1                   Carly Rae Jepsen  
2        Fun featuring Janelle Monáe  
3     Maroon 5 featuring Wiz Khalifa  
4                     Ellie Goulding  
...      

In [33]:
# Example to filter the DataFrame for the year 2022
filtered_df = top_100_df[top_100_df['Year'] == 2022]

# Print only the rows for the year 2022
print("\nRows for Year 2022:")
print(filtered_df)


Rows for Year 2022:
      Year Billboard Number                   Song Name  \
995   2022                1                "Heat Waves"   
996   2022                2                 "As It Was"   
997   2022                3                      "Stay"   
998   2022                4                "Easy on Me"   
999   2022                5                   "Shivers"   
...    ...              ...                         ...   
1090  2022               96              "Flower Shops"   
1091  2022               97               "To the Moon"   
1092  2022               98                    "Unholy"   
1093  2022               99           "One Mississippi"   
1094  2022              100  "Circles Around This Town"   

                          Artist Name  
995                     Glass Animals  
996                      Harry Styles  
997   The Kid Laroi and Justin Bieber  
998                             Adele  
999                        Ed Sheeran  
...                           