In [1]:
# Import packages
from bs4 import BeautifulSoup
import requests
import pandas as pd 
import datetime as date

In [2]:
# Download the Billboard Hot 100 page and parse it into a BeautifulSoup tree.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error (4xx/5xx) instead of
# letting the later cells silently scrape an error page.
link = requests.get('https://www.billboard.com/charts/hot-100/', timeout=30)
link.raise_for_status()
soup = BeautifulSoup(link.content, 'html.parser')
# Every <li> carrying this class is searched by the cells below for
# title, artist and metric tags.
song_containers = soup.find_all('li', class_='o-chart-results-list__item')

In [3]:
# Extract song titles from containers.
# Each container is searched for its first <h3> carrying the 'c-title' class;
# containers without such a heading contribute nothing.
#
# An earlier lookup used a long multi-class string that ended in the pasted
# fragment `" id="title-of-a-story`. BeautifulSoup compares a multi-word class_
# string against the tag's whole class attribute, so that lookup could never
# match and has been removed. The heading it targeted also carries 'c-title',
# so the lookup below already picks it up; repairing the long selector instead
# would add that title twice.
titles = []
for container in song_containers:
    title_tag = container.find('h3', class_='c-title')

    if title_tag:
        titles.append(title_tag.get_text(strip=True))

In [4]:
# Extract artist names from containers.
# Two multi-class strings are tried per container, in this order: the variant
# that also carries 'u-font-size-20@tablet', then the variant without it.
# Every lookup that finds a <span> appends its stripped text to `artists`.
artist_span_classes = (
    'c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only u-font-size-20@tablet',
    'c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only',
)

artists = []
for container in song_containers:
    for span_class in artist_span_classes:
        artist_tag = container.find('span', class_=span_class)

        if artist_tag:  # Only proceed if the artist tag is found
            artists.append(artist_tag.get_text(strip=True))

In [5]:
# Extract all metrics from containers.
# Two multi-class strings are tried per container, in this order: the
# bold/large variant ending in 'u-font-size-32@tablet', then the plain
# 'a-font-primary-m' variant. Every lookup that finds a <span> appends its
# stripped text to the flat `metrics` list.
metric_span_classes = (
    'c-label a-font-primary-bold-l a-font-primary-m@mobile-max u-font-weight-normal@mobile-max lrv-u-padding-tb-050@mobile-max u-font-size-32@tablet',
    'c-label a-font-primary-m lrv-u-padding-tb-050@mobile-max',
)

metrics = []
for container in song_containers:
    for span_class in metric_span_classes:
        metric_tag = container.find('span', class_=span_class)

        if metric_tag:  # Only proceed if the metric tag is found
            metrics.append(metric_tag.get_text(strip=True))

In [6]:
# Split the flat metrics list into consecutive groups of 6 (the last group is
# shorter when the length is not a multiple of 6).
# NOTE: this rebinds `metrics`, so running the cell a second time would group
# the groups; re-run the extraction cell first.
chunk_starts = range(0, len(metrics), 6)
metrics = [metrics[start:start + 6] for start in chunk_starts]

In [7]:
# Pair each title with its artist and build the base data frame.
# zip() stops at the shorter of the two lists; reset_index() then exposes the
# row number as an 'index' column, which the merge cell uses as its join key.
zipped_lists = zip(titles, artists)
top_100 = pd.DataFrame(data=zipped_lists, columns=['titles', 'artists'])
top_100 = top_100.reset_index()

In [8]:
# Prepare metrics for merge.
# Each group of 6 metrics becomes one row; the row number is exposed as an
# 'index' column (the join key), and the three '*_2' columns are discarded.
# NOTE(review): the '*_2' columns are presumably repeats of the first three -
# confirm against the page markup.
metric_columns = ['last_week', 'top_position', 'weeks_on_chart', 'last_week_2', 'top_position_2', 'weeks_on_chart_2']
metric_df = (
    pd.DataFrame(metrics, columns=metric_columns)
    .reset_index()
    .drop(columns=['last_week_2', 'top_position_2', 'weeks_on_chart_2'])
)

In [10]:
# Merge metrics onto the data frame.
# Rows are matched on the shared 'index' column (outer join keeps rows present
# on either side), 'rank' is the 1-based row number, the helper 'index' column
# is dropped, and a '-' in 'last_week' is replaced with 'none'.
top_100 = (
    top_100
    .merge(metric_df, how='outer', on='index')
    .assign(rank=lambda frame: frame['index'] + 1)
    .drop(columns='index')
    .assign(last_week=lambda frame: frame['last_week'].replace('-', 'none'))
)

In [11]:
# Check data for accuracy: the bare expression below renders the merged
# frame as this cell's output.
top_100

Unnamed: 0,titles,artists,last_week,top_position,weeks_on_chart,rank
0,A Bar Song (Tipsy),Shaboozey,1,1,17,1
1,I Had Some Help,Post Malone Featuring Morgan Wallen,2,1,13,2
2,Not Like Us,Kendrick Lamar,3,1,14,3
3,Espresso,Sabrina Carpenter,4,3,17,4
4,Million Dollar Baby,Tommy Richman,5,2,15,5
...,...,...,...,...,...,...
95,Alibi,"Sevdaliza, Pabllo Vittar & Yseult",98,95,3,96
96,Parking Lot,Mustard & Travis Scott,81,57,4,97
97,Wine Into Whiskey,Tucker Wetmore,95,68,19,98
98,"Love You, Miss You, Mean It",Luke Bryan,none,99,1,99


In [12]:
# Prepare file name to use today's date: 'billboard_<date>_<date>.csv', with
# the ISO date (YYYY-MM-DD) appearing twice, as before.
# The date is read once so both stamps always agree, even if the cell happens
# to run across midnight. (`date` is this notebook's alias for the datetime
# module.)
today_stamp = str(date.date.today())
file_name = 'billboard_' + today_stamp + '_' + today_stamp + '.csv'

In [13]:
# Export file under the dated name held in `file_name`.
# The path used to be the hard-coded 'billboard_2024-08-13_2024-08-13.csv',
# so every later run wrote to that same stale-dated file and `file_name` went
# unused.
# to_csv keeps its defaults, so the row index is written as the first,
# unnamed column.
top_100.to_csv(file_name)