Billboard Hot 100 Chart Overview

In [16]:
# Import all libraries
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import time
from datetime import datetime, timedelta
import plotly.express as px

In [17]:
# Generate Saturdays from a specific year until today's date
def generate_saturdays(start_year):
    saturdays = []
    start_date = datetime(start_year, 1, 1)
    end_date = datetime.now()

    # Loop through dates until today
    while start_date <= end_date:
        if start_date.weekday() == 5:
            saturdays.append(start_date.strftime('%Y-%m-%d'))
        start_date += timedelta(days=1)

    return saturdays

# Generate Saturdays starting from 2022 until today
saturdays = generate_saturdays(2022)
print(saturdays)

['2022-01-01', '2022-01-08', '2022-01-15', '2022-01-22', '2022-01-29', '2022-02-05', '2022-02-12', '2022-02-19', '2022-02-26', '2022-03-05', '2022-03-12', '2022-03-19', '2022-03-26', '2022-04-02', '2022-04-09', '2022-04-16', '2022-04-23', '2022-04-30', '2022-05-07', '2022-05-14', '2022-05-21', '2022-05-28', '2022-06-04', '2022-06-11', '2022-06-18', '2022-06-25', '2022-07-02', '2022-07-09', '2022-07-16', '2022-07-23', '2022-07-30', '2022-08-06', '2022-08-13', '2022-08-20', '2022-08-27', '2022-09-03', '2022-09-10', '2022-09-17', '2022-09-24', '2022-10-01', '2022-10-08', '2022-10-15', '2022-10-22', '2022-10-29', '2022-11-05', '2022-11-12', '2022-11-19', '2022-11-26', '2022-12-03', '2022-12-10', '2022-12-17', '2022-12-24', '2022-12-31', '2023-01-07', '2023-01-14', '2023-01-21', '2023-01-28', '2023-02-04', '2023-02-11', '2023-02-18', '2023-02-25', '2023-03-04', '2023-03-11', '2023-03-18', '2023-03-25', '2023-04-01', '2023-04-08', '2023-04-15', '2023-04-22', '2023-04-29', '2023-05-06', '2023

In [18]:
driver = webdriver.Firefox()

year = []
month = []
day = []

title = []
singer = []
last_week_position = []
peak_position = []
weeks_on_chart = []
current_week_position = []


for date_str in saturdays:
    url = f'https://www.billboard.com/charts/hot-100/{date_str}/'
    driver.get(url)
    
    w = WebDriverWait(driver, 15)
    w.until(EC.presence_of_element_located((By.CLASS_NAME,"c-title")))
    
    song_titles = driver.find_elements(By.ID, "title-of-a-story") ## Find the common class across the list of music
    
    i=0
    
    for song_title in song_titles:
        # Dates
        date = datetime.strptime(date_str, '%Y-%m-%d')  # Convert date string back to datetime
        month.append(date.strftime("%m"))  # Append the month
        day.append(date.strftime("%d"))     # Append the day
        year.append(date.strftime("%Y"))    # Append the year
        
        i = i+1

        try:
            title_element = driver.find_element(By.XPATH, f'/html/body/div[4]/main/div[2]/div[3]/div/div/div/div[2]/div[{i}]/ul/li[4]/ul/li[1]/h3').text
            title.append(title_element)
        except:
            title.append("No title available")
            pass
        
        # Extract Singer
        try:
            singer_element = driver.find_element(By.XPATH, f'//*[@id="post-1479786"]/div[3]/div/div/div/div[2]/div[{i}]/ul/li[4]/ul/li[1]/span').text
            singer.append(singer_element)
        except:
            singer.append("No singer available")
            pass
            

        # This weak position
        try:
            current_week_pos = driver.find_element(By.XPATH, f'//*[@id="post-1479786"]/div[3]/div/div/div/div[2]/div[{i}]/ul/li[1]/span').text
            current_week_position.append(current_week_pos)
        except:
            current_week_position.append("No Week Position")
            pass


        # Extract last week
        try:
            last_week_pos = driver.find_element(By.XPATH, f'/html/body/div[4]/main/div[2]/div[3]/div/div/div/div[2]/div[{i}]/ul/li[4]/ul/li[4]/span').text
            last_week_position.append(last_week_pos)
        except:
            last_week_position.append("No Last Week Position")
            pass

        # Peak Position
        try:
            peak_pos = driver.find_element(By.XPATH, f'/html/body/div[4]/main/div[2]/div[3]/div/div/div/div[2]/div[{i}]/ul/li[4]/ul/li[5]/span').text
            peak_position.append(peak_pos)
        except:
            peak_position.append("No Peak Position")
            pass

        # Weeks on Chart
        try:
            wks_chart = driver.find_element(By.XPATH, f'//*[@id="post-1479786"]/div[3]/div/div/div/div[2]/div[{i}]/ul/li[4]/ul/li[6]/span').text
            weeks_on_chart.append(wks_chart)
        except:
            weeks_on_chart.append("No Weeks on Chart")
            pass
            

driver.quit() 

In [19]:
df = pd.DataFrame(year, columns=['Year'])
df['Month'] = month
df['Day'] = day
df['Rank'] = current_week_position
df['Title'] = title
df['Singer(s)'] = singer
df['Last Week Position'] = last_week_position
df['Peak Position'] = peak_position
df['Weeks on Chart'] = weeks_on_chart

df = df.loc[df['Title'] != 'No title available'].reset_index(drop=True)

df


Unnamed: 0,Year,Month,Day,Rank,Title,Singer(s),Last Week Position,Peak Position,Weeks on Chart
0,2022,01,01,1,All I Want For Christmas Is You,Mariah Carey,1,1,50
1,2022,01,01,2,Rockin' Around The Christmas Tree,Brenda Lee,2,2,44
2,2022,01,01,3,Jingle Bell Rock,Bobby Helms,4,3,41
3,2022,01,01,4,A Holly Jolly Christmas,Burl Ives,5,4,25
4,2022,01,01,5,Easy On Me,Adele,3,1,11
...,...,...,...,...,...,...,...,...,...
14595,2024,10,12,96,Keep Up,Odetari,-,96,1
14596,2024,10,12,97,Passport Junkie,Rod Wave,98,61,3
14597,2024,10,12,98,This Is My Dirt,Justin Moore,-,98,1
14598,2024,10,12,99,Close To You,Gracie Abrams,-,49,9


In [20]:
# Convert columns to numeric
df['Rank'] = pd.to_numeric(df['Rank'])
df['Last Week Position'] = pd.to_numeric(df['Last Week Position'], errors='coerce')
df['Peak Position'] = pd.to_numeric(df['Peak Position'], errors='coerce')
df['Weeks on Chart'] = pd.to_numeric(df['Weeks on Chart'], errors='coerce')

# Display the DataFrame
df

Unnamed: 0,Year,Month,Day,Rank,Title,Singer(s),Last Week Position,Peak Position,Weeks on Chart
0,2022,01,01,1,All I Want For Christmas Is You,Mariah Carey,1.0,1,50
1,2022,01,01,2,Rockin' Around The Christmas Tree,Brenda Lee,2.0,2,44
2,2022,01,01,3,Jingle Bell Rock,Bobby Helms,4.0,3,41
3,2022,01,01,4,A Holly Jolly Christmas,Burl Ives,5.0,4,25
4,2022,01,01,5,Easy On Me,Adele,3.0,1,11
...,...,...,...,...,...,...,...,...,...
14595,2024,10,12,96,Keep Up,Odetari,,96,1
14596,2024,10,12,97,Passport Junkie,Rod Wave,98.0,61,3
14597,2024,10,12,98,This Is My Dirt,Justin Moore,,98,1
14598,2024,10,12,99,Close To You,Gracie Abrams,,49,9


Top 10 Songs by Accumulated Weeks at No. 1 (2022-2024)

In [21]:
# Filter for songs that were No. 1
no1_songs = df[df['Rank'] == 1]

# Create a new DataFrame with just the Year, Title, and Singer(s)
# Group by Title and Singer(s) to count the weeks at No. 1
accumulated_weeks_no1 = no1_songs.groupby(['Year', 'Title', 'Singer(s)']).size().reset_index(name='Accumulated Weeks')
# Sort by accumulated weeks at No. 1
accumulated_weeks_no1 = accumulated_weeks_no1.sort_values(by=['Accumulated Weeks'], ascending=False)
# Show the top 10 songs with the most accumulated weeks at No.1
top_10_no1 = accumulated_weeks_no1.head(10)

# Display the final DataFrame
print(top_10_no1)

    Year                            Title  \
23  2023                       Last Night   
3   2022                        As It Was   
33  2024               A Bar Song (Tipsy)   
19  2023                          Flowers   
2   2022                        Anti-Hero   
37  2024                  I Had Some Help   
40  2024                      Lovin On Me   
1   2022  All I Want For Christmas Is You   
13  2022        We Don't Talk About Bruno   
8   2022                       Heat Waves   

                                            Singer(s)  Accumulated Weeks  
23                                      Morgan Wallen                 16  
3                                        Harry Styles                 15  
33                                          Shaboozey                 13  
19                                        Miley Cyrus                  8  
2                                        Taylor Swift                  6  
37                Post Malone Featuring Morgan Wallen 

In [22]:
# Top 10 Songs by Accumulated Weeks at No. 1 (2022-2024)

# Create a bar chart
fig = px.bar(
    top_10_no1,
    x='Title',
    y='Accumulated Weeks',
    hover_data=['Singer(s)'],
    color='Accumulated Weeks',
    title='Top 10 Songs by Most Accumulated Weeks at No. 1 (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    xaxis_title='Songs',
    yaxis_title='Accumulated Weeks at No.1',
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        tickmode='linear'
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()


Monthly New Songs in the Top 10 (2022-2024)

In [23]:
# Filter for new songs that were in Top 10
top10_songs = df[(df['Rank'] <= 10) & (df['Last Week Position'] > 10)]

# Create a new DataFrame with just the Year, Month, Title, and Singer(s)
# Group by Year and Month to count the number of Top 10 songs
top10_songs_by_month = top10_songs.groupby(['Year', 'Month']).size().reset_index(name='Monthly New Songs in the Top 10')
# Combine Year and Month
top10_songs_by_month['Year-Month'] = top10_songs_by_month['Year'] + '-' + top10_songs_by_month['Month']
# Drop the original Year and Month columns
top10_songs_by_month = top10_songs_by_month.drop(columns=['Year', 'Month'])
# Rearrange the columns so that 'Year and Month' comes first
top10_songs_by_month = top10_songs_by_month[['Year-Month', 'Monthly New Songs in the Top 10']]

# Display the final DataFrame
print(top10_songs_by_month)

   Year-Month  Monthly New Songs in the Top 10
0     2022-01                               12
1     2022-02                                4
2     2022-03                                4
3     2022-04                                2
4     2022-05                                2
5     2022-06                                4
6     2022-07                                4
7     2022-08                                5
8     2022-09                                4
9     2022-10                                3
10    2022-11                                9
11    2022-12                                7
12    2023-01                               10
13    2023-02                                5
14    2023-03                                5
15    2023-04                                4
16    2023-05                                4
17    2023-06                                1
18    2023-07                                2
19    2023-08                                4
20    2023-09

In [24]:
# Monthly New Songs in the Top 10 (2022-2024)

# Create a line chart
fig = px.line(
    top10_songs_by_month,
    x='Year-Month',
    y='Monthly New Songs in the Top 10',
    color_discrete_sequence=['red'],
    title='Monthly New Songs in the Top 10 (2022-2024)',
)

# Customize the chart
fig.update_traces(hovertemplate='Year-Month: %{x|%Y-%m}<br>Value: %{y}')
fig.update_layout(
    xaxis_title='Year-Month',
    yaxis_title='Counts',
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ))

# Show the chart
fig.show()


Top 5 Singers with the most numbers of Top 10 Songs (2022-2024)

In [25]:
# Filter for songs that were in the top 10
top10_songs = df[df['Rank'] <= 10]

# Drop duplicates based on 'Title' and 'Singer(s)' to ensure each song is counted once per artist
unique_top10_songs = top10_songs.drop_duplicates(subset=['Title', 'Singer(s)'])

# Group by Singer(s) to count the number of unique top 10 songs
top10_songs_singers = unique_top10_songs.groupby('Singer(s)').size().reset_index(name='No. of Top 10 Songs')

# Sort by the number of top 10 songs
top10_songs_singers = top10_songs_singers.sort_values(by='No. of Top 10 Songs', ascending=False)

# Show the top 5 singers with the most number of unique top 10 songs
top5_singers = top10_songs_singers.head(5)

# Display the final DataFrame
print(top5_singers)

             Singer(s)  No. of Top 10 Songs
109       Taylor Swift                   25
25               Drake                    8
88       Morgan Wallen                    7
26   Drake & 21 Savage                    6
24            Doja Cat                    5


In [26]:
# Top 5 Singers with the most numbers of Top 10 Songs (2022-2024)

# Create a bar chart
fig = px.bar(
    top5_singers,
    x='Singer(s)',
    y='No. of Top 10 Songs',
    color='No. of Top 10 Songs',
    title='Top 5 Singers with The Most Numbers of Top 10 Songs (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    xaxis_title='Singer(s)',
    yaxis_title='No. of Top 10 Songs',
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()


No.1 Songs with Most Weeks on Chart (2022-2024)

In [27]:
# Filter for songs that reached No. 1
no1_songs = df[df['Rank'] == 1]

# Group the songs by Title and Singer(s) and count the maximum weeks
weeks_count = no1_songs.groupby(['Title', 'Singer(s)'])['Weeks on Chart'].max().reset_index()

# Count how many songs spend each number of weeks on chart
weeks_to_no1_count = weeks_count['Weeks on Chart'].value_counts().reset_index()

# Rename the columns for clarity
weeks_to_no1_count.columns = ['Number of Weeks', 'Count of Songs']

# Sort by the number of weeks in ascending order
weeks_to_no1_count_sorted = weeks_to_no1_count.sort_values(by=['Number of Weeks'], ascending=False)

# Show the top 10 results
top10_weeks_to_no1_count_sorted = weeks_to_no1_count_sorted.head(10)

# Display the final DataFrame
print(top10_weeks_to_no1_count_sorted)


    Number of Weeks  Count of Songs
18               64               1
12               63               1
19               58               1
16               32               1
10               31               1
8                28               2
4                25               2
14               19               1
3                15               3
17               14               1


In [28]:
# No.1 Songs with Most Weeks on Chart (2022-2024)

# Create a bar chart
fig = px.bar(
    top10_weeks_to_no1_count_sorted,
    x='Number of Weeks',
    y='Count of Songs',
    color_discrete_sequence=['#E48F72'],
    title='No.1 Songs with Most Weeks on Chart (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    xaxis_title='Number of Weeks',
    yaxis_title='Count of Songs',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
        type='category'
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        tickmode='linear'
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()


Songs with the most weeks on chart (2022-2024)

In [29]:
# Group the songs by Title and Singer(s) and count the maximum weeks
weeks_on_chart = df.groupby(['Title', 'Singer(s)'])['Weeks on Chart'].max().reset_index()

# Sort by the number of weeks in ascending order
weeks_on_chart_sorted = weeks_on_chart.sort_values(by=['Weeks on Chart'], ascending=False)

# Show the top 10 results
top10_weeks_on_chart_sorted = weeks_on_chart_sorted.head(10)

# Display the final DataFrame
print(top10_weeks_on_chart_sorted)


                                Title                      Singer(s)  \
687                        Heat Waves                  Glass Animals   
919                        Levitating                       Dua Lipa   
1464                           Snooze                            SZA   
1383                  Save Your Tears     The Weeknd & Ariana Grande   
1473          Something In The Orange                     Zach Bryan   
62    All I Want For Christmas Is You                   Mariah Carey   
1507                             Stay  The Kid LAROI & Justin Bieber   
1861                        You Proof                  Morgan Wallen   
108                         As It Was                   Harry Styles   
953                      Lose Control                    Teddy Swims   

      Weeks on Chart  
687               91  
919               77  
1464              70  
1383              69  
1473              66  
62                65  
1507              63  
1861              62  


In [30]:
# Songs with the most weeks on chart (2022-2024)
# Create a bar chart
fig = px.bar(
    top10_weeks_on_chart_sorted,
    x='Title',
    y='Weeks on Chart',
    color='Weeks on Chart',
    hover_data=['Singer(s)'],
    title='Songs with The Most Weeks on Chart (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Songs',
    yaxis_title='Weeks on Chart',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Distribution of Last Week Positions among Top 10 Songs (2022-2024)

In [31]:
# Filter for songs that reached top 10 and were outside that the week before
top10_songs = df[(df['Rank'] <= 10) & (df['Last Week Position'] > 10)]

# Count how many songs took each number of weeks to get to No. 1
last_week_position = top10_songs['Last Week Position'].value_counts().reset_index()

# Rename the columns for clarity
last_week_position.columns = ['Last Week Position', 'Count of Songs']

# Sort by the number of weeks in ascending order
last_week_position_sorted = last_week_position.sort_values(by=['Last Week Position'], ascending=True)

# Show the top 10 results
last_week_position_sorted = last_week_position_sorted.head(20)

# Display the final DataFrame
print(last_week_position_sorted)

    Last Week Position  Count of Songs
0                 11.0              43
1                 12.0              25
3                 13.0              11
4                 14.0               9
2                 15.0              12
7                 16.0               5
6                 17.0               6
10                18.0               3
5                 19.0               7
12                20.0               3
18                21.0               2
8                 22.0               4
16                23.0               2
24                24.0               1
13                25.0               2
17                26.0               2
9                 27.0               4
23                28.0               1
25                29.0               1
19                30.0               1


In [32]:
# Distribution of Last Week Positions among Top 10 Songs (2022-2024)
# Create a histogram
fig = px.histogram(
    last_week_position_sorted,
    x='Last Week Position',
    y='Count of Songs',
    color='Last Week Position',
    title='Distribution of Last Week Positions among Top 10 Songs (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Last Week Position',
    yaxis_title='Count of Songs',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Peak Position vs. Total Weeks on Chart (2022-2024)

In [33]:
# Group the songs by Title, Singer(s), and Peak Position, and get the maximum weeks
weeks_on_chart = df.groupby(['Title', 'Singer(s)', 'Peak Position'])['Weeks on Chart'].max().reset_index()

# Sort the DataFrame by 'Weeks on Chart' in descending order
weeks_on_chart_sorted = weeks_on_chart.sort_values(by='Weeks on Chart', ascending=False)

# Display the final DataFrame
print(weeks_on_chart_sorted)

                        Title  \
1404               Heat Waves   
1832               Levitating   
2796                   Snooze   
2655          Save Your Tears   
2826  Something In The Orange   
...                       ...   
2359                Paperwork   
2358        Painting Pictures   
1268                     Gold   
3011             Take My Name   
1701               Keep Going   

                                              Singer(s)  Peak Position  \
1404                                      Glass Animals              1   
1832                                           Dua Lipa              2   
2796                                                SZA              2   
2655                         The Weeknd & Ariana Grande              1   
2826                                         Zach Bryan             10   
...                                                 ...            ...   
2359                             ¥$: Ye & Ty Dolla $ign             64   
2358       

In [34]:
# Peak Position vs. Total Weeks on Chart (2022-2024)
# Create a scatter plot
fig = px.scatter(
    weeks_on_chart_sorted,
    x='Weeks on Chart',
    y='Peak Position',
    color_discrete_sequence=['skyblue'],
    hover_data=['Title', 'Singer(s)'],
    title='Peak Position vs. Total Weeks on Chart (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Weeks on Chart',
    yaxis_title='Peak Position',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Longest Running Songs in Top 10 (2022-2024)

In [35]:
# Filter for songs that reached Top 10
top10_songs = df[df['Rank'] <= 10]

# Count how many weeks each song spent in the Top 10, including the singer
weeks_top10_count = top10_songs.groupby(['Title', 'Singer(s)']).size().reset_index(name='Weeks in Top 10')

# Sort by the number of weeks in descending order
weeks_top10_count_sorted = weeks_top10_count.sort_values(by=['Weeks in Top 10'], ascending=False)

# Show the top 5 results
top_weeks_top10 = weeks_top10_count_sorted.head(5)

# Display the final DataFrame
print(top_weeks_top10)

            Title      Singer(s)  Weeks in Top 10
97     Last Night  Morgan Wallen               41
105  Lose Control    Teddy Swims               38
11      As It Was   Harry Styles               38
35   Cruel Summer   Taylor Swift               34
72     Heat Waves  Glass Animals               30


In [36]:
# Longest Running Songs in Top 10 (2022-2024)
# Create a bar chart
fig = px.bar(
    top_weeks_top10,
    x='Title',
    y='Weeks in Top 10',
    color='Weeks in Top 10',
    hover_data=['Singer(s)'],
    title='Longest Running Songs in Top 10 (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Songs',
    yaxis_title='Weeks in Top 10',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Top 5 No. 1 Songs with the Fewest Weeks to Reach the Top Position (2022-2024)

In [37]:
# Filter for songs that reached No. 1
no1_songs = df[df['Rank'] == 1]

# Find the weeks on chart for No. 1 songs grouped by title and singer
weeks_to_best = no1_songs.groupby(['Title', 'Singer(s)'])['Weeks on Chart'].min()

# Count how many songs took each number of weeks to get to No. 1
weeks_to_no1_count = weeks_to_best.value_counts().reset_index()

# Rename columns for clarity
weeks_to_no1_count.columns = ['Weeks to No. 1', 'Counts']

# Sort by the number of weeks in ascending order
weeks_to_no1_count_sorted = weeks_to_no1_count.sort_values(by='Weeks to No. 1', ascending=True)

# Get the top 5 results
top5_weeks_to_no1_count = weeks_to_no1_count_sorted.head(5)

# Display the final DataFrame
print(top5_weeks_to_no1_count)


    Weeks to No. 1  Counts
0                1      22
1                2       4
2                4       3
4                5       2
13               6       1


In [38]:
# Top 5 No. 1 Songs with the Fewest Weeks to Reach the Top Position (2022-2024)
# Create a bar chart
fig = px.bar(
    top5_weeks_to_no1_count,
    x='Weeks to No. 1',
    y='Counts',
    color_discrete_sequence=['skyblue'],
    title='Top 5 No. 1 Songs with the Fewest Weeks to Reach the Top Position (2022-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=22),
    xaxis_title='Weeks to No. 1',
    yaxis_title='Counts',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=12)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Monthly New Songs in Hot 100 (2022-2024)

In [39]:
# Filter for new songs
new_songs = df[df['Last Week Position'].isna()]

# Count how many new songs are in each month and year
new_songs_by_month = new_songs.groupby(['Year', 'Month']).size().reset_index(name='Count of New Songs')

# Sort by Year and Month in ascending order
new_songs_by_month_sorted = new_songs_by_month.sort_values(by=['Year', 'Month'], ascending=True)

# Display the final DataFrame
print(new_songs_by_month_sorted)

    Year Month  Count of New Songs
0   2022    01                 103
1   2022    02                  54
2   2022    03                  45
3   2022    04                  57
4   2022    05                  89
5   2022    06                  61
6   2022    07                  68
7   2022    08                  64
8   2022    09                  58
9   2022    10                  58
10  2022    11                  60
11  2022    12                  85
12  2023    01                  66
13  2023    02                  44
14  2023    03                  70
15  2023    04                  56
16  2023    05                  33
17  2023    06                  56
18  2023    07                 107
19  2023    08                  67
20  2023    09                  76
21  2023    10                  80
22  2023    11                  69
23  2023    12                  77
24  2024    01                  75
25  2024    02                  53
26  2024    03                  58
27  2024    04      

In [40]:
# Monthly New Songs in Hot 100 (2022-2024)
# Create the heatmap
fig = px.density_heatmap(new_songs_by_month_sorted,
                          x='Month',
                          y='Year',
                          z='Count of New Songs',
                          title='Monthly New Songs in Hot 100 (2022-2024)',
                          labels={'Count of New Songs': 'Number of New Songs'})

fig.update_layout(
    title=dict(
        text='Monthly New Songs in Hot 100 (2022-2024)',
        font=dict(size=24),  
        x=0.5
    )
)

# Show the heatmap
fig.show()
