Billboard Hot 100 Chart Overview

In [None]:
# Import all libraries
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
import plotly.express as px

In [87]:
# Generate Saturdays from a specific year until today's date
def generate_saturdays(start_year):
    saturdays = []
    start_date = datetime(start_year, 1, 1)
    end_date = datetime.now()

    # Loop through dates until today
    while start_date <= end_date:
        if start_date.weekday() == 5:
            saturdays.append(start_date.strftime('%Y-%m-%d'))
        start_date += timedelta(days=1)

    return saturdays

# Generate Saturdays starting from 2019 until today
saturdays = generate_saturdays(2019)
print(saturdays)

['2019-01-05', '2019-01-12', '2019-01-19', '2019-01-26', '2019-02-02', '2019-02-09', '2019-02-16', '2019-02-23', '2019-03-02', '2019-03-09', '2019-03-16', '2019-03-23', '2019-03-30', '2019-04-06', '2019-04-13', '2019-04-20', '2019-04-27', '2019-05-04', '2019-05-11', '2019-05-18', '2019-05-25', '2019-06-01', '2019-06-08', '2019-06-15', '2019-06-22', '2019-06-29', '2019-07-06', '2019-07-13', '2019-07-20', '2019-07-27', '2019-08-03', '2019-08-10', '2019-08-17', '2019-08-24', '2019-08-31', '2019-09-07', '2019-09-14', '2019-09-21', '2019-09-28', '2019-10-05', '2019-10-12', '2019-10-19', '2019-10-26', '2019-11-02', '2019-11-09', '2019-11-16', '2019-11-23', '2019-11-30', '2019-12-07', '2019-12-14', '2019-12-21', '2019-12-28', '2020-01-04', '2020-01-11', '2020-01-18', '2020-01-25', '2020-02-01', '2020-02-08', '2020-02-15', '2020-02-22', '2020-02-29', '2020-03-07', '2020-03-14', '2020-03-21', '2020-03-28', '2020-04-04', '2020-04-11', '2020-04-18', '2020-04-25', '2020-05-02', '2020-05-09', '2020

In [88]:
# Scraping the charts
chart = []

for date_str in saturdays:
    url = f'https://www.billboard.com/charts/hot-100/{date_str}/'
    webpage_response = requests.get(url)
    webpage = webpage_response.content
    soup = BeautifulSoup(webpage, 'html.parser')

    # Extract titles
    title1 = soup.find_all(class_='c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 u-font-size-23@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-245 u-max-width-230@tablet-only u-letter-spacing-0028@tablet')
    title2 = soup.find_all(class_='c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only')
    titles_combined = title1 + title2
    titles = [t.get_text(strip=True) for t in titles_combined]

    # Extract ranks
    rank = soup.find_all(class_='c-label a-font-primary-bold-l u-font-size-32@tablet u-letter-spacing-0080@tablet')
    ranks = [r.get_text(strip=True) for r in rank]

    # Extract singers
    singer1 = soup.find_all(class_='c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only u-font-size-20@tablet')
    singer2 = soup.find_all(class_='c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only')
    singer_combined = singer1 + singer2
    singers = [s.get_text(strip=True) for s in singer_combined]

    # Extract last week's positions and total weeks
    last_week_and_total_weeks = soup.find_all(class_='o-chart-results-list__item // a-chart-color u-width-72 u-width-55@mobile-max u-width-55@tablet-only lrv-u-flex lrv-u-flex-shrink-0 lrv-u-align-items-center lrv-u-justify-content-center lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light u-background-color-white-064@mobile-max u-hidden@mobile-max')
    last_week_and_total_weeks_combined = [lp.get_text(strip=True) for lp in last_week_and_total_weeks]
    last_week_positions = last_week_and_total_weeks_combined[0::2]
    total_weeks = last_week_and_total_weeks_combined[1::2]

    # Extract peak positions
    peak_position = soup.find_all(class_='o-chart-results-list__item // a-chart-bg-color a-chart-color u-width-72 u-width-55@mobile-max u-width-55@tablet-only lrv-u-flex lrv-u-flex-shrink-0 lrv-u-align-items-center lrv-u-justify-content-center lrv-u-background-color-grey-lightest lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light u-hidden@mobile-max')
    peak_positions = [pp.get_text(strip=True) for pp in peak_position]
    peak_positions_adjusted = peak_positions[1::2]

    # Combine all data into the chart list
    for t, r, s, l, p, tw in zip(titles, ranks, singers, last_week_positions, peak_positions_adjusted, total_weeks):
        date = datetime.strptime(date_str, '%Y-%m-%d')  # Convert date string back to datetime
        month = date.strftime("%m")  # Extract the month
        day = date.strftime("%d")  # Extract the dau
        year = date.strftime("%Y")  # Extract the year
        chart.append((year, month, day, r, t, s, l, p, tw))
        
    time.sleep(1)

# Print the final chart
print(chart)

[('2019', '05', '18', '1', 'Old Town Road', 'Lil Nas X Featuring Billy Ray Cyrus', '1', '1', '10'), ('2019', '05', '18', '2', "If I Can't Have You", 'Shawn Mendes', '-', '2', '1'), ('2019', '05', '18', '3', 'ME!', 'Taylor Swift Featuring Brendon Urie', '2', '2', '3'), ('2019', '05', '18', '4', 'Sucker', 'Jonas Brothers', '4', '1', '10'), ('2019', '05', '18', '5', 'Homicide', 'Logic Featuring Eminem', '-', '5', '1'), ('2019', '05', '18', '6', 'Wow.', 'Post Malone', '3', '2', '20'), ('2019', '05', '18', '7', 'Sunflower (Spider-Man: Into The Spider-Verse)', 'Post Malone & Swae Lee', '5', '1', '29'), ('2019', '05', '18', '8', 'Without Me', 'Halsey', '7', '1', '31'), ('2019', '05', '18', '9', 'Bad Guy', 'Billie Eilish', '9', '7', '6'), ('2019', '05', '18', '10', '7 Rings', 'Ariana Grande', '6', '1', '16'), ('2019', '05', '18', '11', 'Dancing With A Stranger', 'Sam Smith & Normani', '8', '7', '17'), ('2019', '05', '18', '12', 'Talk', 'Khalid', '10', '8', '13'), ('2019', '05', '18', '13', 'Sw

In [106]:
# Create DataFrame from chart list
df = pd.DataFrame(chart, columns=['Year', 'Month', 'Day', 'Rank', 'Title', 'Singer(s)', 'Last Week Position', 'Peak Position', 'Weeks on Chart'])

# Convert columns to numeric
df['Rank'] = pd.to_numeric(df['Rank'])
df['Last Week Position'] = pd.to_numeric(df['Last Week Position'], errors='coerce')
df['Peak Position'] = pd.to_numeric(df['Peak Position'], errors='coerce')
df['Weeks on Chart'] = pd.to_numeric(df['Weeks on Chart'], errors='coerce')

# Display the DataFrame
df

Unnamed: 0,Year,Month,Day,Rank,Title,Singer(s),Last Week Position,Peak Position,Weeks on Chart
0,2019,05,18,1,Old Town Road,Lil Nas X Featuring Billy Ray Cyrus,1.0,1,10
1,2019,05,18,2,If I Can't Have You,Shawn Mendes,,2,1
2,2019,05,18,3,ME!,Taylor Swift Featuring Brendon Urie,2.0,2,3
3,2019,05,18,4,Sucker,Jonas Brothers,4.0,1,10
4,2019,05,18,5,Homicide,Logic Featuring Eminem,,5,1
...,...,...,...,...,...,...,...,...,...
595,2023,12,16,96,Slime You Out,Drake Featuring SZA,90.0,1,12
596,2023,12,16,97,Rich Men North Of Richmond,Oliver Anthony Music,99.0,1,17
597,2023,12,16,98,Soak City,310babii,,98,2
598,2023,12,16,99,Turks & Caicos,Rod Wave Featuring 21 Savage,91.0,24,12


Top 10 Songs by Accumulated Weeks at No. 1 (2019-2024)

In [107]:
# Filter for songs that were No. 1
no1_songs = df[df['Rank'] == 1]

# Create a new DataFrame with just the Year, Title, and Singer(s)
# Group by Title and Singer(s) to count the weeks at No. 1
accumulated_weeks_no1 = no1_songs.groupby(['Year', 'Title', 'Singer(s)']).size().reset_index(name='Accumulated Weeks')
# Sort by accumulated weeks at No. 1
accumulated_weeks_no1 = accumulated_weeks_no1.sort_values(by=['Accumulated Weeks'], ascending=False)
# Show the top 10 songs with the most accumulated weeks at No.1
top_10_no1 = accumulated_weeks_no1.head(10)

# Display the final DataFrame
print(top_10_no1)

   Year                              Title  \
4  2023  Rockin' Around The Christmas Tree   
0  2019                            Circles   
1  2019                      Old Town Road   
2  2023                         Last Night   
3  2023                        Lovin On Me   

                             Singer(s)  Accumulated Weeks  
4                           Brenda Lee                  2  
0                          Post Malone                  1  
1  Lil Nas X Featuring Billy Ray Cyrus                  1  
2                        Morgan Wallen                  1  
3                          Jack Harlow                  1  


In [91]:
# Top 10 Songs by Accumulated Weeks at No. 1 (2019-2024)

# Create a bar chart
fig = px.bar(
    top_10_no1,
    x='Title',
    y='Accumulated Weeks',
    hover_data=['Singer(s)'],
    color='Accumulated Weeks',
    title='Top 10 Songs by Most Accumulated Weeks at No. 1 (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    xaxis_title='Songs',
    yaxis_title='Accumulated Weeks at No.1',
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        tickmode='linear'
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()


Monthly New Songs in the Top 10 (2019-2024)

In [92]:
# Filter for new songs that were in Top 10
top10_songs = df[(df['Rank'] <= 10) & (df['Last Week Position'] > 10)]

# Create a new DataFrame with just the Year, Month, Title, and Singer(s)
# Group by Year and Month to count the number of Top 10 songs
top10_songs_by_month = top10_songs.groupby(['Year', 'Month']).size().reset_index(name='Monthly New Songs in the Top 10')
# Combine Year and Month
top10_songs_by_month['Year-Month'] = top10_songs_by_month['Year'] + '-' + top10_songs_by_month['Month']
# Drop the original Year and Month columns
top10_songs_by_month = top10_songs_by_month.drop(columns=['Year', 'Month'])
# Rearrange the columns so that 'Year and Month' comes first
top10_songs_by_month = top10_songs_by_month[['Year-Month', 'Monthly New Songs in the Top 10']]

# Display the final DataFrame
print(top10_songs_by_month)

  Year-Month  Monthly New Songs in the Top 10
0    2019-11                                1
1    2023-12                                6


In [93]:
# Monthly New Songs in the Top 10 (2019-2024)

# Create a line chart
fig = px.line(
    top10_songs_by_month,
    x='Year-Month',
    y='Monthly New Songs in the Top 10',
    color_discrete_sequence=['red'],
    title='Monthly New Songs in the Top 10 (2019-2024)',
)

# Customize the chart
fig.update_traces(hovertemplate='Year-Month: %{x|%Y-%m}<br>Value: %{y}')
fig.update_layout(
    xaxis_title='Year-Month',
    yaxis_title='Counts',
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ))

# Show the chart
fig.show()


Top 5 Singers with the most numbers of Top 10 Songs (2019-2024)

In [94]:
# Filter for songs that were in the top 10
top10_songs = df[df['Rank'] <= 10]

# Drop duplicates based on 'Title' and 'Singer(s)' to ensure each song is counted once per artist
unique_top10_songs = top10_songs.drop_duplicates(subset=['Title', 'Singer(s)'])

# Group by Singer(s) to count the number of unique top 10 songs
top10_songs_singers = unique_top10_songs.groupby('Singer(s)').size().reset_index(name='No. of Top 10 Songs')

# Sort by the number of top 10 songs
top10_songs_singers = top10_songs_singers.sort_values(by='No. of Top 10 Songs', ascending=False)

# Show the top 5 singers with the most number of unique top 10 songs
top5_singers = top10_songs_singers.head(5)

# Display the final DataFrame
print(top5_singers)

        Singer(s)  No. of Top 10 Songs
3   Billie Eilish                    2
17          Lizzo                    2
25    Post Malone                    2
33   Taylor Swift                    2
28            SZA                    2


In [95]:
# Top 5 Singers with the most numbers of Top 10 Songs (2019-2024)

# Create a bar chart
fig = px.bar(
    top5_singers,
    x='Singer(s)',
    y='No. of Top 10 Songs',
    color='No. of Top 10 Songs',
    title='Top 5 Singers with The Most Numbers of Top 10 Songs (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    xaxis_title='Singer(s)',
    yaxis_title='No. of Top 10 Songs',
    title_font=dict(size=24),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()


No.1 Songs with Most Weeks on Chart (2019-2024)

In [96]:
# Filter for songs that reached No. 1
no1_songs = df[df['Rank'] == 1]

# Group the songs by Title and Singer(s) and count the maximum weeks
weeks_count = no1_songs.groupby(['Title', 'Singer(s)'])['Weeks on Chart'].max().reset_index()

# Count how many songs spend each number of weeks on chart
weeks_to_no1_count = weeks_count['Weeks on Chart'].value_counts().reset_index()

# Rename the columns for clarity
weeks_to_no1_count.columns = ['Number of Weeks', 'Count of Songs']

# Sort by the number of weeks in ascending order
weeks_to_no1_count_sorted = weeks_to_no1_count.sort_values(by=['Number of Weeks'], ascending=False)

# Show the top 10 results
top10_weeks_to_no1_count_sorted = weeks_to_no1_count_sorted.head(10)

# Display the final DataFrame
print(top10_weeks_to_no1_count_sorted)


   Number of Weeks  Count of Songs
4               55               1
1               17               1
0               12               1
3               10               1
2                2               1


In [97]:
# No.1 Songs with Most Weeks on Chart (2019-2024)

# Create a bar chart
fig = px.bar(
    top10_weeks_to_no1_count_sorted,
    x='Number of Weeks',
    y='Count of Songs',
    color_discrete_sequence=['#E48F72'],
    title='No.1 Songs with Most Weeks on Chart (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    xaxis_title='Number of Weeks',
    yaxis_title='Count of Songs',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
        type='category'
    ),
    yaxis=dict(
        tickfont=dict(size=16),
        tickmode='linear'
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()


Songs with the most weeks on chart (2019-2024)

In [98]:
# Group the songs by Title and Singer(s) and count the maximum weeks
weeks_on_chart = df.groupby(['Title', 'Singer(s)'])['Weeks on Chart'].max().reset_index()

# Sort by the number of weeks in ascending order
weeks_on_chart_sorted = weeks_on_chart.sort_values(by=['Weeks on Chart'], ascending=False)

# Show the top 10 results
top10_weeks_on_chart_sorted = weeks_on_chart_sorted.head(10)

# Display the final DataFrame
print(top10_weeks_on_chart_sorted)


                                 Title                   Singer(s)  \
12     All I Want For Christmas Is You                Mariah Carey   
20                           As It Was                Harry Styles   
316            Something In The Orange                  Zach Bryan   
283  Rockin' Around The Christmas Tree                  Brenda Lee   
398                          You Proof               Morgan Wallen   
177                   Jingle Bell Rock                 Bobby Helms   
313                             Snooze                         SZA   
182                          Kill Bill                         SZA   
282              Rock And A Hard Place            Bailey Zimmerman   
117                     Girls Like You  Maroon 5 Featuring Cardi B   

     Weeks on Chart  
12               62  
20               60  
316              57  
283              55  
398              54  
177              53  
313              52  
182              50  
282              50  
117      

In [99]:
# Songs with the most weeks on chart (2019-2024)
# Create a bar chart
fig = px.bar(
    top10_weeks_on_chart_sorted,
    x='Title',
    y='Weeks on Chart',
    color='Weeks on Chart',
    hover_data=['Singer(s)'],
    title='Songs with The Most Weeks on Chart (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Songs',
    yaxis_title='Weeks on Chart',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Distribution of Last Week Positions among Top 10 Songs (2019-2024)

In [100]:
# Filter for songs that reached top 10 and were outside that the week before
top10_songs = df[(df['Rank'] <= 10) & (df['Last Week Position'] > 10)]

# Count how many songs took each number of weeks to get to No. 1
last_week_position = top10_songs['Last Week Position'].value_counts().reset_index()

# Rename the columns for clarity
last_week_position.columns = ['Last Week Position', 'Count of Songs']

# Sort by the number of weeks in ascending order
last_week_position_sorted = last_week_position.sort_values(by=['Last Week Position'], ascending=True)

# Show the top 10 results
last_week_position_sorted = last_week_position_sorted.head(20)

# Display the final DataFrame
print(last_week_position_sorted)

   Last Week Position  Count of Songs
3                12.0               1
4                13.0               1
2                15.0               1
5                16.0               1
1                17.0               1
6                28.0               1
0                74.0               1


In [101]:
# Distribution of Last Week Positions among Top 10 Songs (2019-2024)
# Create a histogram
fig = px.histogram(
    last_week_position_sorted,
    x='Last Week Position',
    y='Count of Songs',
    color='Last Week Position',
    title='Distribution of Last Week Positions among Top 10 Songs (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Last Week Position',
    yaxis_title='Count of Songs',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Peak Position vs. Total Weeks on Chart (2019-2024)

In [102]:
# Group the songs by Title, Singer(s), and Peak Position, and get the maximum weeks
weeks_on_chart = df.groupby(['Title', 'Singer(s)', 'Peak Position'])['Weeks on Chart'].max().reset_index()

# Sort the DataFrame by 'Weeks on Chart' in descending order
weeks_on_chart_sorted = weeks_on_chart.sort_values(by='Weeks on Chart', ascending=False)

# Display the final DataFrame
print(weeks_on_chart_sorted)

                                 Title                             Singer(s)  \
12     All I Want For Christmas Is You                          Mariah Carey   
21                           As It Was                          Harry Styles   
325            Something In The Orange                            Zach Bryan   
290  Rockin' Around The Christmas Tree                            Brenda Lee   
415                          You Proof                         Morgan Wallen   
..                                 ...                                   ...   
357                           The Take      Tory Lanez Featuring Chris Brown   
146                         Hell Right  Blake Shelton Featuring Trace Adkins   
226                       Middle Child               PnB Rock & XXXTENTACION   
356                      The Shoe Fits                                 Drake   
126                            Go Loko                      YG, Tyga & Jon Z   

     Peak Position  Weeks on Chart  
12

In [103]:
# Peak Position vs. Total Weeks on Chart (2019-2024)
# Create a scatter plot
fig = px.scatter(
    weeks_on_chart_sorted,
    x='Weeks on Chart',
    y='Peak Position',
    color_discrete_sequence=['skyblue'],
    hover_data=['Title', 'Singer(s)'],
    title='Peak Position vs. Total Weeks on Chart (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Weeks on Chart',
    yaxis_title='Peak Position',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Longest Running Songs in Top 10 (2019-2024)

In [104]:
# Filter for songs that reached Top 10
top10_songs = df[df['Rank'] <= 10]

# Count how many weeks each song spent in the Top 10, including the singer
weeks_top10_count = top10_songs.groupby(['Title', 'Singer(s)']).size().reset_index(name='Weeks in Top 10')

# Sort by the number of weeks in descending order
weeks_top10_count_sorted = weeks_top10_count.sort_values(by=['Weeks in Top 10'], ascending=False)

# Show the top 5 results
top_weeks_top10 = weeks_top10_count_sorted.head(5)

# Display the final DataFrame
print(top_weeks_top10)

                              Title     Singer(s)  Weeks in Top 10
35                           Snooze           SZA                3
9                      Cruel Summer  Taylor Swift                3
3   All I Want For Christmas Is You  Mariah Carey                3
27                      Lovin On Me   Jack Harlow                3
32               Paint The Town Red      Doja Cat                3


In [108]:
# Longest Running Songs in Top 10 (2019-2024)
# Create a bar chart
fig = px.bar(
    top_weeks_top10,
    x='Title',
    y='Weeks in Top 10',
    color='Weeks in Top 10',
    hover_data=['Singer(s)'],
    title='Longest Running Songs in Top 10 (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    title_x=0.5,
    xaxis_title='Songs',
    yaxis_title='Weeks in Top 10',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16),
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Top 5 No. 1 Songs with the Fewest Weeks to Reach the Top Position (2019-2024)

In [79]:
# Filter for songs that reached No. 1
no1_songs = df[df['Rank'] == 1]

# Find the weeks on chart for No. 1 songs grouped by title and singer
weeks_to_best = no1_songs.groupby(['Title', 'Singer(s)'])['Weeks on Chart'].min()

# Count how many songs took each number of weeks to get to No. 1
weeks_to_no1_count = weeks_to_best.value_counts().reset_index()

# Rename columns for clarity
weeks_to_no1_count.columns = ['Weeks to No. 1', 'Counts']

# Sort by the number of weeks in ascending order
weeks_to_no1_count_sorted = weeks_to_no1_count.sort_values(by='Weeks to No. 1', ascending=True)

# Get the top 5 results
top5_weeks_to_no1_count = weeks_to_no1_count_sorted.head(5)

# Display the final DataFrame
print(top5_weeks_to_no1_count)


    Weeks to No. 1  Counts
0                1      22
1                2       4
2                4       3
4                5       2
13               6       1


In [81]:
# Top 5 No. 1 Songs with the Fewest Weeks to Reach the Top Position (2019-2024)
# Create a bar chart
fig = px.bar(
    top5_weeks_to_no1_count,
    x='Weeks to No. 1',
    y='Counts',
    color_discrete_sequence=['skyblue'],
    title='Top 5 No. 1 Songs with the Fewest Weeks to Reach the Top Position (2019-2024)',
    width=1000
)

# Customize the chart
fig.update_layout(
    title_font=dict(size=24),
    xaxis_title='Weeks to No. 1',
    yaxis_title='Counts',
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    xaxis=dict(
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        tickfont=dict(size=16)
    ),
    hoverlabel=dict(
        font=dict(
            family="Arial",
            size=14
        )
    ),
    bargap=0.2
)

# Show the chart
fig.show()

Monthly New Songs in Hot 100 (2022-2024)

In [35]:
# Filter for new songs
new_songs = df[df['Last Week Position'].isna()]

# Count how many new songs are in each month and year
new_songs_by_month = new_songs.groupby(['Year', 'Month']).size().reset_index(name='Count of New Songs')

# Sort by Year and Month in ascending order
new_songs_by_month_sorted = new_songs_by_month.sort_values(by=['Year', 'Month'], ascending=True)

# Display the final DataFrame
print(new_songs_by_month_sorted)

    Year Month  Count of New Songs
0   2022    01                 103
1   2022    02                  54
2   2022    03                  45
3   2022    04                  57
4   2022    05                  89
5   2022    06                  61
6   2022    07                  68
7   2022    08                  64
8   2022    09                  58
9   2022    10                  58
10  2022    11                  60
11  2022    12                  85
12  2023    01                  66
13  2023    02                  44
14  2023    03                  70
15  2023    04                  56
16  2023    05                  33
17  2023    06                  56
18  2023    07                 107
19  2023    08                  67
20  2023    09                  56
21  2023    10                  80
22  2023    11                  69
23  2023    12                  77
24  2024    01                  75
25  2024    02                  53
26  2024    03                  58
27  2024    04      

In [40]:
# Monthly New Songs in Hot 100 (2022-2024)
# Create the heatmap
fig = px.density_heatmap(new_songs_by_month_sorted,
                          x='Month',
                          y='Year',
                          z='Count of New Songs',
                          title='Monthly New Songs in Hot 100 (2019-2024)',
                          labels={'Count of New Songs': 'Number of New Songs'})

fig.update_layout(
    title=dict(
        text='Monthly New Songs in Hot 100 (2019-2024)',
        font=dict(size=24),  
        x=0.5
    )
)

# Show the heatmap
fig.show()
