In [13]:
import requests
import re
from bs4 import BeautifulSoup
from datetime import datetime
import time
import calendar
import json

In [15]:
# get the top 10 songs at the end of each month of a given year

def get_last_day_of_month(year: int, month: int) -> str:
    # Get the last day of the given month and year.
    # calendar.monthrange returns (weekday, last_day_of_month)
    _, last_day = calendar.monthrange(year, month)
    return f"{year}-{month:02d}-{last_day:02d}"

def get_top_10_songs(year: int):
    all_songs = {}

    # Loop through each month from 1 (January) to 12 (December)
    for month in range(1, 13):
        # Get the last day of the current month
        last_day = get_last_day_of_month(year, month)

        # Construct the URL for the Billboard Hot 100 for the last day of the month.
        url = f"https://www.billboard.com/charts/hot-100/{last_day}/"

        # Send the request to the URL.
        headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
      }

        response = requests.get(url, headers=headers)
        time.sleep(1)
        if response.status_code != 200:
            print(f"Error fetching the page for {last_day}: {response.status_code}")
            continue

        # Parse the page content using BeautifulSoup.
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the song entries (assumes the same structure for each page)
        song_entries = soup.find_all('ul', class_='o-chart-results-list-row')

        if not song_entries:
            print(f"Failed to find songs for {last_day}. Please check the HTML structure.")
            continue

        # List to hold the top 10 songs for each month
        songs = []

        # Loop through the first 10 song entries and extract details.
        for entry in song_entries[:10]:

            # Extract song title
            title_tag = entry.find('h3', id='title-of-a-story')

            # Extract artist(s)
            artist_tags = entry.find_all('span', class_='c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only')

            # First artist (has a specific class u-font-size-20@tablet)
            first_artist_tag = entry.find('span', class_='c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only u-font-size-20@tablet')

            # If the first artist is found, insert it at the start of the list
            if first_artist_tag:
                artist_tags.insert(0, first_artist_tag)  # Insert the first artist at the beginning

            song_title = title_tag.text.strip()

            first_artist = artist_tags[0].text.strip() if artist_tags else ''
            additional_artists = [artist.text.strip() for artist in artist_tags[1:]]
            all_artists = first_artist + (' ft. ' + ', '.join(additional_artists) if additional_artists else '')

            # Add the song to the list for this month
            songs.append(f"{song_title} by {all_artists}")

        # Store the top 10 songs for this month
        all_songs[last_day] = songs

    return all_songs

def get_top_songs_for_multiple_years(start_year: int, end_year: int):
    all_years_songs = {}

    # Loop through each year from start_year to end_year
    for year in range(start_year, end_year + 1):
        print(f"Fetching top 10 songs for {year}...")
        year_songs = get_top_10_songs(year)

        if year_songs:
            all_years_songs[year] = year_songs
        else:
            print(f"Failed to retrieve songs for {year}.")

    return all_years_songs

# Get top 10 songs for each month from 2010 to 2023
start_year = 2010
end_year = 2023

top_songs_by_year = get_top_songs_for_multiple_years(start_year, end_year)

# Optionally, save the results to a JSON file for later use
with open('top_songs_2010_2024.json', 'w') as f:
    json.dump(top_songs_by_year, f, indent=4)

# Print all of the songs
if top_songs_by_year:
    for year, months in top_songs_by_year.items():
        print(f"\nTop 10 Billboard Hot 100 Songs for {year}:")
        for date, songs in months.items():
            print(f"\nFor {date}:")
            for song in songs:
                print(song)
else:
    print("Failed to retrieve songs for the specified years.")

Fetching top 10 songs for 2010...
Fetching top 10 songs for 2011...
Fetching top 10 songs for 2012...
Fetching top 10 songs for 2013...
Fetching top 10 songs for 2014...
Fetching top 10 songs for 2015...
Fetching top 10 songs for 2016...
Fetching top 10 songs for 2017...
Fetching top 10 songs for 2018...
Fetching top 10 songs for 2019...
Fetching top 10 songs for 2020...
Fetching top 10 songs for 2021...
Fetching top 10 songs for 2022...
Fetching top 10 songs for 2023...

Top 10 Billboard Hot 100 Songs for 2010:

For 2010-01-31:
TiK ToK by Ke$ha
Today Was A Fairytale by Taylor Swift
Bad Romance by Lady Gaga
BedRock by Young Money Featuring Lloyd
Baby by Justin Bieber Featuring Ludacris
Replay by Iyaz
Sexy Chick by David Guetta Featuring Akon
Empire State Of Mind by Jay-Z + Alicia Keys
Hard by Rihanna Featuring Jeezy
Hey, Soul Sister by Train

For 2010-02-28:
Imma Be by The Black Eyed Peas
TiK ToK by Ke$ha
BedRock by Young Money Featuring Lloyd
Bad Romance by Lady Gaga
Need You Now by L

In [17]:
# Extract all songs from 2010-2023 from code above
all_songs = []

for year, months in top_songs_by_year.items():  # Loop through each year
    for month, songs in months.items():  # Loop through each month
        all_songs.extend(songs)  # Add the songs to the list
all_songs

['TiK ToK by Ke$ha',
 'Today Was A Fairytale by Taylor Swift',
 'Bad Romance by Lady Gaga',
 'BedRock by Young Money Featuring Lloyd',
 'Baby by Justin Bieber Featuring Ludacris',
 'Replay by Iyaz',
 'Sexy Chick by David Guetta Featuring Akon',
 'Empire State Of Mind by Jay-Z + Alicia Keys',
 'Hard by Rihanna Featuring Jeezy',
 'Hey, Soul Sister by Train',
 'Imma Be by The Black Eyed Peas',
 'TiK ToK by Ke$ha',
 'BedRock by Young Money Featuring Lloyd',
 'Bad Romance by Lady Gaga',
 'Need You Now by Lady Antebellum',
 'We Are The World 25: For Haiti by Artists For Haiti',
 'Hey, Soul Sister by Train',
 'How Low by Ludacris',
 'In My Head by Jason Derulo',
 'Say Aah by Trey Songz Featuring Fabolous',
 'Rude Boy by Rihanna',
 "Nothin' On You by B.o.B Featuring Bruno Mars",
 'Telephone by Lady Gaga Featuring Beyonce',
 'Need You Now by Lady Antebellum',
 'Break Your Heart by Taio Cruz Featuring Ludacris',
 'Imma Be by The Black Eyed Peas',
 'Hey, Soul Sister by Train',
 'BedRock by Young 

In [19]:
# get Genius lyric pages of a given song
def get_genius_lyric_page(song_title: str, artist_name: str) -> str:
    # Remove any 'feat.', 'featuring', or 'ft.' and everything after it from the artist name
    artist_name = re.sub(r'\s*(feat\.|featuring|ft\.|With\.)\s*.*', '', artist_name, flags=re.IGNORECASE).strip()

    # Handle cases where there are multiple artists and join with 'and'
    artist_parts = artist_name.split("&")
    artist_parts = [name.strip() for name in artist_parts]  # Remove any surrounding spaces
    if len(artist_parts) > 1:
        artist_name = " and ".join(artist_parts)  # Join with "and" for multiple artists

    # Clean up the artist name by replacing certain characters
    artist_name = artist_name.replace("$", "s")
    artist_name = artist_name.replace(" X ", " and ")
    artist_name = re.sub(r'[^a-zA-Z0-9 ]', '', artist_name)  # Remove special characters
    artist_name = artist_name.lower().replace(" ", "-").replace(", ", "-").replace(".", "").replace("*", "-")


    # Clean up the song title by replacing certain characters to construct the correct url
    song_title = song_title.replace("'", "").lower().replace(" ", "-").replace("(", "").replace(")", "").replace(".", "").replace("[", "").replace("]", "").replace("?", "").replace("...", "").replace(",", "").replace("!", "").replace("&", "and")

    # Build the Genius URL in the required format
    genius_url = f"https://genius.com/{artist_name}-{song_title}-lyrics"

    return genius_url

song_title = "Taste"
artist_name = "Sabrina Carpenter"
genius_url = get_genius_lyric_page(song_title, artist_name)
print(genius_url)

https://genius.com/sabrina-carpenter-taste-lyrics


In [21]:
# print Genius URLs for all songs
genius_urls = []
for song in all_songs:
    # Split the song string into title and artist(s)
    song_title, artist_name = song.split(" by ", 1)

    # Get the Genius URL for the song
    genius_url = get_genius_lyric_page(song_title, artist_name)
    genius_urls.append(genius_url)
    # Print the Genius URL
    print(f"{song_title} by {artist_name} - {genius_url}")

TiK ToK by Ke$ha - https://genius.com/kesha-tik-tok-lyrics
Today Was A Fairytale by Taylor Swift - https://genius.com/taylor-swift-today-was-a-fairytale-lyrics
Bad Romance by Lady Gaga - https://genius.com/lady-gaga-bad-romance-lyrics
BedRock by Young Money Featuring Lloyd - https://genius.com/young-money-bedrock-lyrics
Baby by Justin Bieber Featuring Ludacris - https://genius.com/justin-bieber-baby-lyrics
Replay by Iyaz - https://genius.com/iyaz-replay-lyrics
Sexy Chick by David Guetta Featuring Akon - https://genius.com/david-guetta-sexy-chick-lyrics
Empire State Of Mind by Jay-Z + Alicia Keys - https://genius.com/jayz--alicia-keys-empire-state-of-mind-lyrics
Hard by Rihanna Featuring Jeezy - https://genius.com/rihanna-hard-lyrics
Hey, Soul Sister by Train - https://genius.com/train-hey-soul-sister-lyrics
Imma Be by The Black Eyed Peas - https://genius.com/the-black-eyed-peas-imma-be-lyrics
TiK ToK by Ke$ha - https://genius.com/kesha-tik-tok-lyrics
BedRock by Young Money Featuring Ll

In [55]:
# Function to scrape description from the provided URL
def scrape_description(url: str) -> str:
    # Send GET request to the URL
    response = requests.get(url)

    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the div with the specific class
        description_div = soup.find('div', class_='RichText__Container-oz284w-0')
        if description_div:
            # Extract all text and links within the <p> tags inside the div
            paragraphs = description_div.find_all('p')
            description = ""
            for para in paragraphs:
                # Extract text from a paragraph
                para_text = para.get_text(strip=True)
                # Find all anchor tags in paragraph
                links = para.find_all('a')
                # Replaces the visible text of each link with a Markdown-style link 
                for link in links:
                    para_text = para_text.replace(link.get_text(), f"[{link.get_text()}]({link['href']})")
                # Add the processed paragraph text to a description string
                description += para_text + "\n\n"
            return description.strip()
        else:
            return "Description container not found."
    else:
        return f"Failed to fetch the page. Status code: {response.status_code}"

# Example usage with the Genius song URL (replace with the actual URL)
url = 'https://genius.com/Kesha-tik-tok-lyrics'
description = scrape_description(url)
print(description)

“TiK ToK” was Kesha’s debut single, written by Ke$ha, Benny Blanco, and Dr. Luke, the latter two doubling as the song’s producers. Kesha had previously found success as the hook singer on Flo Rida’s “Right Round,” but this 2009 party anthem made her a star in her own right.

The song follows a group of friends through their night out: from getting ready and arriving at the party to swatting away unwelcome guys and dancing till the cops show up. “TiK ToK” might seem like just another aural glitter bomb, but underneath the beat-heavy electro-pop​ is a message about being confident and showing no shame in having fun. It turned out to be a message that hit with a lot of listeners.

The song spent[38 weeks on the Billboard Hot 100](http://www.billboard.com/artist/305612/keha/chart?page=1&f=379).


In [41]:
# Extract the sentiment ratings of all songs
from textblob import TextBlob

def analyze_sentiment(description) -> str:
    # Create a TextBlob object
    blob = TextBlob(description)
    # Get the sentiment polarity (-1 to 1, where -1 is negative and 1 is positive)
    sentiment_polarity = blob.sentiment.polarity
    # Determine the sentiment
    if sentiment_polarity > 0:
        sentiment = "Positive"
    elif sentiment_polarity < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    return sentiment, sentiment_polarity

# Example usage
sentiment, polarity = analyze_sentiment(description)
print(f"Sentiment: {sentiment}")
print(f"Polarity: {polarity}")

Sentiment: Positive
Polarity: 0.19833333333333333


In [43]:
import requests
from bs4 import BeautifulSoup
import re
import plotly.express as px

In [29]:
# Dictionary to store sentiment ratings
sentiment_ratings = {}

# Iterate over each URL in the list
for url in genius_urls:
    # Scrape the description for each song URL
    description = scrape_description(url)

    if description:
        # Analyze sentiment and polarity
        sentiment, polarity = analyze_sentiment(description)
        print(f"URL: {url}")
        print(f"Sentiment: {sentiment}")
        print(f"Polarity: {polarity}")

        # Store the sentiment and polarity in the dictionary with the URL as key
        sentiment_ratings[url] = (sentiment, polarity)
    else:
        print(f"Failed to retrieve description for URL: {url}")

# Print the final dictionary with URLs and sentiment ratings
print("Sentiment Ratings Dictionary:")
print(sentiment_ratings)

URL: https://genius.com/kesha-tik-tok-lyrics
Sentiment: Positive
Polarity: 0.19027777777777777
URL: https://genius.com/taylor-swift-today-was-a-fairytale-lyrics
Sentiment: Positive
Polarity: 0.28125
URL: https://genius.com/lady-gaga-bad-romance-lyrics
Sentiment: Positive
Polarity: 0.2219291125541126
URL: https://genius.com/young-money-bedrock-lyrics
Sentiment: Positive
Polarity: 0.11952380952380953
URL: https://genius.com/justin-bieber-baby-lyrics
Sentiment: Positive
Polarity: 0.17551020408163268
URL: https://genius.com/iyaz-replay-lyrics
Sentiment: Positive
Polarity: 0.11785714285714287
URL: https://genius.com/david-guetta-sexy-chick-lyrics
Sentiment: Neutral
Polarity: 0.0
URL: https://genius.com/jayz--alicia-keys-empire-state-of-mind-lyrics
Sentiment: Negative
Polarity: -0.5
URL: https://genius.com/rihanna-hard-lyrics
Sentiment: Positive
Polarity: 0.11838624338624339
URL: https://genius.com/train-hey-soul-sister-lyrics
Sentiment: Positive
Polarity: 0.2767857142857143
URL: https://gen

In [45]:
# Initialize a dictionary to store songs by year
songs_by_year = {}

# Iterate through the top_songs_by_year dictionary
for year, months in top_songs_by_year.items():
    for month, songs in months.items():
        # For each month, append the songs to the dictionary under the corresponding year
        if year not in songs_by_year:
            songs_by_year[year] = []
        songs_by_year[year].extend(songs)  # Add all songs for this month to the list of songs for the year

# Print the dictionary with songs classified by year
for year, songs in songs_by_year.items():
    print(f"Year: {year}")
    for song in songs:
        print(f"  - {song}")

Year: 2010
  - TiK ToK by Ke$ha
  - Today Was A Fairytale by Taylor Swift
  - Bad Romance by Lady Gaga
  - BedRock by Young Money Featuring Lloyd
  - Baby by Justin Bieber Featuring Ludacris
  - Replay by Iyaz
  - Sexy Chick by David Guetta Featuring Akon
  - Empire State Of Mind by Jay-Z + Alicia Keys
  - Hard by Rihanna Featuring Jeezy
  - Hey, Soul Sister by Train
  - Imma Be by The Black Eyed Peas
  - TiK ToK by Ke$ha
  - BedRock by Young Money Featuring Lloyd
  - Bad Romance by Lady Gaga
  - Need You Now by Lady Antebellum
  - We Are The World 25: For Haiti by Artists For Haiti
  - Hey, Soul Sister by Train
  - How Low by Ludacris
  - In My Head by Jason Derulo
  - Say Aah by Trey Songz Featuring Fabolous
  - Rude Boy by Rihanna
  - Nothin' On You by B.o.B Featuring Bruno Mars
  - Telephone by Lady Gaga Featuring Beyonce
  - Need You Now by Lady Antebellum
  - Break Your Heart by Taio Cruz Featuring Ludacris
  - Imma Be by The Black Eyed Peas
  - Hey, Soul Sister by Train
  - BedR

In [None]:
import pandas as pd
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange, HoverTool
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20

# Create DataFrame as before
data = []
for year, songs in songs_by_year.items():
    sentiment_counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0}
    for song in songs:
        song_title, artist_name = song.split(" by ", 1)
        genius_url = get_genius_lyric_page(song_title, artist_name)
        sentiment, _ = sentiment_ratings.get(genius_url, ('Neutral', 0))
        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1
    data.append({
        'Year': year,
        'Positive': sentiment_counts['Positive'],
        'Negative': sentiment_counts['Negative'],
        'Neutral': sentiment_counts['Neutral']
    })

df = pd.DataFrame(data)
df = df.set_index('Year')
df = df.stack().reset_index()
df.columns = ['Year', 'Sentiment', 'Count']

# Prepare the data for Bokeh
years = df['Year'].unique().tolist()
sentiments = df['Sentiment'].unique().tolist()
counts = df['Count'].tolist()

data = {'Year_Sentiment': [(str(year), sentiment) for year in years for sentiment in sentiments], 'Count': counts}
source = ColumnDataSource(data=data)

# Define the custom colors for each sentiment type
sentiment_colors = {
    'Positive': 'mediumpurple',
    'Negative': 'red',
    'Neutral': 'grey'
}
colors = [sentiment_colors[sentiment] for sentiment in sentiments]

# Create the plot
p = figure(x_range=FactorRange(*data['Year_Sentiment']), 
           title="Sentiment Analysis of Songs by Year",
           toolbar_location=None, tools="", height=400, width=800)

p.vbar(x='Year_Sentiment', top='Count', width=0.9, source=source, line_color="white", 
       fill_color=factor_cmap('Year_Sentiment', palette=colors, factors=sentiments, start=1, end=2))

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1.2
p.xgrid.grid_line_color = None

# Add hover tool
hover = HoverTool()
hover.tooltips = [("Year", "@Year_Sentiment"), ("Count", "@Count")]
p.add_tools(hover)

# Save and show the plot
output_file("sentiment_analysis.html")
show(p)