In [9]:
# Billboard Hot 100 Scraper
# Copy this into your data_collection.ipynb

import requests
from bs4 import BeautifulSoup
import pandas as pd

# ============================================
# STEP 1: Scrape Billboard Hot 100
# ============================================

url = "https://www.billboard.com/charts/hot-100/"
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Current Billboard CSS selectors (as of late 2024)
    # These may need tweaking if Billboard updates their site
    
    # Song titles
    songs = [element.get_text(strip=True) for element in 
             soup.select('li.o-chart-results-list__item h3.c-title')]
    
    # Artist names  
    artists = [element.get_text(strip=True) for element in 
               soup.select('li.o-chart-results-list__item span.c-label.a-no-trucate')]
    
    print(f"Found {len(songs)} songs and {len(artists)} artists")
    
else:
    print(f"Failed to retrieve page. Status code: {response.status_code}")

# ============================================
# STEP 2: Create DataFrame
# ============================================

# Make sure we have matching lengths
min_len = min(len(songs), len(artists))
songs = songs[:min_len]
artists = artists[:min_len]

billboard_df = pd.DataFrame({
    'song_title': songs,
    'artist': artists
})

print(billboard_df.head(10))
print(f"\nTotal songs scraped: {len(billboard_df)}")

# ============================================
# STEP 3: Save to CSV
# ============================================

billboard_df.to_csv('/Users/chandlershortlidge/Desktop/Ironhack/DA_FT_Extra_Week10/data/billboard_hot100.csv', index=False)
print("\nSaved to data/billboard_hot100.csv")


# ============================================
# DEBUGGING: If the above doesn't work
# ============================================
# 
# Billboard changes their HTML structure often. If you're not getting
# 100 songs, uncomment this block to inspect the page structure:
#
# # Look at all h3 tags
# all_h3 = soup.find_all('h3')
# print(f"Found {len(all_h3)} h3 tags")
# for i, h3 in enumerate(all_h3[:5]):
#     print(f"{i}: {h3.get_text(strip=True)[:50]}... | classes: {h3.get('class')}")
#
# # Look at the raw HTML around the first song
# chart_list = soup.select('li.o-chart-results-list__item')
# if chart_list:
#     print("\nFirst chart item HTML:")
#     print(chart_list[0].prettify()[:1000])

Found 100 songs and 100 artists
                                    song_title  \
0              All I Want For Christmas Is You   
1                               Last Christmas   
2            Rockin' Around The Christmas Tree   
3                             Jingle Bell Rock   
4                                       Golden   
5                          The Fate Of Ophelia   
6                                     Ordinary   
7                                Santa Tell Me   
8  The Christmas Song (Merry Christmas To You)   
9     It's The Most Wonderful Time Of The Year   

                                 artist  
0                          Mariah Carey  
1                                 Wham!  
2                            Brenda Lee  
3                           Bobby Helms  
4  HUNTR/X: EJAE, Audrey Nuna & REI AMI  
5                          Taylor Swift  
6                           Alex Warren  
7                         Ariana Grande  
8                       Nat "King" Cole

In [11]:
import spotipy
import pandas as pd
import json
from spotipy.oauth2 import SpotifyClientCredentials
import spotify_config 
import pprint

#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= spotify_config.client_id,
                                                           client_secret= spotify_config.client_secret))



results = sp.search(q="daddy cool",limit=5,market="GB")
results


pprint.pprint(results)

pprint.pprint(results["tracks"]["items"][0]["id"])

{'tracks': {'href': 'https://api.spotify.com/v1/search?offset=0&limit=5&query=daddy%20cool&type=track&market=GB',
            'items': [{'album': {'album_type': 'album',
                                 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/54R6Y0I7jGUCveDTtI21nb'},
                                              'href': 'https://api.spotify.com/v1/artists/54R6Y0I7jGUCveDTtI21nb',
                                              'id': '54R6Y0I7jGUCveDTtI21nb',
                                              'name': 'Boney M.',
                                              'type': 'artist',
                                              'uri': 'spotify:artist:54R6Y0I7jGUCveDTtI21nb'}],
                                 'external_urls': {'spotify': 'https://open.spotify.com/album/1KQUrny9y5zGpktF6hAGd4'},
                                 'href': 'https://api.spotify.com/v1/albums/1KQUrny9y5zGpktF6hAGd4',
                                 'id': '1KQUrny9y5zGpktF

In [None]:
print("The json file has the following keys: ",list(results.keys())) # We can see that we only have tracks
print("The 'tracks' key has the following child keys: ",list(results["tracks"].keys())) # Let's check the values
print("The query we made is: ",results["tracks"]["href"]) # Query we have searched 
print("The song's info is contained in: ",results["tracks"]["items"]) #items (actual tracks)
print("The limit of the query we've made is: ",results["tracks"]["limit"]) #Limit we have chosen
print("The next page if any: ",results["tracks"]["next"]) #link to the next page (next 50 tracks)
print("The starting webpage: ",results["tracks"]["offset"]) # Actual offset (starting point)
print("Starting webpage: ",results["tracks"]["previous"]) #Previous search
print("Total number of results: ",results["tracks"]["total"]) # Number of matches

The json file has the following keys:  ['tracks']
The 'tracks' key has the following child keys:  ['href', 'limit', 'next', 'offset', 'previous', 'total', 'items']
The query we made is:  https://api.spotify.com/v1/search?offset=0&limit=5&query=daddy%20cool&type=track&market=GB
The song's info is contained in:  [{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/54R6Y0I7jGUCveDTtI21nb'}, 'href': 'https://api.spotify.com/v1/artists/54R6Y0I7jGUCveDTtI21nb', 'id': '54R6Y0I7jGUCveDTtI21nb', 'name': 'Boney M.', 'type': 'artist', 'uri': 'spotify:artist:54R6Y0I7jGUCveDTtI21nb'}], 'external_urls': {'spotify': 'https://open.spotify.com/album/1KQUrny9y5zGpktF6hAGd4'}, 'href': 'https://api.spotify.com/v1/albums/1KQUrny9y5zGpktF6hAGd4', 'id': '1KQUrny9y5zGpktF6hAGd4', 'images': [{'height': 640, 'width': 640, 'url': 'https://i.scdn.co/image/ab67616d0000b273dafd1cd6e9537ec8463ea691'}, {'height': 300, 'width': 300, 'url': 'https://i.scdn.co/ima

In [13]:

results["tracks"]["items"][0]["artists"] # Track artists
results["tracks"]["items"][0]["id"] # Track ID
results["tracks"]["items"][0]["name"] # Track name
results["tracks"]["items"][0]["popularity"] # Popularity index
results["tracks"]["items"][0]["uri"] # Basically ID

'spotify:track:3WMbD1OyfKuwWDWMNbPQ4g'

In [14]:

len(results['tracks']["items"])

5

In [15]:
track_id=results["tracks"]["items"][0]["id"]
track_id

for item in results['tracks']['items']:
    print("The name of song is: '{}' and the id is: {}".format(item['name'],item["id"]))

The name of song is: 'Daddy Cool' and the id is: 3WMbD1OyfKuwWDWMNbPQ4g
The name of song is: 'Rasputin' and the id is: 5lWSa1rmuSL6OBPOnkAqoa
The name of song is: 'Daddy Cool' and the id is: 702Xo5V8OhlY2cQEczCT2j
The name of song is: 'Get Down Saturday Night' and the id is: 58bsVCyN73XiWPhxRmvZi3
The name of song is: 'Daddy Cool' and the id is: 5ZOnUUok62w4qiBkk6XKRP


In [16]:
from IPython.display import IFrame

track_id = '3WMbD1OyfKuwWDWMNbPQ4g'
#track_id= 'spotify:track:3hgl7EQwTutSm6PESsB7gZ'
IFrame(src="https://open.spotify.com/embed/track/"+track_id,
       width="320",
       height="80",
       frameborder="0",
       allowtransparency="true",
       allow="encrypted-media",
      )

In [17]:
def play_song(track_id):
    return IFrame(src="https://open.spotify.com/embed/track/"+track_id,
       width="320",
       height="80",
       frameborder="0",
       allowtransparency="true",
       allow="encrypted-media",
      )

play_song('3WMbD1OyfKuwWDWMNbPQ4g')

In [21]:
song_list = ["lose yourself", "take five"]
rock_songs = ["highway to hell", "thunderstruck"]
jazz = ["autumn leaves", "mack the knife"]

In [19]:
import random

In [25]:
user_input = input("what type of music do you want to listen to?")
if user_input == "trending":
    random_song = random.choice(song_list)
    result = sp.search(q=random_song,limit=1,market="GB")
    track_id=result["tracks"]["items"][0]["id"]
    display(play_song(track_id))
elif user_input == "rock": 
    random_song = random.choice(rock_songs)
    result = sp.search(q=random_song,limit=1,market="GB")
    track_id=result["tracks"]["items"][0]["id"]
    display(play_song(track_id))
elif user_input == "jazz": 
    random_song = random.choice(jazz)
    result = sp.search(q=random_song,limit=1,market="GB")
    track_id=result["tracks"]["items"][0]["id"]
    display(play_song(track_id))