# Scrapping the Billboard for 100 hot songs

Billboard maintains a weekly Top 100 of "hot" songs here: https://www.billboard.com/charts/hot-100.

Scrape the current top 100 songs and their respective artists, and put the information into a pandas dataframe.

In [1]:
from bs4 import BeautifulSoup
import re
import requests
import pandas as pd

In [2]:
url = "https://www.billboard.com/charts/hot-100"

response = requests.get(url)

In [3]:
#check response status code 
response.status_code

200

In [4]:
#parse and store the contents of the url call
soup = BeautifulSoup(response.content,'html.parser')

In [5]:
#prettify the soup 
# soup.prettify()

# Query the soup to get songs and their respective artists

In [6]:
# get song title
soup.select('h3.c-title.a-no-trucate')[0].text

'\nEasy On Me\n'

In [7]:
soup.select('span.c-label.a-font-primary-s')[0].text

'\nAdele\n'

# Creating a dataframe from scraped data

In [8]:
song_title = []
artist = []

len_movies = len(soup.select('h3.c-title.a-no-trucate'))

In [9]:
len_movies

100

In [10]:
for i in range(len_movies):
    song_title.append(soup.select('h3.c-title.a-no-trucate')[i].text)
    artist.append(soup.select('span.c-label.a-font-primary-s')[i].text)

In [11]:
hot_songs = pd.DataFrame({'song_title':song_title,'artist':artist})

In [12]:
hot_songs.head()

Unnamed: 0,song_title,artist
0,\nEasy On Me\n,\nAdele\n
1,\nStay\n,\nThe Kid LAROI & Justin Bieber\n
2,\nIndustry Baby\n,\nLil Nas X & Jack Harlow\n
3,\nAll Too Well (Taylor's Version)\n,\nTaylor Swift\n
4,\nOh My God\n,\nAdele\n


# Cleaning / Wrangling steps for the scraped data 

In [13]:
hot_songs['song_title'] = hot_songs['song_title'].str.replace('\n','')
hot_songs['artist'] = hot_songs['artist'].str.replace('\n','')

In [14]:
hot_songs.head()

Unnamed: 0,song_title,artist
0,Easy On Me,Adele
1,Stay,The Kid LAROI & Justin Bieber
2,Industry Baby,Lil Nas X & Jack Harlow
3,All Too Well (Taylor's Version),Taylor Swift
4,Oh My God,Adele


In [15]:
from random import randint

In [73]:
def matchingString(x,y):
    match=''
    for i in range(0,len(x)):
        for j in range(0,len(y)):
            k=1
            # now applying while condition untill we find a substring match and length of substring is less than length of x and y
            while (i+k <= len(x) and j+k <= len(y) and x[i:i+k]==y[j:j+k]):
                if len(match) <= len(x[i:i+k]):
                    match = x[i:i+k]
                k=k+1
    return match 


In [None]:
def recommender():
    song = input('What is the name of your song? ')
    common = {}

    for j in range(len(hot_songs['song_title'])):
        match = matchingString(song.lower(), hot_songs['song_title'][j].lower())
        key = hot_songs['song_title'][j].lower()
        common[key] = match 
        #print(max(len(match) for match in list(common.items())))
        if len(match) == max(len(match) for match in list(common.values())):
            guess = [k for k,v in common.items() if v == match]
    #print(common)
    #print(guess)
    for i in range(len(guess)):
        answer = input("Do you mean " + guess[i].title()+ " by " + hot_songs['artist'][i] +  "? ")
        if answer.lower() == 'yes':
            x = randint(1,100)
            print("Nice! This is a hot song! You might also like " + hot_songs['song_title'][x].title() + " by " + hot_songs['artist'][x])
            break
        elif answer.lower() == 'no':
            i += 1
            if i not in range(len(guess)):
                print('Sorry then, not that hot one!') 



In [130]:
recommender()

What is the name of your song? eas
Do you mean Easy On Me by Adele? no
Do you mean Ex For A Reason by The Kid LAROI & Justin Bieber? no
Do you mean Too Easy by Lil Nas X & Jack Harlow? yes
Nice! This is a hot song! You might also like Cold As You by Luke Combs


In [131]:
recommender()

What is the name of your song? eas
Do you mean Easy On Me by Adele? no
Do you mean Ex For A Reason by The Kid LAROI & Justin Bieber? no
Do you mean Too Easy by Lil Nas X & Jack Harlow? no
Sorry then, not that hot one!
