# Web Scraping Single Page (popvortex)

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import random
from IPython.display import display
from ipywidgets import widgets, Output

In [None]:
# Page to scrape: the PopVortex "top 100 songs" chart.
url = "https://www.popvortex.com/music/charts/top-100-songs.php"

In [None]:
# Download the chart page. requests waits indefinitely by default, so an
# explicit timeout (seconds) keeps this cell from hanging forever when the
# server never answers; requests.exceptions.Timeout is raised instead.
response = requests.get(url, timeout=30)
# Trailing expression so the notebook displays the HTTP status of the fetch.
response.status_code

In [None]:
# Parse the raw response bytes with the "html.parser" backend.
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [None]:
# Song titles: text of every <cite> that is a direct child of a <p>.
title_nodes = soup.select('p > cite')
titles = [node.get_text() for node in title_nodes]
# Displayed as a sanity check on how many titles were found.
len(titles)

In [None]:
# Artist names: text of every <em> that is a direct child of a <p>.
artist_nodes = soup.select('p > em')
artists = [node.get_text() for node in artist_nodes]
# Displayed as a sanity check; should match the number of titles.
len(artists)

In [None]:
# Text of every chart <li> that contains a <strong> label, in page order.
# NOTE(review): the slicing below assumes the items strictly alternate
# "genre" line, "release date" line for each song -- confirm against the page.
genre_release_list = [
    item.get_text() for item in soup.select('.chart-wrapper li:has(strong)')
]

# Keep everything after the first "label: " prefix. maxsplit=1 ensures a value
# that itself contains ": " is kept whole instead of being cut at the second
# separator.
genres = [text.split(': ', 1)[1] for text in genre_release_list[0::2]]
releases = [text.split(': ', 1)[1] for text in genre_release_list[1::2]]

In [None]:
# One column per scraped list; pandas requires all lists to have the same
# length, so a mismatch between the selectors surfaces here as an error.
popvortex_columns = {
    'titles': titles,
    'artists': artists,
    'genres': genres,
    'release_date': releases,
}
data = pd.DataFrame(popvortex_columns)

# Preview the first rows in the notebook output.
data.head()

# Web Scraping Single Page (billboard)

In [None]:
# Page to scrape: the Billboard Hot 100 chart (rebinds the earlier `url`).
url = "https://www.billboard.com/charts/hot-100/"

In [None]:
# Download the Billboard chart page. Without a timeout requests can block
# indefinitely on an unresponsive server; with one, a stalled connection
# raises requests.exceptions.Timeout after the given number of seconds.
response = requests.get(url, timeout=30)
# Trailing expression so the notebook displays the HTTP status of the fetch.
response.status_code

In [None]:
# Parse the raw Billboard response bytes with the "html.parser" backend.
soup = BeautifulSoup(markup=response.content, features='html.parser')

In [None]:
# Song titles inside the chart results list, with surrounding whitespace
# removed from each extracted text.
title_nodes = soup.select('.chart-results-list #title-of-a-story.a-no-trucate')
titles = [node.get_text().strip() for node in title_nodes]
# Displayed as a sanity check on how many titles were found.
len(titles)

In [None]:
# Artist names inside the chart results list, whitespace-trimmed.
artist_nodes = soup.select('.chart-results-list span.a-no-trucate')
artists = [node.get_text().strip() for node in artist_nodes]
# Displayed as a sanity check; should match the number of titles.
len(artists)

In [None]:
# All credit paragraphs, kept as tags (text is extracted by the cells that
# slice this list), in page order.
chart_credits = list(soup.select('.o-chart-credits p'))

In [None]:
# Every third credit paragraph starting at index 3, i.e. the first paragraph
# of each group of three with the very first group skipped.
# NOTE(review): presumably the first group is not a regular chart row -- confirm.
songwriters = [credit.get_text() for credit in chart_credits[3::3]]
len(songwriters)

In [None]:
# Every third credit paragraph starting at index 4, i.e. the second paragraph
# of each group of three with the very first group skipped.
producers = [credit.get_text() for credit in chart_credits[4::3]]
len(producers)

In [None]:
# Every third credit paragraph starting at index 5, i.e. the third paragraph
# of each group of three with the very first group skipped.
labels = [credit.get_text() for credit in chart_credits[5::3]]
len(labels)

In [None]:
# Second <li> of each inner list in a chart row; only every other match
# (odd positions) is kept.
# NOTE(review): unlike the sibling columns this keeps the tag objects
# themselves rather than their stripped text -- confirm that is intended.
award_cells = soup.select('.chart-results-list li.lrv-u-width-100p ul li:nth-child(2)')
awards = list(award_cells)[1::2]
len(awards)

In [None]:
# Third <li> of each inner list in a chart row; keep every other match
# (odd positions) and trim whitespace from its text.
last_week_cells = soup.select('.chart-results-list li.lrv-u-width-100p ul li:nth-child(3)')
last_weeks = [cell.get_text().strip() for cell in list(last_week_cells)[1::2]]
len(last_weeks)

In [None]:
# Fourth <li> of each inner list in a chart row; keep every other match
# (odd positions) and trim whitespace from its text.
peak_cells = soup.select('.chart-results-list li.lrv-u-width-100p ul li:nth-child(4)')
peak_pos = [cell.get_text().strip() for cell in list(peak_cells)[1::2]]
len(peak_pos)

In [None]:
# Fifth <li> of each inner list in a chart row; keep every other match
# (odd positions) and trim whitespace from its text.
weeks_cells = soup.select('.chart-results-list li.lrv-u-width-100p ul li:nth-child(5)')
wks = [cell.get_text().strip() for cell in list(weeks_cells)[1::2]]
len(wks)

In [None]:
# One column per scraped Billboard list; pandas requires all lists to have
# the same length, so a selector that matched too few or too many elements
# surfaces here as an error.
billboard_columns = {
    'title': titles,
    'artists': artists,
    'songwriter': songwriters,
    'producer': producers,
    'label': labels,
    'award': awards,
    'last_week': last_weeks,
    'peak_pos': peak_pos,
    'wks': wks,
}
data = pd.DataFrame(billboard_columns)

# Preview the first rows in the notebook output.
data.head()

In [None]:
# Persist the Billboard table next to the notebook. With pandas' defaults the
# row index is written as well, as the first (unnamed) column.
data.to_csv(path_or_buf='top_100_songs_billboard.csv')

#### Writing a program for song recommendation

In [None]:
# Interactive recommender: ask for a song title and, if it is one of the
# titles in `data`, suggest a different, randomly chosen title from the same
# table. Typing 'exit' ends the loop.
print('''Welcome User! 
Give me the title of a song you like and I will recommend you a new one.''')

# Normalise the known titles once so matching ignores case and surrounding
# whitespace. Note this rebinds `titles` to a pandas Series.
titles = data['title'].str.lower().str.strip()
# Set for O(1) membership tests instead of scanning the Series on every input.
known_titles = set(titles)

while True:
    user_input = input("What is your song? (or 'exit' to quit): ")

    # Normalise before *any* comparison, so that inputs such as ' Exit ' quit
    # the program instead of being looked up as a song title.
    user_title = user_input.strip().lower()
    if not user_title:
        print('You have to input a valid string (Not empty, nor only white-spaces).')
        continue

    if user_title == 'exit':
        break

    if user_title not in known_titles:
        print(f"I don't know '{user_input}' song and therefore cannot recommend anything. Try again.")
        continue

    # Never recommend the song the user just named.
    filtered_titles = [title for title in titles if title != user_title]
    if not filtered_titles:
        # random.choice raises IndexError on an empty sequence; this happens
        # when the table holds no title other than the one entered.
        print(f"'{user_input}' is the only song I know, so I have nothing else to recommend.")
        continue

    random_title = random.choice(filtered_titles)

    print(f"Based on '{user_input}', you can try: '{random_title}'")