# Billboard

![title](../img/billboard2020.png)

In [4]:
import pandas as pd
import requests
import re
import time
from bs4 import BeautifulSoup as bs
from pprint import pprint

In [5]:
from IPython.core.display import HTML

In [7]:
display(HTML("<style>.container { width:100% !important; }</style>"))
pd.set_option('display.max_colwidth', 200)

In [8]:
url_dict = {
    'billboard2019' : 'https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2019',
    'billboard2020' : 'https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2020',
}

# Which year to get

In [9]:
bdsongs = 'billboard2020'

In [10]:
url = url_dict[bdsongs]
response = requests.get(url)

response.status_code

200

In [11]:
page = response.text

soup = bs(page)

In [12]:
wikiurl_prefix = 'https://en.wikipedia.org'

In [13]:
table = soup.find('table', class_ = 'wikitable')

In [14]:
tbdata = ([p.find_all('td') for p in table.find_all('tr') if p.find_all('td')])

In [15]:
tbdata[:3]

[[<td>1</td>,
  <td>"<a href="/wiki/Blinding_Lights" title="Blinding Lights">Blinding Lights</a>"</td>,
  <td><a href="/wiki/The_Weeknd" title="The Weeknd">The Weeknd</a>
  </td>],
 [<td>2</td>,
  <td>"<a href="/wiki/Circles_(Post_Malone_song)" title="Circles (Post Malone song)">Circles</a>"</td>,
  <td><a href="/wiki/Post_Malone" title="Post Malone">Post Malone</a>
  </td>],
 [<td>3</td>,
  <td>"<a href="/wiki/The_Box_(Roddy_Ricch_song)" title="The Box (Roddy Ricch song)">The Box</a>"</td>,
  <td><a href="/wiki/Roddy_Ricch" title="Roddy Ricch">Roddy Ricch</a>
  </td>]]

In [16]:
def get_ytlink_from_wiki(wikipage):
    time.sleep(1)
    
    try:
        response = requests.get(wikipage)   
        ipage = response.text
        isoup = bs(ipage)
    except:
        return None
        
    try:
        iinfobox = isoup.find('table', class_ = 'infobox')
        ytlink = iinfobox.find('a', class_ = 'external')['href']
    except :
        ytlink = None
    
    return ytlink

In [17]:
songlist = []

for rank, tb in enumerate(tbdata[:]):
    wikipage = wikiurl_prefix + tb[-2].find('a')['href']

    title = tb[-2].find('a').text.strip('\n')
    
    artist = tb[-1].text.strip('\n')
    
    ytlink = get_ytlink_from_wiki(wikipage)
    
    print(f'{rank}, {title}, {ytlink}, {artist}, {wikipage}')
    songlist.append(
        {
            'title' : title,
            'ytlink' : ytlink,
            'artist' : artist,
            'wikipage' : wikipage,
        }
    )

0, Blinding Lights, https://www.youtube.com/watch?v=4NRXx6U8ABQ, The Weeknd, https://en.wikipedia.org/wiki/Blinding_Lights
1, Circles, https://www.youtube.com/watch?v=wXhTHyIgQ_U, Post Malone, https://en.wikipedia.org/wiki/Circles_(Post_Malone_song)
2, The Box, https://www.youtube.com/watch?v=UNZqm3dxd2w, Roddy Ricch, https://en.wikipedia.org/wiki/The_Box_(Roddy_Ricch_song)
3, Don't Start Now, https://www.youtube.com/watch?v=oygrmJFKYZY, Dua Lipa, https://en.wikipedia.org/wiki/Don%27t_Start_Now
4, Rockstar, https://www.youtube.com/watch?v=mxFstYSbBmc, DaBaby featuring Roddy Ricch, https://en.wikipedia.org/wiki/Rockstar_(DaBaby_song)
5, Adore You, https://www.youtube.com/watch?v=yezDEWako8U, Harry Styles, https://en.wikipedia.org/wiki/Adore_You_(Harry_Styles_song)
6, Life Is Good, https://www.youtube.com/watch?v=l0U7SxXHkPY, Future featuring Drake, https://en.wikipedia.org/wiki/Life_Is_Good_(song)
7, Memories, https://www.youtube.com/watch?v=SlPhMPnQ58k, Maroon 5, https://en.wikipedia.o

62, Bandit, https://www.youtube.com/watch?v=Sw5fNI400E4, Juice Wrld and YoungBoy Never Broke Again, https://en.wikipedia.org/wiki/Bandit_(song)
63, Party Girl, https://www.youtube.com/watch?v=1KlNDZpCkHc, StaySolidRocky, https://en.wikipedia.org/wiki/Party_Girl_(StaySolidRocky_song)
64, Die from a Broken Heart, None, Maddie & Tae, https://en.wikipedia.org/wiki/Die_from_a_Broken_Heart
65, Popstar, None, DJ Khaled featuring Drake, https://en.wikipedia.org/wiki/Popstar_(DJ_Khaled_song)
66, All I Want for Christmas Is You, https://www.youtube.com/watch?v=yXQViqx6GMY, Mariah Carey, https://en.wikipedia.org/wiki/All_I_Want_for_Christmas_Is_You
67, One of Them Girls, None, Lee Brice, https://en.wikipedia.org/wiki/One_of_Them_Girls
68, Hard to Forget, https://www.youtube.com/watch?v=Wxhv_HsEIl4, Sam Hunt, https://en.wikipedia.org/wiki/Hard_to_Forget
69, One Margarita, None, Luke Bryan, https://en.wikipedia.org/wiki/One_Margarita
70, Panini, https://www.youtube.com/watch?v=bXcSLI58-h8, Lil Nas 

In [18]:
df = pd.DataFrame(songlist)
df

Unnamed: 0,title,ytlink,artist,wikipage
0,Blinding Lights,https://www.youtube.com/watch?v=4NRXx6U8ABQ,The Weeknd,https://en.wikipedia.org/wiki/Blinding_Lights
1,Circles,https://www.youtube.com/watch?v=wXhTHyIgQ_U,Post Malone,https://en.wikipedia.org/wiki/Circles_(Post_Malone_song)
2,The Box,https://www.youtube.com/watch?v=UNZqm3dxd2w,Roddy Ricch,https://en.wikipedia.org/wiki/The_Box_(Roddy_Ricch_song)
3,Don't Start Now,https://www.youtube.com/watch?v=oygrmJFKYZY,Dua Lipa,https://en.wikipedia.org/wiki/Don%27t_Start_Now
4,Rockstar,https://www.youtube.com/watch?v=mxFstYSbBmc,DaBaby featuring Roddy Ricch,https://en.wikipedia.org/wiki/Rockstar_(DaBaby_song)
...,...,...,...,...
95,More Than My Hometown,,Morgan Wallen,https://en.wikipedia.org/wiki/More_Than_My_Hometown
96,Lovin' on You,,Luke Combs,https://en.wikipedia.org/wiki/Lovin%27_on_You
97,Said Sum,https://www.youtube.com/watch?v=T8pi91qWnRw,Moneybagg Yo,https://en.wikipedia.org/wiki/Said_Sum
98,Slide,https://www.youtube.com/watch?v=SUJloylmEZM,H.E.R. featuring YG,https://en.wikipedia.org/wiki/Slide_(H.E.R._song)


In [25]:
df.to_csv(f'{bdsongs}.csv', index = False)